조성현

complete dblp paper v1

......@@ -11,7 +11,7 @@
using namespace std;
const char* DBLP_FILENAME = "test.json";
const char* DBLP_FILENAME = "dblp.json";
const char* DBLP_COAUTHOR_FILENAME = "tmp_dblp_coauthorship.json";
const string COLUMN_DELIMITER = "||";
const string AUTHOR_DELIMITER = "&&";
......@@ -22,6 +22,7 @@ int main() {
Json::Reader reader;
ifstream dblp_paper, dblp_coauthor;
ofstream dblp_paper_out, dblp_coauthor_out;
boost::regex paper_reg{"(conf|journals).*"};
try {
//1. dblp paper dataset
......@@ -49,6 +50,7 @@ int main() {
for (auto it=root.begin();
it!=root.end();
++it) {
//preprocessing
row.clear();
coauthors.clear();
......@@ -62,21 +64,35 @@ int main() {
//read one row at a time
row = *it;
paper_key = row[0].asString();
coauthors = row[1];
for (auto coit=coauthors.begin(); coit!=coauthors.end(); ++coit) {
coauthor_list.push_back(coit->asString());
}
year = ((row[2].isNull())?-1:row[2].asInt());
//write
dblp_paper_out << paper_key << COLUMN_DELIMITER;
for (auto auit=coauthor_list.begin(); auit!=coauthor_list.end(); ++auit) {
dblp_paper_out << (*auit);
if () {
dblp_paper_out << AUTHOR_DELIMITER;
//check whether it is paper
if (boost::regex_match(paper_key, paper_reg)) {
coauthors = row[1];
for (auto coit=coauthors.begin(); coit!=coauthors.end(); ++coit) {
coauthor_list.push_back(coit->asString());
}
year = ((row[2].isNull())?-1:row[2].asInt());
//write
dblp_paper_out << paper_key << COLUMN_DELIMITER;
if (coauthor_list.size() > 0) {
for (auto auit=coauthor_list.begin(); auit!=coauthor_list.end();) {
dblp_paper_out << (*auit);
++auit;
if (auit != coauthor_list.end()) {
dblp_paper_out << AUTHOR_DELIMITER;
}
}
} else {
//empty
throw exception("paper without author");
}
dblp_paper_out << COLUMN_DELIMITER
<< year
<< endl;
} else {
//not paper
}
dblp_paper_out << endl;
//postprocessing
++count;
......@@ -87,17 +103,17 @@ int main() {
//2. dblp coauthorship dataset
//dblp_coauthor.open(DBLP_COAUTHOR_FILENAME);
}
catch (const exception& e) {
cerr << "Error: " << e.what() << endl;
return -1;
}
//release
dblp_paper.close();
dblp_coauthor.close();
dblp_paper_out.close();
dblp_coauthor_out.close();
if (dblp_paper) dblp_paper.close();
if (dblp_coauthor) dblp_coauthor.close();
if (dblp_paper_out) dblp_paper_out.close();
if (dblp_coauthor_out) dblp_coauthor_out.close();
return 0;
}
\ No newline at end of file
......