My Project
|
00001 00009 #ifndef NATION_H 00010 #define NATION_H 00011 00012 #include "MyTools.h" 00013 #include "compare.h" 00014 00015 #include <boost/bimap.hpp> 00016 00017 typedef boost::bimap <string, string> idmap; 00018 typedef idmap::value_type ID; 00019 00026 class Nation{ 00027 public: 00028 00032 Nation():threshold(-1){ 00033 comp = NULL; 00034 } 00035 00039 Nation(double v):threshold(v){ 00040 comp = NULL; 00041 } 00042 00051 Nation(double thresh, string c, int s, string d):threshold(thresh){ 00052 Compare* God = new Compare(); 00053 comp = God->Produce(c); 00054 comp->graphSize = s; 00055 comp->setDist(d); 00056 delete God; 00057 }; 00058 00068 bool CalculateBinary(vector < set < string > > Allies){ 00069 if (threshold == -1) 00070 return false; 00071 00072 if (memberships.size() == 0) 00073 memberships = CalculateMemberships(&communities); 00074 00075 map < string, vector <int> > others = CalculateMemberships(&Allies, "other_memberships.dat"); 00076 map < string, vector <int> >::iterator it_A, it_B; 00077 00078 set < pair <int, int > > potentials; 00079 set < pair <int, int> >::iterator it_pot; 00080 for (it_A = memberships.begin();it_A != memberships.end(); it_A++){ 00081 if ((it_B = others.find(it_A->first)) != others.end()){ 00082 for (sui i = 0; i < it_A->second.size(); i++){ 00083 for (sui j = 0; j < it_B->second.size(); j++){ 00084 int a = it_A->second[i], b = it_B->second[j]; 00085 pair <int, int> n = make_pair<int, int>(a, b); 00086 00087 if (potentials.find(n) == potentials.end()){ 00088 potentials.insert(n); 00089 } 00090 } 00091 } 00092 } 00093 } 00094 00095 set <int> MatchThis, MatchOther; 00096 00097 for (it_pot = potentials.begin(); it_pot != potentials.end(); it_pot++){ 00098 if (Match(&(communities[it_pot->first]), &(Allies[it_pot->second]))){ 00099 MatchThis.insert(it_pot->first); 00100 MatchOther.insert(it_pot->second); 00101 } 00102 } 00103 00104 double tPositives = MatchThis.size(); 00105 double fNegatives = Allies.size() - MatchOther.size(); 00106 double fPositives = communities.size() - tPositives; 00107 00108 prec = tPositives / (tPositives + fPositives); 00109 rec = tPositives / (tPositives + fNegatives); 00110 fscore = (2 * prec * rec) / (prec + rec); 00111 00112 return true; 00113 }; 00114 00118 void PrintStats(){ 00119 cout << "Precision: " << prec << endl; 00120 cout << "Recall: " << rec << endl; 00121 cout << "FScore: " << fscore << endl; 00122 }; 00123 00129 void PrintStats(ofstream * rout){ 00130 *rout << "Precision: " << prec << endl; 00131 *rout << "Recall: " << rec << endl; 00132 *rout << "FScore: " << fscore << endl; 00133 }; 00134 00140 void load(string filename){ 00141 ifstream fin; 00142 openFile(&fin, filename.c_str()); 00143 vector <string> fields; 00144 00145 while (fline_tr(&fin, &fields, " \t")){ 00146 if (fields.size() < 3) 00147 continue; 00148 00149 set < string > current; 00150 for (sui i = 2; i < fields.size(); i++){ 00151 current.insert(fields[i]); 00152 } 00153 00154 communities.push_back(current); 00155 } 00156 }; 00157 00163 void loadLFR(string filename){ 00164 ifstream fin; 00165 openFile(&fin, filename.c_str()); 00166 vector <string> fields; 00167 00168 while (fline_tr(&fin, &fields, " \t")){ 00169 if (fields.size() == 0) 00170 continue; 00171 00172 string node = fields[0]; 00173 00174 for (sui i = 1; i < fields.size(); i++){ 00175 int comm = boost::lexical_cast<int>(fields[i]); 00176 for (sui t = communities.size(); t <= comm - 1; t++){ 00177 set <string> temp; 00178 communities.push_back(temp); 00179 } 00180 00181 communities[comm-1].insert(node); 00182 } 00183 } 00184 }; 00185 00192 void loadID(string filename, string delimiters){ 00193 ifstream fin; 00194 vector <string> fields; 00195 openFile(&fin, filename.c_str()); 00196 00197 while(fline_tr(&fin, &fields, delimiters)){ 00198 if (fields.size() != 2){ 00199 cerr << "Invalid ID entry: "; 00200 for (sui i = 0; i < fields.size(); i++){ 00201 cerr << fields[i] << " "; 00202 } 00203 cerr << endl; 00204 00205 exit(3); 00206 } 00207 00208 directory.insert(make_pair<string, string>(fields[0], fields[1])); 00209 reverse_yellow.insert(make_pair<string, string>(fields[1], fields[0])); 00210 } 00211 } 00212 00216 double getPrecision(){ 00217 return prec; 00218 } 00219 00223 double getFScore(){ 00224 return fscore; 00225 } 00226 00230 double getRecall(){ 00231 return rec; 00232 } 00233 00237 double CalculateCompare(vector < set < string > > Allies){ 00238 00239 if (comp == NULL || comp->graphSize == 0) 00240 return -1; 00241 00242 return (*comp)(&Allies, &communities); 00243 } 00244 00251 void setCompare(string c, string d){ 00252 Compare* God = new Compare(); 00253 comp = God->Produce(c); 00254 comp->graphSize = network.size(); 00255 comp->setDist(d); 00256 delete God; 00257 } 00258 00262 vector < set < string > > getCommunities(){return communities;}; 00263 00273 void LoadGraph(string filename, string delim, bool translate){ 00274 ifstream fin; 00275 openFile(&fin, filename); 00276 vector <string> fields; 00277 00278 while (fline_tr(&fin, &fields, delim)){ 00279 if (fields.size() != 3) 00280 continue; 00281 00282 string to, from; 00283 00284 if (translate){ 00285 map <string, string>::iterator it_id; 00286 if ((it_id = directory.find(fields[0])) != directory.end()) 00287 from = directory.find(fields[0])->second; 00288 else { 00289 cerr << "Vertex not in ID file : " << fields[0] << endl; 00290 exit(5); 00291 } 00292 00293 if ((it_id = directory.find(fields[1])) != directory.end()) 00294 from = directory.find(fields[1])->second; 00295 else { 00296 cerr << "Vertex not in ID file : " << fields[1] << endl; 00297 exit(5); 00298 } 00299 } else { 00300 from = fields[0]; 00301 to = fields[1]; 00302 } 00303 00304 if ((it_net = network.find(from)) != network.end()){ 00305 it_net->second.insert(make_pair<string, double>(to, boost::lexical_cast<double>(fields[2]))); 00306 } else { 00307 map <string, double> temp; 00308 temp.insert(make_pair<string, double>(to, boost::lexical_cast<double>(fields[2]))); 00309 network.insert(make_pair<string, map <string, double> >(from, temp)); 00310 } 00311 00312 if ((it_net = network.find(to)) != network.end()){ 00313 it_net->second.insert(make_pair<string, double>(from, boost::lexical_cast<double>(fields[2]))); 00314 } else { 00315 map <string, double> temp; 00316 temp.insert(make_pair<string, double>(from, boost::lexical_cast<double>(fields[2]))); 00317 network.insert(make_pair<string, map <string, double> >(to, temp)); 00318 } 00319 } 00320 00321 if (comp != NULL) 00322 comp->graphSize = network.size(); 00323 }; 00324 00330 map < string, vector <int> > CalculateMemberships(vector < set < string > >* comms, string fileout = "memberships.dat"){ 00331 00332 map <string, vector <int> > members; 00333 map <string, vector < int > >::iterator it_m; 00334 set <string>::iterator it_s; 00335 00336 map <int, int> reverse_members; 00337 map <int, int>::iterator it_rm; 00338 00339 for (sui i = 0; i < comms->size(); i++){ 00340 for (it_s = (*comms)[i].begin(); it_s != (*comms)[i].end(); it_s++){ 00341 if ((it_m = members.find(*it_s)) == members.end()){ 00342 vector <int> temp; 00343 temp.push_back(i); 00344 members.insert(make_pair<string, vector < int > >(*it_s, temp)); 00345 } else { 00346 it_m->second.push_back(i); 00347 } 00348 } 00349 } 00350 00351 for (it_m = members.begin(); it_m != members.end(); it_m++){ 00352 if ((it_rm = reverse_members.find(it_m->second.size())) == reverse_members.end()){ 00353 reverse_members.insert(make_pair<int, int>(it_m->second.size(), 1)); 00354 } else { 00355 it_rm->second++; 00356 } 00357 } 00358 00359 ofstream fout(fileout.c_str()); 00360 for (it_rm = reverse_members.begin(); it_rm != reverse_members.end(); it_rm++){ 00361 fout << it_rm->first << " : " << it_rm->second << endl; 00362 } 00363 00364 return members; 00365 }; 00366 00367 00368 private: 00369 double threshold, prec, fscore, rec; 00370 vector < set < string > > communities; 00371 map < string, map <string, double > > network; 00372 map < string, map <string, double > >::iterator it_net; 00373 Compare* comp; 00374 map < string, string> directory; 00375 map < string, string > reverse_yellow; 00376 map < string, vector < int > > memberships; 00383 bool Match(set <string>* A, set <string>* B){ 00384 double truePositive = 0, falsePositive = 0, falseNegative = 0; 00385 00386 set <string>::iterator it_slA, it_slB; 00387 for (it_slA = A->begin(); it_slA != A->end(); it_slA++){ 00388 if (B->find(*it_slA) != B->end()) 00389 truePositive++; 00390 else 00391 falsePositive++; 00392 } 00393 00394 for (it_slB = B->begin(); it_slB != B->end(); it_slB++){ 00395 if (A->find(*it_slB) == A->end()) 00396 falseNegative++; 00397 } 00398 00399 double precision = truePositive / (truePositive + falsePositive); 00400 double recall = truePositive / (truePositive + falseNegative); 00401 return ((2 * precision * recall) / (precision + recall) >= threshold); 00402 } 00403 }; 00404 00405 #endif