My Project
nation.h
Go to the documentation of this file.
00001 
00009 #ifndef NATION_H
00010 #define NATION_H
00011 
00012 #include "MyTools.h"
00013 #include "compare.h"
00014 
00015 #include <boost/bimap.hpp>
00016 
// Type aliases for a bidirectional string <-> string map (boost::bimap) and
// for the element type stored in it. Neither alias is referenced by the
// Nation class defined in this header.
00017 typedef boost::bimap <string, string> idmap;
00018 typedef idmap::value_type ID;
00019 
00026 class Nation{
00027 public:
00028   
00032   Nation():threshold(-1){
00033     comp = NULL;
00034   }
00035   
00039   Nation(double v):threshold(v){
00040     comp = NULL;
00041   }
00042   
00051   Nation(double thresh, string c, int s, string d):threshold(thresh){
00052     Compare* God = new Compare();
00053     comp = God->Produce(c);
00054     comp->graphSize = s;
00055     comp->setDist(d);
00056     delete God;
00057   };
00058   
00068   bool CalculateBinary(vector < set < string > > Allies){
00069     if (threshold == -1)
00070       return false;
00071     
00072     if (memberships.size() == 0)
00073       memberships = CalculateMemberships(&communities);
00074     
00075     map < string, vector <int> > others = CalculateMemberships(&Allies, "other_memberships.dat");
00076     map < string, vector <int> >::iterator it_A, it_B;
00077     
00078     set < pair <int, int > > potentials;
00079     set < pair <int, int> >::iterator it_pot;
00080     for (it_A = memberships.begin();it_A != memberships.end(); it_A++){
00081       if ((it_B = others.find(it_A->first)) != others.end()){
00082         for (sui i = 0; i < it_A->second.size(); i++){
00083           for (sui j = 0; j < it_B->second.size(); j++){
00084             int a = it_A->second[i], b = it_B->second[j];
00085             pair <int, int> n = make_pair<int, int>(a, b);
00086             
00087             if (potentials.find(n) == potentials.end()){
00088               potentials.insert(n);
00089             }
00090           }
00091         }
00092       } 
00093     }
00094     
00095     set <int> MatchThis, MatchOther;
00096     
00097     for (it_pot = potentials.begin(); it_pot != potentials.end(); it_pot++){
00098       if (Match(&(communities[it_pot->first]), &(Allies[it_pot->second]))){
00099         MatchThis.insert(it_pot->first);
00100         MatchOther.insert(it_pot->second);
00101       }
00102     }
00103     
00104     double tPositives = MatchThis.size();
00105     double fNegatives = Allies.size() - MatchOther.size();
00106     double fPositives = communities.size() - tPositives;
00107             
00108     prec = tPositives / (tPositives + fPositives);
00109     rec = tPositives / (tPositives + fNegatives);
00110     fscore = (2 * prec * rec) / (prec + rec); 
00111     
00112     return true;
00113   };
00114    
00118   void PrintStats(){
00119     cout << "Precision: " << prec << endl;
00120     cout << "Recall: " << rec << endl;
00121     cout << "FScore: " << fscore << endl;
00122   };
00123   
00129   void PrintStats(ofstream * rout){
00130     *rout << "Precision: " << prec << endl;
00131     *rout << "Recall: " << rec << endl;
00132     *rout << "FScore: " << fscore << endl;
00133   };
00134   
00140   void load(string filename){
00141     ifstream fin;
00142     openFile(&fin, filename.c_str());
00143     vector <string> fields;
00144     
00145     while (fline_tr(&fin, &fields, " \t")){
00146       if (fields.size() < 3)
00147         continue;
00148       
00149       set < string > current;
00150       for (sui i = 2; i < fields.size(); i++){
00151         current.insert(fields[i]);
00152       }
00153       
00154       communities.push_back(current);
00155     }
00156   };
00157   
00163   void loadLFR(string filename){
00164     ifstream fin;
00165     openFile(&fin, filename.c_str());
00166     vector <string> fields;
00167     
00168     while (fline_tr(&fin, &fields, " \t")){
00169       if (fields.size() == 0)
00170         continue;
00171       
00172       string node = fields[0];
00173       
00174       for (sui i = 1; i < fields.size(); i++){
00175         int comm = boost::lexical_cast<int>(fields[i]);
00176         for (sui t = communities.size(); t <= comm - 1; t++){
00177           set <string> temp;
00178           communities.push_back(temp);
00179         }
00180         
00181         communities[comm-1].insert(node);
00182       }
00183     }
00184   };
00185   
00192   void loadID(string filename, string delimiters){
00193     ifstream fin;
00194     vector <string> fields;
00195     openFile(&fin, filename.c_str());
00196     
00197     while(fline_tr(&fin, &fields, delimiters)){
00198       if (fields.size() != 2){
00199         cerr << "Invalid ID entry: ";
00200         for (sui i = 0; i < fields.size(); i++){
00201           cerr << fields[i] << " ";
00202         }
00203         cerr << endl;
00204         
00205         exit(3);
00206       }
00207       
00208       directory.insert(make_pair<string, string>(fields[0], fields[1]));
00209       reverse_yellow.insert(make_pair<string, string>(fields[1], fields[0]));
00210     }
00211   }
00212   
00216   double getPrecision(){
00217     return prec;
00218   }
00219   
00223   double getFScore(){
00224     return fscore;
00225   }
00226   
00230   double getRecall(){
00231     return rec;
00232   }
00233   
00237   double CalculateCompare(vector < set < string > > Allies){
00238     
00239     if (comp == NULL || comp->graphSize == 0)
00240       return -1;
00241     
00242     return (*comp)(&Allies, &communities);
00243   }
00244   
00251   void setCompare(string c, string d){
00252     Compare* God = new Compare();
00253     comp = God->Produce(c);
00254     comp->graphSize = network.size();
00255     comp->setDist(d);
00256     delete God;
00257   }
00258   
00262   vector < set < string > > getCommunities(){return communities;};
00263   
00273   void LoadGraph(string filename, string delim, bool translate){
00274     ifstream fin;
00275     openFile(&fin, filename);
00276     vector <string> fields;
00277     
00278     while (fline_tr(&fin, &fields, delim)){
00279       if (fields.size() != 3)
00280         continue;
00281       
00282       string to, from;
00283       
00284       if (translate){
00285         map <string, string>::iterator it_id;
00286         if ((it_id = directory.find(fields[0])) != directory.end())
00287           from = directory.find(fields[0])->second;
00288         else {
00289           cerr << "Vertex not in ID file : " << fields[0] << endl;
00290           exit(5);
00291         }
00292         
00293         if ((it_id = directory.find(fields[1])) != directory.end())
00294           from = directory.find(fields[1])->second;
00295         else {
00296           cerr << "Vertex not in ID file : " << fields[1] << endl;
00297           exit(5);
00298         }
00299       } else {
00300         from = fields[0];
00301         to = fields[1];
00302       } 
00303       
00304       if ((it_net = network.find(from)) != network.end()){
00305         it_net->second.insert(make_pair<string, double>(to, boost::lexical_cast<double>(fields[2])));   
00306       } else {
00307         map <string, double> temp;
00308         temp.insert(make_pair<string, double>(to, boost::lexical_cast<double>(fields[2])));
00309         network.insert(make_pair<string, map <string, double> >(from, temp));
00310       }
00311       
00312       if ((it_net = network.find(to)) != network.end()){
00313         it_net->second.insert(make_pair<string, double>(from, boost::lexical_cast<double>(fields[2]))); 
00314       } else {
00315         map <string, double> temp;
00316         temp.insert(make_pair<string, double>(from, boost::lexical_cast<double>(fields[2])));
00317         network.insert(make_pair<string, map <string, double> >(to, temp));
00318       }
00319     }
00320     
00321     if (comp != NULL)
00322       comp->graphSize = network.size();
00323   };
00324   
00330   map < string, vector <int> > CalculateMemberships(vector < set < string > >* comms, string fileout = "memberships.dat"){
00331     
00332     map <string, vector <int> > members;
00333     map <string, vector < int > >::iterator it_m;
00334     set <string>::iterator it_s;
00335     
00336     map <int, int> reverse_members;
00337     map <int, int>::iterator it_rm;
00338     
00339     for (sui i = 0; i < comms->size(); i++){
00340       for (it_s = (*comms)[i].begin(); it_s != (*comms)[i].end(); it_s++){
00341         if ((it_m = members.find(*it_s)) == members.end()){
00342           vector <int> temp;
00343           temp.push_back(i);
00344           members.insert(make_pair<string, vector < int > >(*it_s, temp));
00345         } else {
00346           it_m->second.push_back(i);
00347         }
00348       }
00349     }
00350     
00351     for (it_m = members.begin(); it_m != members.end(); it_m++){
00352       if ((it_rm = reverse_members.find(it_m->second.size())) == reverse_members.end()){
00353           reverse_members.insert(make_pair<int, int>(it_m->second.size(), 1));
00354         } else {
00355           it_rm->second++;
00356         }
00357     }
00358     
00359     ofstream fout(fileout.c_str());
00360     for (it_rm = reverse_members.begin(); it_rm != reverse_members.end(); it_rm++){
00361      fout << it_rm->first << " : " << it_rm->second << endl;
00362     }
00363     
00364     return members;
00365   };
00366   
00367   
00368 private:
00369   double threshold, prec,  fscore,  rec;
00370   vector < set < string > > communities;
00371   map < string, map <string, double > > network; 
00372   map < string, map <string, double > >::iterator it_net;
00373   Compare* comp; 
00374   map < string, string> directory; 
00375   map < string, string > reverse_yellow; 
00376   map < string, vector < int > > memberships; 
00383   bool Match(set <string>* A, set <string>* B){
00384     double truePositive = 0, falsePositive = 0, falseNegative = 0;
00385   
00386     set <string>::iterator it_slA, it_slB;
00387     for (it_slA = A->begin(); it_slA != A->end(); it_slA++){
00388       if (B->find(*it_slA) != B->end())
00389         truePositive++;
00390       else
00391         falsePositive++;
00392     }
00393     
00394     for (it_slB = B->begin(); it_slB != B->end(); it_slB++){
00395       if (A->find(*it_slB) == A->end())
00396         falseNegative++;
00397     }
00398     
00399     double precision = truePositive / (truePositive + falsePositive);
00400     double recall = truePositive / (truePositive + falseNegative);
00401     return ((2 * precision * recall) / (precision + recall) >= threshold);
00402   }
00403 };
00404 
00405 #endif
 All Classes Files Functions Variables Typedefs