My Project
compare.h
00001 #ifndef COMP_H
00002 #define COMP_H
00003 
00004 #include "MySets.h"
00005 #include "MyTools.h"
00006 
00007 #include "distance.h"
00008 
00015 class Compare{
00016 public:
00024   virtual double operator () (vector <set <string> >* Cl1, vector < set <string> >* Cl2){return 0;};
00025   
00031   Compare* Produce(string s);
00032   
00038   void setDist(string s){
00039     dist* creator = new dist();
00040     d = creator->Produce(s);
00041     delete creator;
00042   };
00043   
00045   Compare(){};
00050   Compare(int size){graphSize = size;};
00051   
00060   int getNodes(vector <set <string> >* Cl1, vector < set <string> >* Cl2){
00061     set<string> S1;
00062     set<string>::iterator it_ss;
00063     
00064     for (sui t = 0; t < Cl1->size(); t++){
00065       for (it_ss = (*Cl1)[t].begin(); it_ss != (*Cl1)[t].end(); it_ss++){
00066         S1.insert(*it_ss);
00067       }
00068     }
00069     
00070     for (sui t = 0; t < Cl2->size(); t++){
00071       for (it_ss = (*Cl2)[t].begin(); it_ss != (*Cl2)[t].end(); it_ss++){
00072         S1.insert(*it_ss);
00073       }
00074     }
00075     
00076     return S1.size();
00077   };
00078   
00079   double graphSize; 
00081   dist* d;  
00082 };
00083 
00090 class BestMatch:public Compare{
00091 public:
00101   double operator () (vector <set < string > >* Cl1, vector<set <string> >* Cl2){ 
00102     pair <map <int, set <int> >, map <int, set < int > > > Reduced = getSim(Cl1, Cl2);
00103     
00104     if (d->type > 3){
00105       cerr << "Invalid Similarity type for BestMatch measure" << endl;
00106       exit(3);
00107     }
00108     double total = 0, current = 0, min;
00109     
00110     map <int, set <int> >::iterator it_m;
00111     set <int>::iterator it_si;
00112     int nodeCount = getNodes(Cl1, Cl2);
00113     
00114     for (sui t = 0; t < Cl1->size(); t++){
00115       if ((it_m = Reduced.first.find(t)) != Reduced.first.end()){
00116         min = 10000000;
00117         for (it_si = it_m->second.begin(); it_si != it_m->second.end(); it_si++){
00118           if ((current = (*d)((*Cl1)[it_m->first], (*Cl2)[*it_si], graphSize)) < min)
00119             min = current;
00120         }
00121         total += min;
00122       }else{
00123         total += Cl1->at(t).size();
00124       }
00125     }
00126     
00127     for (sui t = 0; t < Cl2->size(); t++){
00128       if ((it_m = Reduced.second.find(t)) != Reduced.second.end()){
00129         min = 10000000;
00130         for (it_si = it_m->second.begin(); it_si != it_m->second.end(); it_si++){
00131           if ((current = (*d)((*Cl1)[*it_si], (*Cl2)[it_m->first], nodeCount)) < min)
00132             min = current;
00133         }
00134         total += min;
00135       }else{
00136         total += Cl2->at(t).size();
00137       }
00138     }
00139     
00140     return (double)(total) / (double)(Cl1->size() + Cl2->size());
00141   };
00142   
00143 private:
00153   pair <map <int, set <int> >, map <int, set < int > > > getSim (vector <set < string > >* Cl1, vector<set <string> >* Cl2){
00154   set<string>::iterator it_sl;
00155    set<int>::iterator it_si, it_si2;
00156    
00157    map <string, set <int> > first, second;
00158    map <string, set <int> >::iterator it_f, it_s;
00159    
00160     for (sui t = 0; t < Cl1->size(); t++){
00161       for (it_sl = Cl1->at(t).begin(); it_sl != Cl1->at(t).end(); it_sl++){
00162         if ((it_f = first.find(*it_sl)) != first.end()){
00163           it_f->second.insert(t);
00164         } else {
00165           set <int> tmp;
00166           tmp.insert(t);
00167           first.insert(make_pair<string, set<int> >(*it_sl, tmp));
00168         }
00169       }
00170     }
00171     
00172     for (sui t = 0; t < Cl2->size(); t++){
00173       for (it_sl = Cl2->at(t).begin(); it_sl != Cl2->at(t).end(); it_sl++){
00174         if ((it_f = second.find(*it_sl)) != second.end()){
00175           it_f->second.insert(t);
00176         } else {
00177           set <int> tmp;
00178           tmp.insert(t);
00179           second.insert(make_pair<string, set<int> >(*it_sl, tmp));
00180         }
00181       }
00182     }
00183     
00184     map <int, set <int> > A, B;
00185     map <int, set <int> >::iterator it_a;
00186     
00187     for (it_f = first.begin(); it_f != first.end(); it_f++){
00188       if ((it_s = second.find(it_f->first)) == second.end()) continue;
00189       
00190       for (it_si = it_f->second.begin(); it_si != it_f->second.end(); it_si++){
00191         if ((it_a = A.find(*it_si)) == A.end()){
00192           set <int> tmp;
00193           A.insert(make_pair<int, set<int> > (*it_si, tmp));
00194           it_a = A.find(*it_si);
00195         }
00196         for (it_si2 = it_s->second.begin(); it_si2 != it_s->second.end(); it_si2++){
00197           it_a->second.insert(*it_si2);
00198         }
00199       }
00200     }
00201     
00202     for (it_f = second.begin(); it_f != second.end(); it_f++){
00203       if ((it_s = first.find(it_f->first)) == first.end()) continue;
00204       
00205       for (it_si = it_f->second.begin(); it_si != it_f->second.end(); it_si++){
00206         if ((it_a = B.find(*it_si)) == B.end()){
00207           set <int> tmp;
00208           B.insert(make_pair<int, set<int> > (*it_si, tmp));
00209           it_a = B.find(*it_si);
00210         }
00211         for (it_si2 = it_s->second.begin(); it_si2 != it_s->second.end(); it_si2++){
00212           it_a->second.insert(*it_si2);
00213         }
00214       }
00215     }
00216   
00217   return make_pair< map <int, set <int> >, map <int, set < int > > >(A, B);
00218   }
00219 };
00220 
00227 class KCenter:public Compare{
00228 public:
00229   
00235   void setDist(string s){
00236     dist* creator = new dist();
00237     d = creator->Produce(s);
00238     distType = s;
00239     delete creator;
00240   };
00241   
00250   double operator() (vector<set<string> >* Cl1, vector<set<string> >* Cl2){
00251     if (d->type > 3){
00252       cerr << "Invalid Similarity type for KCenter measure" << endl;
00253       exit(3);
00254     }
00255     
00256     BestMatch* BM = new BestMatch();
00257     BM->setDist(distType);
00258     
00259     int K = 0.75 * (Cl1->size() + Cl2->size()) / 2;
00260     int N = getNodes(Cl1, Cl2);
00261     vector <set <string> > KC1 = getCenter(Cl1, K, N), KC2 = getCenter(Cl2, K, N);
00262     
00263     cerr << "Retreived Centers" << endl;
00264     
00265     return (*BM)(&KC1, &KC2);
00266   };
00267   
00268 private:
00269   
00270   string distType; 
00282   vector<set<string> > getCenter(vector<set<string> >* Cl, sui K, int N){
00283     if (K < 1 || K > Cl->size()){
00284       cerr << "Invalid K for centers" << endl;
00285       exit(3);
00286     }
00287     
00288     vector< set <string> > ret;
00289     vector < double > dsts;
00290     dsts.resize(Cl->size(), 9999999);
00291     set <string> empty;
00292     
00293      /*
00294      * Add First Cluster
00295      */
00296      
00297     double max = 0, index = 0, weight = 0;
00298     
00299     for (sui t = 0; t < Cl->size(); t++){
00300         if ((weight = (*d)(empty, (*Cl)[t], N)) > max){
00301           max = weight;
00302           index = t;
00303         }
00304     }
00305     
00306     ret.push_back((*Cl)[index]);
00307     empty = (*Cl)[index];
00308     
00309     /*
00310      * Greedily increase the center
00311      */
00312     while (ret.size() != K){
00313       /*
00314        * Recalculate Distances
00315        */
00316       for (sui t = 0; t < Cl->size(); t++){
00317         if ((weight = (*d)(empty, (*Cl)[t], N)) < dsts[t])
00318           dsts[t] = weight;
00319       }
00320       /*
00321        * Add cluster furthest away
00322        */
00323       max = dsts[0];
00324       for (sui t = 0; t < dsts.size(); t++){
00325         if (dsts[t] > max){
00326           max = dsts[t];
00327           index = t;
00328         }
00329       }
00330       
00331       empty = (*Cl)[index];
00332       ret.push_back(empty);
00333     }
00334     
00335     return ret;
00336   };
00337 };
00338 
00345 class Interaction:public Compare{
00346 public:
00354   double operator() (vector<set<string> >* Cl1, vector<set<string> >* Cl2){
00355     if (d->type < 4){
00356       cerr << "Invalid Similarity type for Interaction measure" << endl;
00357       exit(3);
00358     }
00359     map < string, set <int> > S1, S2;
00360     map < string, set <int> >::iterator it_m, it_m2;
00361     set <int>::iterator it_si;
00362     set<string>::iterator it_ss, it_ss2;
00363     
00364     for (sui t = 0; t < Cl1->size(); t++){
00365       for (it_ss = (*Cl1)[t].begin(); it_ss != (*Cl1)[t].end(); it_ss++){
00366         if ((it_m = S1.find(*it_ss)) != S1.end()){
00367           it_m->second.insert(t);
00368         } else {
00369           set <int> tmp;
00370           tmp.insert(t);
00371           S1.insert(make_pair<string, set <int> >(*it_ss, tmp));
00372         }
00373       }
00374     }
00375     
00376     for (sui t = 0; t < Cl2->size(); t++){
00377       for (it_ss = (*Cl2)[t].begin(); it_ss != (*Cl2)[t].end(); it_ss++){
00378         if ((it_m = S2.find(*it_ss)) != S2.end()){
00379           it_m->second.insert(t);
00380         } else {
00381           set <int> tmp;
00382           tmp.insert(t);
00383           S2.insert(make_pair<string, set <int> >(*it_ss, tmp));
00384         }
00385       }
00386     }
00387     
00388     int SC1 = S1.size(), SC2 = S2.size();
00389     double cP = 0, prod = 0;
00390     
00391     for (it_m = S1.begin(); it_m != S1.end(); it_m++){
00392       it_m2 = it_m;
00393       for (++it_m2; it_m2 != S1.end(); it_m2++){
00394         set <int> Int = Intersection< set<int> >(it_m->second, it_m2->second);
00395         prod = 1;
00396         for (it_si = Int.begin(); it_si != Int.end(); it_si++){
00397           prod *= (1.0/double((*Cl1)[*it_si].size()));
00398         }
00399         cP = 1 - (1 - (1.0 / (double)SC1)) * prod;
00400         
00401         pair <string, string> curr = make_pair<string, string> (it_m->first, it_m2->first);
00402         if ((it_p = P.find(curr)) != P.end()){
00403           it_p->second.first = cP;
00404         } else {
00405           P.insert(make_pair<pair <string, string>, pair <double, double> >(curr, make_pair<double, double>(cP, 0.0)));
00406         }
00407       }
00408     }
00409     
00410     for (it_m = S2.begin(); it_m != S2.end(); it_m++){
00411       it_m2 = it_m;
00412       for (++it_m2; it_m2 != S2.end(); it_m2++){
00413         set <int> Int = Intersection< set<int> >(it_m->second, it_m2->second);
00414         prod = 1;
00415         for (it_si = Int.begin(); it_si != Int.end(); it_si++){
00416           prod *= (1.0/double((*Cl2)[*it_si].size()));
00417         }
00418         cP = 1 - (1 - (1.0 / (double)SC2)) * prod;
00419         
00420         pair <string, string> curr = make_pair<string, string> (it_m->first, it_m2->first);
00421         if ((it_p = P.find(curr)) != P.end()){
00422           it_p->second.second = cP;
00423         } else {
00424           P.insert(make_pair<pair <string, string>, pair <double, double> >(curr, make_pair<double, double>(0.0, cP)));
00425         }
00426       }
00427     }
00428     
00429     /*
00430      * Put in a nice form
00431      */
00432     vector <double> A, B;
00433     for (it_p = P.begin(); it_p != P.end(); it_p++){
00434       A.push_back(it_p->second.first);
00435       B.push_back(it_p->second.second);
00436     }
00437     
00438     P.clear();
00439     /*
00440      * Calculate
00441      */
00442     return (*d)(A, B, getNodes(Cl1, Cl2));
00443  }
00444   
00445 private:
00446   map <pair <string, string>, pair <double, double> > P; 
00447   map <pair <string, string>, pair <double, double> >::iterator it_p; 
00448 };
00449 
00457 class FScore:public Compare{
00458 public:
00466   double operator () (vector <set <string> >* Cl1, vector < set <string> >* Cl2){
00467     int tp = 0;
00468     set <int> ttp;
00469     for (sui i = 0; i < Cl2->size(); i++){
00470       for (sui j = 0; j < Cl1->size(); j++){
00471         if (getScore(Cl2->at(i), Cl1->at(j)) > 0.9){
00472           tp++;
00473           ttp.insert(j);
00474         }
00475       }
00476     }
00477     
00478     int fn = Cl1->size() - ttp.size();
00479     int fp = Cl2->size() - tp;
00480     
00481     double p = (double)tp / (double)(tp + fp);
00482     double r = (double)tp / (double)(tp + fn);
00483     
00484     return (2 * p * r)/(p + r);
00485   }
00486   
00487 private:
00494   double getScore (set <string> c1, set <string> c2){    
00495     set <string> inter = Intersection<set <string> >(c1, c2);
00496     
00497     int tp = inter.size();
00498     int fp = c2.size() - tp;
00499     int fn = c1.size() - tp;
00500     
00501     double p = (double)tp / (double)(tp + fp);
00502     double r = (double)tp / (double)(tp + fn);
00503     
00504     return (2 * p * r)/(p + r);
00505   }
00506 };
00507 
00514 class NMI:public Compare{
00515 public:
00523   double operator () (vector <set <string> >* Cl1, vector < set <string> >* Cl2){
00524     if (d->type != 3)
00525     {
00526       cerr << "Entropy Required as comparison" << endl;
00527       exit(22);
00528     }
00529     
00530     double Hy = 0, Hx = 0;
00531     for (sui i = 0; i < Cl2->size(); i++){
00532       Hy += (((*(entropy*)d)).h((*Cl2)[i].size(), graphSize) + ((*(entropy*)d)).h(graphSize - (*Cl2)[i].size(), graphSize));
00533     }
00534     
00535     for (sui i = 0; i < Cl1->size(); i++){
00536       Hx += (((*(entropy*)d)).h((*Cl1)[i].size(), graphSize) + ((*(entropy*)d)).h(graphSize - (*Cl1)[i].size(), graphSize));
00537     }
00538     
00539     set <string>::iterator it_s;
00540     map <string, set <int> > m1, m2;
00541     map <string, set <int> >::iterator it_mm;
00542     sui minC1 = 0, minC2 = 0;
00543     sui minC1Size = numeric_limits<unsigned short int>::max();
00544     sui minC2Size = minC1Size;
00545     /*Membership maps*/
00546     for (sui i = 0; i < Cl1->size(); i++){
00547       if ((*Cl1)[i].size() < minC1Size){
00548         minC1 = i;
00549         minC1Size = (*Cl1)[i].size();
00550       }
00551       
00552       for (it_s = (*Cl1)[i].begin(); it_s != (*Cl1)[i].end(); it_s++){
00553         if ((it_mm = m1.find(*it_s)) != m1.end()){
00554           it_mm->second.insert(i);
00555         } else {
00556           set <int> temp;
00557           temp.insert(i);
00558           m1.insert(make_pair<string, set <int> > (*it_s, temp));
00559         }
00560       }
00561     }
00562     
00563     for (sui i = 0; i < Cl2->size(); i++){
00564       if ((*Cl2)[i].size() < minC2Size){
00565         minC2 = i;
00566         minC2Size = (*Cl2)[i].size();
00567       }
00568       
00569       for (it_s = (*Cl2)[i].begin(); it_s != (*Cl2)[i].end(); it_s++){
00570         if ((it_mm = m2.find(*it_s)) != m2.end()){
00571           it_mm->second.insert(i);
00572         } else {
00573           set <int> temp;
00574           temp.insert(i);
00575           m2.insert(make_pair<string, set <int> > (*it_s, temp));
00576         }
00577       }
00578     }
00579     
00580     double Hxy = 0;
00581     for (sui i = 0; i < Cl1->size(); i++){
00582       set <int> potents;
00583       for (it_s = (*Cl1)[i].begin(); it_s != (*Cl1)[i].end(); it_s++){
00584         if ((it_mm = m2.find(*it_s)) != m2.end()){
00585           potents.insert(it_mm->second.begin(), it_mm->second.end());
00586         }
00587       }
00588       
00589       if (potents.size() == 0){
00590         Hxy += (*d)((*Cl1)[i], (*Cl2)[minC2], graphSize);
00591         continue;
00592       }
00593       
00594       double minimum = numeric_limits<double>::max();
00595       for (set <int>::iterator j = potents.begin(); j != potents.end(); j++)
00596         //for (sui j = 0; j < Cl2->size(); j++)
00597       {
00598         double current = (*d)((*Cl1)[i], (*Cl2)[*j], graphSize);
00599         if (current < minimum)
00600           minimum = current;
00601       }
00602       
00603       Hxy += minimum;
00604     }
00605     
00606     double Hyx = 0;
00607     for (sui i = 0; i < Cl2->size(); i++){
00608       set <int> potents;
00609       for (it_s = (*Cl2)[i].begin(); it_s != (*Cl2)[i].end(); it_s++){
00610         if ((it_mm = m1.find(*it_s)) != m1.end()){
00611           potents.insert(it_mm->second.begin(), it_mm->second.end());
00612         }
00613       }
00614       
00615       if (potents.size() == 0){
00616         Hxy += (*d)((*Cl2)[i], (*Cl1)[minC1], graphSize);
00617         continue;
00618       }
00619       
00620       double minimum = numeric_limits<double>::max();
00621       for (set <int>::iterator j = potents.begin(); j != potents.end(); j++){
00622       //for (sui j = 0; j < Cl1->size(); j++)
00623       
00624         double current = (*d)((*Cl2)[i], (*Cl1)[*j], graphSize);
00625         if (current < minimum)
00626           minimum = current;
00627       }
00628       Hyx += minimum;
00629     }
00630     
00631    double Ixy = (Hx - Hxy + Hy - Hyx) / 2;
00632     
00633     return (Ixy / max(Hx, Hy));
00634   };
00635   
00636 private:
00637   
00638 };
00639 
00647 Compare* Compare::Produce(string s){
00648   if ((strcmp(s.c_str(), "best")) == 0){
00649     return new BestMatch();
00650   } else if ((strcmp(s.c_str(), "kcent")) == 0){
00651     return new KCenter();
00652   } else if ((strcmp(s.c_str(), "inter")) == 0){
00653     return new Interaction();
00654   } else if ((strcmp(s.c_str(), "fscore")) == 0){
00655     return new FScore();
00656   } else if ((strcmp(s.c_str(), "nmi")) == 0){
00657     return new NMI();
00658   } else {
00659     cerr << "Invalid comparison method chosen" << endl;
00660     exit(4);
00661   }
00662 };
00663 
00664 #endif
 All Classes Files Functions Variables Typedefs