From ee552ba962831ab7d6b0c764866479f3671f653b Mon Sep 17 00:00:00 2001 From: thesamovar Date: Wed, 14 Jan 2015 16:36:55 +0000 Subject: [PATCH 1/3] Maybe fixed issue 33? --- klustakwik.cpp | 41 +++++++++++++++++++++-------------------- klustakwik.h | 16 +++++++++++++--- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/klustakwik.cpp b/klustakwik.cpp index 53dfc99..60c9c5b 100644 --- a/klustakwik.cpp +++ b/klustakwik.cpp @@ -1112,7 +1112,7 @@ void KK::LoadClu(char *CluFile) integer KK::TrySplits() { integer c, cc, c2, p, p2, DidSplit = 0; - scalar Score, NewScore, UnsplitScore, SplitScore; + CompoundScore Score, NewScore, UnsplitScore, SplitScore; integer UnusedCluster; //KK K2; // second KK structure for sub-clustering //KK K3; // third one for comparison @@ -1208,7 +1208,7 @@ integer KK::TrySplits() // Fix by Michaƫl Zugaro: replace next line with following two lines // if(SplitScore=2)) { + if((SplitScore.total=2)) { if (AlwaysSplitBimodal) { DidSplit = 1; @@ -1255,9 +1255,9 @@ integer KK::TrySplits() //Output("About to compute K3 class penalties"); if (UseDistributional) K3.ComputeClassPenalties(); //SNK Fixed bug: Need to compute the cluster penalty properly, cluster penalty is only used in UseDistributional mode NewScore = K3.ComputeScore(); - Output("\nSplitting cluster %d changes total score from " SCALARFMT " to " SCALARFMT "\n", (int)c, Score, NewScore); + Output("\nSplitting cluster %d changes total score from " SCALARFMT " to " SCALARFMT "\n", (int)c, Score.total, NewScore.total); - if (NewScore < Score) + if (NewScore.total < Score.total) { DidSplit = 1; Output("\n So it's getting split into cluster %d.\n", (int)UnusedCluster); @@ -1277,7 +1277,7 @@ integer KK::TrySplits() } // ComputeScore() - computes total score. Requires M, E, and C steps to have been run -scalar KK::ComputeScore() +CompoundScore KK::ComputeScore() { integer p; // integer debugadd; @@ -1308,7 +1308,8 @@ scalar KK::ComputeScore() } } - return Score; + CompoundScore cscore(Score - penalty, Score, penalty); + return cscore; } // Initialise starting conditions randomly @@ -1449,14 +1450,14 @@ void KK::StartingConditionsFromMasks() // optional start file loads this cluster file to start iteration // if Recurse is 0, it will not try and split. // if InitRand is 0, use cluster assignments already in structure -scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand, +CompoundScore KK::CEM(char *CluFile, integer Recurse, integer InitRand, bool allow_assign_to_noise) { integer p; integer nChanged; integer Iter; vector OldClass(nPoints); - scalar Score, OldScore; + CompoundScore Score, OldScore; integer LastStepFull; // stores whether the last step was a full one integer DidSplit; @@ -1482,7 +1483,7 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand, // main loop Iter = 0; FullStep = 1; - Score = 0.0; + Score = CompoundScore(0.0, 0.0, 0.0); do { // Store old classifications for(p=0; pChangedThresh*nPoints || nChanged == 0 || Iter%FullStepEvery==0 - || Score > OldScore // SNK: Resurrected + || Score.raw > OldScore.raw // SNK: Resurrected //SNK Score decreases ARE because of quick steps! ) ; if (Iter>MaxIter) @@ -1571,12 +1572,12 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand, } //Save a temporary clu file when not splitting - if ((SaveTempCluEveryIter && Recurse) && (OldScore> Score)) + if ((SaveTempCluEveryIter && Recurse) && (OldScore.raw >= Score.raw)) { SaveTempOutput(); //SNK Saves a temporary output clu file on each iteration Output("Writing temp clu file \n"); - Output("Because OldScore, %f, is greater than current (better) Score,%f \n ", OldScore, Score); + Output("Because OldScore.raw, %f, is greater than current (better) Score.raw,%f \n ", OldScore.raw, Score.raw); } // try splitting @@ -1585,11 +1586,11 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand, //Output("Iter-SplitFirst %d \n",(int)(Iter-SplitFirst)); if ((Recurse && SplitEvery>0) && ( Iter==SplitFirst ||( Iter>=SplitFirst+1 && (Iter-SplitFirst)%SplitEvery==SplitEvery-1 ) || (nChanged==0 && LastStepFull) ) ) { - if (OldScore> Score) //This should be trivially true for the first run of KlustaKwik + if (OldScore.raw >= Score.raw) //This should be trivially true for the first run of KlustaKwik { SaveTempOutput(); //SNK Saves a temporary output clu file before each split Output("Writing temp clu file \n"); - Output("Because OldScore, %f, is greater than current (better) Score,%f \n ", OldScore, Score); + Output("Because OldScore.raw, %f, is greater than current (better) Score.raw,%f \n ", OldScore.raw, Score.raw); } DidSplit = TrySplits(); } else DidSplit = 0; @@ -1606,7 +1607,7 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand, // then run CEM on this // then use these clusters to do a CEM on the full data // It calls CEM whenever there is no initialization clu file (i.e. the most common usage) -scalar KK::Cluster(char *StartCluFile=NULL) +CompoundScore KK::Cluster(char *StartCluFile = NULL) { if (Debug) { @@ -1783,8 +1784,8 @@ KK::~KK() // Main loop int main(int argc, char **argv) { - scalar Score; - scalar BestScore = HugeScore; + CompoundScore Score; + CompoundScore BestScore(HugeScore, HugeScore, 0.0); integer p, i; SetupParams((integer)argc, argv); // This function is defined in parameters.cpp Output("Starting KlustaKwik. Version: %s\n", VERSION); @@ -1827,7 +1828,7 @@ int main(int argc, char **argv) iterationtime = (clock()-iterationtime)/(scalar) CLOCKS_PER_SEC; Output("Time taken for this iteration:" SCALARFMT " seconds.\n", iterationtime); - Output(" %d->%d Clusters: Score " SCALARFMT "\n\n", (int)K1.nStartingClusters, (int)K1.nClustersAlive, BestScore); + Output(" %d->%d Clusters: Score " SCALARFMT "\n\n", (int)K1.nStartingClusters, (int)K1.nClustersAlive, BestScore.total); for(p=0; p%d Clusters: Score " SCALARFMT ", best is " SCALARFMT "\n", (int)K1.nStartingClusters, (int)K1.nClustersAlive, Score, BestScore); - if (Score < BestScore) + Output(" %d->%d Clusters: Score " SCALARFMT ", best is " SCALARFMT "\n", (int)K1.nStartingClusters, (int)K1.nClustersAlive, Score.total, BestScore.total); + if (Score.total < BestScore.total) { Output("THE BEST YET!\n"); // New best classification found BestScore = Score; diff --git a/klustakwik.h b/klustakwik.h index 0a35426..f41d103 100644 --- a/klustakwik.h +++ b/klustakwik.h @@ -23,6 +23,16 @@ using namespace std; +class CompoundScore +{ +public: + scalar raw; + scalar total; + scalar penalty; + CompoundScore() : raw(0.0), total(0.0), penalty(0.0) {}; + CompoundScore(scalar _raw, scalar _total, scalar _penalty) : raw(_raw), total(_total), penalty(_penalty) {}; +}; + class KK { public: /////////////// CONSTRUCTORS /////////////////////////////////////////////// @@ -58,7 +68,7 @@ class KK { // Precomputations for cluster masks void ComputeClusterMasks(); // Score and penalty functions - scalar ComputeScore(); + CompoundScore ComputeScore(); scalar Penalty(integer n); void ComputeClassPenalties(); // Main algorithm functions @@ -67,8 +77,8 @@ class KK { void CStep(bool allow_assign_to_noise=true); void ConsiderDeletion(); integer TrySplits(); - scalar CEM(char *CluFile, integer recurse, integer InitRand, bool allow_assign_to_noise=true); - scalar Cluster(char *CluFile); + CompoundScore CEM(char *CluFile, integer recurse, integer InitRand, bool allow_assign_to_noise = true); + CompoundScore Cluster(char *CluFile); // IO related functions void LoadData(char *FileBase, integer ElecNo, char *UseFeatures); void LoadClu(char *StartCluFile); From 5c6981267f26695588d7bcd31b172d0d66777d7b Mon Sep 17 00:00:00 2001 From: thesamovar Date: Fri, 16 Jan 2015 18:42:05 +0000 Subject: [PATCH 2/3] Only do a full step if both the raw and penalised scores have got worse --- klustakwik.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/klustakwik.cpp b/klustakwik.cpp index 60c9c5b..c3378e4 100644 --- a/klustakwik.cpp +++ b/klustakwik.cpp @@ -1562,8 +1562,7 @@ CompoundScore KK::CEM(char *CluFile, integer Recurse, integer InitRand, nChanged>ChangedThresh*nPoints || nChanged == 0 || Iter%FullStepEvery==0 - || Score.raw > OldScore.raw // SNK: Resurrected - //SNK Score decreases ARE because of quick steps! + || ((Score.raw > OldScore.raw) && (Score.total > OldScore.total)) ) ; if (Iter>MaxIter) { From 0c4aea131f9c660dc1904ec24c513cb72184304c Mon Sep 17 00:00:00 2001 From: thesamovar Date: Mon, 4 May 2015 16:45:33 +0100 Subject: [PATCH 3/3] Temp files saved with different names --- io.cpp | 4 ++-- klustakwik.cpp | 4 ++-- klustakwik.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/io.cpp b/io.cpp index 6fca701..985e17e 100644 --- a/io.cpp +++ b/io.cpp @@ -314,7 +314,7 @@ void KK::SaveOutput() } // write output to .clu file - with 1 added to cluster numbers, and empties removed. -void KK::SaveTempOutput() +void KK::SaveTempOutput(integer Iter) { integer c; uinteger p; @@ -354,7 +354,7 @@ void KK::SaveTempOutput() // print temp.clu file //This is the clu for the current iteration //This fixes the bug of having a trivial temp.clu file if there is only one iteration - sprintf(fname, "%s.temp.clu.%d", FileBase, (int)ElecNo); + sprintf(fname, "%s.temp.%d.clu.%d", FileBase, (int)Iter, (int)ElecNo); fp = fopen_safe(fname, "w"); fprintf(fp, "%d\n", (int)MaxClass); diff --git a/klustakwik.cpp b/klustakwik.cpp index c3378e4..6a26633 100644 --- a/klustakwik.cpp +++ b/klustakwik.cpp @@ -1574,7 +1574,7 @@ CompoundScore KK::CEM(char *CluFile, integer Recurse, integer InitRand, if ((SaveTempCluEveryIter && Recurse) && (OldScore.raw >= Score.raw)) { - SaveTempOutput(); //SNK Saves a temporary output clu file on each iteration + SaveTempOutput(Iter); //SNK Saves a temporary output clu file on each iteration Output("Writing temp clu file \n"); Output("Because OldScore.raw, %f, is greater than current (better) Score.raw,%f \n ", OldScore.raw, Score.raw); } @@ -1587,7 +1587,7 @@ CompoundScore KK::CEM(char *CluFile, integer Recurse, integer InitRand, { if (OldScore.raw >= Score.raw) //This should be trivially true for the first run of KlustaKwik { - SaveTempOutput(); //SNK Saves a temporary output clu file before each split + SaveTempOutput(Iter); //SNK Saves a temporary output clu file before each split Output("Writing temp clu file \n"); Output("Because OldScore.raw, %f, is greater than current (better) Score.raw,%f \n ", OldScore.raw, Score.raw); } diff --git a/klustakwik.h b/klustakwik.h index f41d103..78ab2ff 100644 --- a/klustakwik.h +++ b/klustakwik.h @@ -83,7 +83,7 @@ class KK { void LoadData(char *FileBase, integer ElecNo, char *UseFeatures); void LoadClu(char *StartCluFile); void SaveOutput(); - void SaveTempOutput(); + void SaveTempOutput(integer Iter); void SaveSortedData(); void SaveSortedClu(); void SaveCovMeans();