Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #33 #36

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ void KK::SaveOutput()
}

// write output to .clu file - with 1 added to cluster numbers, and empties removed.
void KK::SaveTempOutput()
void KK::SaveTempOutput(integer Iter)
{
integer c;
uinteger p;
Expand Down Expand Up @@ -354,7 +354,7 @@ void KK::SaveTempOutput()
// print temp.clu file
//This is the clu for the current iteration
//This fixes the bug of having a trivial temp.clu file if there is only one iteration
sprintf(fname, "%s.temp.clu.%d", FileBase, (int)ElecNo);
sprintf(fname, "%s.temp.%d.clu.%d", FileBase, (int)Iter, (int)ElecNo);
fp = fopen_safe(fname, "w");

fprintf(fp, "%d\n", (int)MaxClass);
Expand Down
46 changes: 23 additions & 23 deletions klustakwik.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1112,7 +1112,7 @@ void KK::LoadClu(char *CluFile)
integer KK::TrySplits()
{
integer c, cc, c2, p, p2, DidSplit = 0;
scalar Score, NewScore, UnsplitScore, SplitScore;
CompoundScore Score, NewScore, UnsplitScore, SplitScore;
integer UnusedCluster;
//KK K2; // second KK structure for sub-clustering
//KK K3; // third one for comparison
Expand Down Expand Up @@ -1208,7 +1208,7 @@ integer KK::TrySplits()
// Fix by Michaël Zugaro: replace next line with following two lines
// if(SplitScore<UnsplitScore) {
if(K2.nClustersAlive<2) Output("\n Split failed - leaving alone\n");
if((SplitScore<UnsplitScore)&&(K2.nClustersAlive>=2)) {
if((SplitScore.total<UnsplitScore.total)&&(K2.nClustersAlive>=2)) {
if (AlwaysSplitBimodal)
{
DidSplit = 1;
Expand Down Expand Up @@ -1255,9 +1255,9 @@ integer KK::TrySplits()
//Output("About to compute K3 class penalties");
if (UseDistributional) K3.ComputeClassPenalties(); //SNK Fixed bug: Need to compute the cluster penalty properly, cluster penalty is only used in UseDistributional mode
NewScore = K3.ComputeScore();
Output("\nSplitting cluster %d changes total score from " SCALARFMT " to " SCALARFMT "\n", (int)c, Score, NewScore);
Output("\nSplitting cluster %d changes total score from " SCALARFMT " to " SCALARFMT "\n", (int)c, Score.total, NewScore.total);

if (NewScore < Score)
if (NewScore.total < Score.total)
{
DidSplit = 1;
Output("\n So it's getting split into cluster %d.\n", (int)UnusedCluster);
Expand All @@ -1277,7 +1277,7 @@ integer KK::TrySplits()
}

// ComputeScore() - computes total score. Requires M, E, and C steps to have been run
scalar KK::ComputeScore()
CompoundScore KK::ComputeScore()
{
integer p;
// integer debugadd;
Expand Down Expand Up @@ -1308,7 +1308,8 @@ scalar KK::ComputeScore()
}
}

return Score;
CompoundScore cscore(Score - penalty, Score, penalty);
return cscore;
}

// Initialise starting conditions randomly
Expand Down Expand Up @@ -1449,14 +1450,14 @@ void KK::StartingConditionsFromMasks()
// optional start file loads this cluster file to start iteration
// if Recurse is 0, it will not try and split.
// if InitRand is 0, use cluster assignments already in structure
scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand,
CompoundScore KK::CEM(char *CluFile, integer Recurse, integer InitRand,
bool allow_assign_to_noise)
{
integer p;
integer nChanged;
integer Iter;
vector<integer> OldClass(nPoints);
scalar Score, OldScore;
CompoundScore Score, OldScore;
integer LastStepFull; // stores whether the last step was a full one
integer DidSplit;

Expand All @@ -1482,7 +1483,7 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand,
// main loop
Iter = 0;
FullStep = 1;
Score = 0.0;
Score = CompoundScore(0.0, 0.0, 0.0);
do {
// Store old classifications
for(p=0; p<nPoints; p++) OldClass[p] = Class[p];
Expand Down Expand Up @@ -1561,8 +1562,7 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand,
nChanged>ChangedThresh*nPoints
|| nChanged == 0
|| Iter%FullStepEvery==0
|| Score > OldScore // SNK: Resurrected
//SNK Score decreases ARE because of quick steps!
|| ((Score.raw > OldScore.raw) && (Score.total > OldScore.total))
) ;
if (Iter>MaxIter)
{
Expand All @@ -1571,12 +1571,12 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand,
}

//Save a temporary clu file when not splitting
if ((SaveTempCluEveryIter && Recurse) && (OldScore> Score))
if ((SaveTempCluEveryIter && Recurse) && (OldScore.raw >= Score.raw))
{

SaveTempOutput(); //SNK Saves a temporary output clu file on each iteration
SaveTempOutput(Iter); //SNK Saves a temporary output clu file on each iteration
Output("Writing temp clu file \n");
Output("Because OldScore, %f, is greater than current (better) Score,%f \n ", OldScore, Score);
Output("Because OldScore.raw, %f, is greater than current (better) Score.raw,%f \n ", OldScore.raw, Score.raw);
}

// try splitting
Expand All @@ -1585,11 +1585,11 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand,
//Output("Iter-SplitFirst %d \n",(int)(Iter-SplitFirst));
if ((Recurse && SplitEvery>0) && ( Iter==SplitFirst ||( Iter>=SplitFirst+1 && (Iter-SplitFirst)%SplitEvery==SplitEvery-1 ) || (nChanged==0 && LastStepFull) ) )
{
if (OldScore> Score) //This should be trivially true for the first run of KlustaKwik
if (OldScore.raw >= Score.raw) //This should be trivially true for the first run of KlustaKwik
{
SaveTempOutput(); //SNK Saves a temporary output clu file before each split
SaveTempOutput(Iter); //SNK Saves a temporary output clu file before each split
Output("Writing temp clu file \n");
Output("Because OldScore, %f, is greater than current (better) Score,%f \n ", OldScore, Score);
Output("Because OldScore.raw, %f, is greater than current (better) Score.raw,%f \n ", OldScore.raw, Score.raw);
}
DidSplit = TrySplits();
} else DidSplit = 0;
Expand All @@ -1606,7 +1606,7 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand,
// then run CEM on this
// then use these clusters to do a CEM on the full data
// It calls CEM whenever there is no initialization clu file (i.e. the most common usage)
scalar KK::Cluster(char *StartCluFile=NULL)
CompoundScore KK::Cluster(char *StartCluFile = NULL)
{
if (Debug)
{
Expand Down Expand Up @@ -1783,8 +1783,8 @@ KK::~KK()
// Main loop
int main(int argc, char **argv)
{
scalar Score;
scalar BestScore = HugeScore;
CompoundScore Score;
CompoundScore BestScore(HugeScore, HugeScore, 0.0);
integer p, i;
SetupParams((integer)argc, argv); // This function is defined in parameters.cpp
Output("Starting KlustaKwik. Version: %s\n", VERSION);
Expand Down Expand Up @@ -1827,7 +1827,7 @@ int main(int argc, char **argv)
iterationtime = (clock()-iterationtime)/(scalar) CLOCKS_PER_SEC;
Output("Time taken for this iteration:" SCALARFMT " seconds.\n", iterationtime);

Output(" %d->%d Clusters: Score " SCALARFMT "\n\n", (int)K1.nStartingClusters, (int)K1.nClustersAlive, BestScore);
Output(" %d->%d Clusters: Score " SCALARFMT "\n\n", (int)K1.nStartingClusters, (int)K1.nClustersAlive, BestScore.total);
for(p=0; p<K1.nPoints; p++)
K1.BestClass[p] = K1.Class[p];
K1.SaveOutput();
Expand All @@ -1845,8 +1845,8 @@ int main(int argc, char **argv)
iterationtime = (clock()-iterationtime)/(scalar) CLOCKS_PER_SEC;
Output("Time taken for this iteration:" SCALARFMT " seconds.\n", iterationtime);

Output(" %d->%d Clusters: Score " SCALARFMT ", best is " SCALARFMT "\n", (int)K1.nStartingClusters, (int)K1.nClustersAlive, Score, BestScore);
if (Score < BestScore)
Output(" %d->%d Clusters: Score " SCALARFMT ", best is " SCALARFMT "\n", (int)K1.nStartingClusters, (int)K1.nClustersAlive, Score.total, BestScore.total);
if (Score.total < BestScore.total)
{
Output("THE BEST YET!\n"); // New best classification found
BestScore = Score;
Expand Down
18 changes: 14 additions & 4 deletions klustakwik.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@

using namespace std;

class CompoundScore
{
public:
scalar raw;
scalar total;
scalar penalty;
CompoundScore() : raw(0.0), total(0.0), penalty(0.0) {};
CompoundScore(scalar _raw, scalar _total, scalar _penalty) : raw(_raw), total(_total), penalty(_penalty) {};
};

class KK {
public:
/////////////// CONSTRUCTORS ///////////////////////////////////////////////
Expand Down Expand Up @@ -58,7 +68,7 @@ class KK {
// Precomputations for cluster masks
void ComputeClusterMasks();
// Score and penalty functions
scalar ComputeScore();
CompoundScore ComputeScore();
scalar Penalty(integer n);
void ComputeClassPenalties();
// Main algorithm functions
Expand All @@ -67,13 +77,13 @@ class KK {
void CStep(bool allow_assign_to_noise=true);
void ConsiderDeletion();
integer TrySplits();
scalar CEM(char *CluFile, integer recurse, integer InitRand, bool allow_assign_to_noise=true);
scalar Cluster(char *CluFile);
CompoundScore CEM(char *CluFile, integer recurse, integer InitRand, bool allow_assign_to_noise = true);
CompoundScore Cluster(char *CluFile);
// IO related functions
void LoadData(char *FileBase, integer ElecNo, char *UseFeatures);
void LoadClu(char *StartCluFile);
void SaveOutput();
void SaveTempOutput();
void SaveTempOutput(integer Iter);
void SaveSortedData();
void SaveSortedClu();
void SaveCovMeans();
Expand Down