Skip to content

Commit

Permalink
fix --classify; use old exclusion distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
reality committed Aug 13, 2022
1 parent 0f11428 commit 5e665c6
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 35 deletions.
2 changes: 1 addition & 1 deletion klarigi/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,4 @@ jacocoTestReport {
}
}

version = '0.1.1'
version = '0.1.2'
14 changes: 10 additions & 4 deletions klarigi/src/main/groovy/klarigi/App.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@ class App {
}
}

def includeAll = o['include-all']
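// Force include-all when classifying; otherwise, variables we use to classify may not be scored (and we need their old nExclusion values). Strictly, this should probably be keyed on classify-with-variables (CWV) rather than on "c and re" (presumably classify and reclassify)?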
if(o['classify']) {
includeAll = true
}

def k = new Klarigi(o, excludeClasses, threads)
if(!o['similarity-mode']) {
def allExplanations
Expand All @@ -134,11 +140,11 @@ class App {
System.exit(1)
}

allExplanations = k.explainClusters(groups, o['scores-only'], o['output-scores'], o['output-type'], threads, o['debug'], o['include-all'])
allExplanations = k.explainClusters(groups, o['scores-only'], o['output-scores'], o['output-type'], threads, o['debug'], includeAll)
} else if(o['group'] && o['group'] != '*') {
allExplanations = k.explainClusters([o['group']], o['scores-only'], o['output-scores'], o['output-type'], threads, o['debug'], o['include-all'])
allExplanations = k.explainClusters([o['group']], o['scores-only'], o['output-scores'], o['output-type'], threads, o['debug'], includeAll)
} else {
allExplanations = k.explainAllClusters(o['output-scores'], o['scores-only'], o['output-type'], threads, o['debug'], o['include-all'])
allExplanations = k.explainAllClusters(o['output-scores'], o['scores-only'], o['output-type'], threads, o['debug'], includeAll)
}

if(o['scores-only']) {
Expand Down Expand Up @@ -170,7 +176,7 @@ class App {
k.reclassify(allExplanations, excludeClasses, o['output-classification-scores'], o['ucm'], o['classify-with-variables'], threads)
}
if(o['classify']) {
k.classify(o['classify'], allExplanations, o['output-classification-scores'], o['ucm'], o['classify-with-variables'], excludeClasses, threads)
k.classify(o['classify'], allExplanations, o['output-classification-scores'], o['ucm'], o['classify-with-variables'], excludeClasses, threads, o) // fuck it just passing o
}
}

Expand Down
3 changes: 2 additions & 1 deletion klarigi/src/main/groovy/klarigi/Classifier.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ public class Classifier {
//iterate each entity
//GParsPool.withPool(threads) { p ->
data.associations.each { entity, codes ->
// Iterate each group
def scores = [:]

// Iterate each group
allExplanations.each { exps ->
// Start from 1
scores[exps.cluster] = new Float(1.0)

def rs = sterms[exps.cluster].collect { e ->
Expand Down
46 changes: 26 additions & 20 deletions klarigi/src/main/groovy/klarigi/Klarigi.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,10 @@ public class Klarigi {
}
}

/*groupings.each { k, v ->
println "$k: ${v.size()}"
}*/
println "Groupings loaded:"
groupings.each { k, v ->
println " $k: ${v.size()} members"
}

// kind of stupid but ok
def qa = [:]
Expand All @@ -164,7 +165,9 @@ public class Klarigi {
}
def allAssociations = qa.keySet().toList()

//println "all associ: ${allAssociations.size()}"
if(verbose) {
println "Loaded ${allAssociations.size()} entity-term associations."
}

data = [
groupings: groupings,
Expand Down Expand Up @@ -400,15 +403,22 @@ public class Klarigi {
ucm = false

def assoc = [:]
def allAssoc = []
new File(cwf).splitEachLine('\t') {
if(!assoc.containsKey(it[1])) { assoc[it[1]] = [] }
assoc[it[1]] << 'http://purl.obolibrary.org/obo/' + it[0].replace(':','_')
def t = 'http://purl.obolibrary.org/obo/' + it[0].replace(':','_')
assoc[it[1]] << t
allAssoc << t
}

// We rescore to ensure we have the scores for all of our given classes, and to get the new incEnts if we've reloaded data (per classify)
def reScorer = new Scorer(ontoHelper, coefficients, data, excludeClasses, false, threads)

allExplanations.each { exps ->
def newScores = []
allExplanations.each { exps ->
exps.results[0] = reScorer.scoreClasses(exps.cluster, threads, assoc[exps.cluster], true)
exps.results[0].each { t ->
t.nExclusion = exps.results[2].find { it.iri == t.iri }.nExclusion
}
}
}

Expand All @@ -417,7 +427,7 @@ public class Klarigi {
RaiseError("Failed to build reclassifier. There may have been too few examples.")
}

println 'Reclassification:'
println 'Classification performance:'
Classifier.Print(m)
println ''

Expand All @@ -426,21 +436,17 @@ public class Klarigi {
}
}

def classify(path, allExplanations, outClassScores, ucm, cwf, excludeClasses, threads) {
loadData(path) // TODO I know, i know, this is awful state management and design. i'll fix it later

def m = Classifier.classify(allExplanations, data, ontoHelper, threads, ucm)
if(!m) {
RaiseError("Failed to build classifier. There may have been too few examples.")
def classify(path, allExplanations, outClassScores, ucm, cwf, excludeClasses, threads, o) {
if(o['verbose']) {
println "Loading new dataset at $path in order to classify ..."
}

println 'Classification:'
Classifier.Print(m)
println ''
def saveIc = data.ic
loadData(path, o['pp'], o['group'], o['egl'], threads)
data.ic = saveIc
// Holding onto the IC values saves us a bit of time, but this should be looked at again if we decide to involve IC in classify scoring.

if(outClassScores) {
Classifier.WriteScores(m, "classify")
}
reclassify(allExplanations, excludeClasses, outClassScores, ucm, cwf, threads)
}

def genSim(toFile, group) {
Expand Down
9 changes: 0 additions & 9 deletions klarigi/src/main/groovy/klarigi/Scorer.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,6 @@ public class Scorer {
}

toProcess = toProcess.unique(false)
/*if(manToProcess) {
toProcess = manToProcess
}*/

//toProcess = toProcess.findAll { c -> data.ic[c] >= this.c.MIN_IC }

//println "Processing ${toProcess.size()}"

toProcess.each { iri ->
ass[iri] = [:]
Expand All @@ -85,7 +78,6 @@ public class Scorer {
def i = 0
GParsPool.withPool(threads) { p ->
data.associations.eachParallel { e, terms ->
//println "${++i}"
terms.each { t, v ->
scMap[t].each { dt ->
data.egroups[e].each { g ->
Expand All @@ -99,7 +91,6 @@ public class Scorer {
def z = 0
GParsPool.withPool(threads) { p ->
data.groupings.eachParallel { cid, v ->
//println "${++z}"
toProcess.each { iri ->
ass[iri][cid].inc = ass[iri][cid].incEnts.size()
ass[iri][cid].exc = data.groupings[cid].size() - ass[iri][cid].inc
Expand Down

0 comments on commit 5e665c6

Please sign in to comment.