From 1db79b197cc7786a01f1fc3f78109dd0d7913d43 Mon Sep 17 00:00:00 2001 From: Luke Slater Date: Mon, 14 Jun 2021 01:11:10 +0100 Subject: [PATCH] similarity mode, 0.0.8 --- klarigi/build.gradle | 6 ++- klarigi/src/main/groovy/klarigi/App.groovy | 16 +++++-- .../groovy/klarigi/InformationContent.groovy | 48 ++++++++++++++++++- .../src/main/groovy/klarigi/Klarigi.groovy | 12 ++++- klarigi/src/main/groovy/klarigi/Scorer.groovy | 7 ++- .../src/main/groovy/klarigi/StepDown.groovy | 5 +- 6 files changed, 84 insertions(+), 10 deletions(-) diff --git a/klarigi/build.gradle b/klarigi/build.gradle index b323a43..f5eaec2 100644 --- a/klarigi/build.gradle +++ b/klarigi/build.gradle @@ -18,6 +18,10 @@ plugins { } + +sourceCompatibility = '11' +targetCompatibility = '11' + repositories { // Use Maven Central for resolving dependencies. mavenCentral() @@ -82,4 +86,4 @@ jacocoTestReport { } } -version = '0.0.7' +version = '0.0.8' diff --git a/klarigi/src/main/groovy/klarigi/App.groovy b/klarigi/src/main/groovy/klarigi/App.groovy index 3daa39f..8ec82e1 100644 --- a/klarigi/src/main/groovy/klarigi/App.groovy +++ b/klarigi/src/main/groovy/klarigi/App.groovy @@ -17,6 +17,8 @@ class App { cliBuilder.with { h longOpt: 'help', 'Print this help text and exit.' + _ longOpt: 'similarity-mode', 'Calculate semantic similarity instead of characterising groups', type: Boolean + d longOpt: 'data', 'The data describing entities and associations. See documentation for format.', args: 1 o longOpt: 'ontology', 'The ontology to use for explanations (should be the same as the ontology used to describe patients).', args: 1 _ longOpt: 'turtle', 'Indicates that the ontology is a Turtle ontology (needed for calculating IC...)', type: Boolean @@ -58,13 +60,17 @@ class App { } def k = new Klarigi(o) - if(!o['group'] || (o['group'] && o['group'] == '*')) { - k.explainAllClusters(o['output-scores']).each { - k.output(it.cluster, it.results, o['latex'], o['print-members'], o['output']) + if(!o['similarity-mode']) { + if(!o['group'] || (o['group'] && o['group'] == '*')) { + k.explainAllClusters(o['output-scores']).each { + k.output(it.cluster, it.results, o['latex'], o['print-members'], o['output']) + } + } else { + def r = k.explainCluster(o['group'], o['output-scores']) + k.output(o['group'], r, o['latex'], o['print-members'], o['output']) } } else { - def r = k.explainCluster(o['group'], o['output-scores']) - k.output(o['group'], r, o['latex'], o['print-members'], o['output']) + k.genSim(o['output']) } } } diff --git a/klarigi/src/main/groovy/klarigi/InformationContent.groovy b/klarigi/src/main/groovy/klarigi/InformationContent.groovy index 5442c23..10e8b4f 100644 --- a/klarigi/src/main/groovy/klarigi/InformationContent.groovy +++ b/klarigi/src/main/groovy/klarigi/InformationContent.groovy @@ -49,6 +49,7 @@ public class InformationContent { private engine private icConf private factory + private G graph InformationContent(ontologyPath) { this(ontologyPath, false, false) @@ -61,7 +62,7 @@ public class InformationContent { factory.loadNamespacePrefix("HP", graphURI.toString()); G graph = new GraphMemory(graphURI)*/ - G graph = new GraphMemory() + graph = new GraphMemory() def dataConf if(turtle) { @@ -104,6 +105,51 @@ public class InformationContent { res } + // this should really go to a diff class + def compareEntities(assoc) { + def smConfPairwise = new SMconf(SMConstants.FLAG_SIM_PAIRWISE_DAG_NODE_RESNIK_1995, icConf) + def smConfGroupwise = new SMconf(SMConstants.FLAG_SIM_GROUPWISE_BMA, icConf) + + def results = [:] + assoc.each { k1, v1 -> + if(!results.containsKey(k1)) { results[k1] = [:] } + assoc.each { k2, v2 -> + if(k1 == k2) { return; } + if(results.containsKey(k2) && results[k2].containsKey(k1)) { + results[k1][k2] = results[k2][k1] + } else { + results[k1][k2] = engine.compare(smConfGroupwise, smConfPairwise, + v1.collect { + factory.getURI(it) + }.findAll { graph.containsVertex(it) }.toSet(), + v2.collect { + factory.getURI(it) + }.findAll { graph.containsVertex(it) }.toSet()) + } + } + } + + results + } + + static def WriteSimilarity(results, toFile) { + def out = [] + + results.each { k1, v1 -> + v1.each { k2, v2 -> + out << "$k1\t$k2\t$v2" + } + } + + out = out.join('\n') + + if(toFile) { + new File(toFile).text = out + } else { + println out + } + } + static def Write(ic, path) { new File(path).text = ic.collect { k, v -> "$k\t$v" }.join('\n') } diff --git a/klarigi/src/main/groovy/klarigi/Klarigi.groovy b/klarigi/src/main/groovy/klarigi/Klarigi.groovy index 49c8d41..a2dbfa5 100644 --- a/klarigi/src/main/groovy/klarigi/Klarigi.groovy +++ b/klarigi/src/main/groovy/klarigi/Klarigi.groovy @@ -31,6 +31,7 @@ public class Klarigi { ] def coefficients def verbose + def icFactory Klarigi(o) { loadData(o['data']) @@ -78,7 +79,7 @@ public class Klarigi { } } else { try { - def icFactory = new InformationContent(ontologyFile, annotFile, resnikIc, turtle) + icFactory = new InformationContent(ontologyFile, annotFile, resnikIc, turtle) def allClasses = ontoHelper.reasoner.getSubClasses(ontoHelper.dataFactory.getOWLThing(), false).collect { it.getRepresentativeElement().getIRI().toString() }.unique(false) allClasses = allClasses.findAll { it != 'http://www.w3.org/2002/07/owl#Nothing' } // heh data.ic = icFactory.getInformationContent(allClasses) @@ -158,6 +159,15 @@ public class Klarigi { } } + def genSim(toFile) { + if(!icFactory) { + println "Error: IC class not loaded (--similarity and --ic are not compatible)" + System.exit(1) + } + def results = icFactory.compareEntities(data.associations) + InformationContent.WriteSimilarity(results, toFile) + } + def output(cid, results, latex, printMembers, toFile) { def cSize = data.groupings[cid].size() if(latex) { diff --git a/klarigi/src/main/groovy/klarigi/Scorer.groovy b/klarigi/src/main/groovy/klarigi/Scorer.groovy index b9ce494..92c1467 100644 --- a/klarigi/src/main/groovy/klarigi/Scorer.groovy +++ b/klarigi/src/main/groovy/klarigi/Scorer.groovy @@ -45,7 +45,12 @@ public class Scorer { .collect { k, v -> v.nIc = v.ic // TODO this depends on an already normalised IC value... v.nInclusion = v.inclusion / data.groupings[cid].size() - v.nExclusion = 1 - (v.exclusion / data.groupings.findAll { kk, vv -> kk != cid }.collect { kk, vv -> vv.size() }.sum()) + + v.nExclusion = 1 + if(data.groupings.size() > 1) { + v.nExclusion = 1 - (v.exclusion / data.groupings.findAll { kk, vv -> kk != cid }.collect { kk, vv -> vv.size() }.sum()) + } + v.iri = k v } diff --git a/klarigi/src/main/groovy/klarigi/StepDown.groovy b/klarigi/src/main/groovy/klarigi/StepDown.groovy index 74b3020..798f167 100644 --- a/klarigi/src/main/groovy/klarigi/StepDown.groovy +++ b/klarigi/src/main/groovy/klarigi/StepDown.groovy @@ -11,7 +11,10 @@ public class StepDown { } //println ef totalCoverage = ((ef.collect { it.internalIncluded }.flatten().unique(false).size()) / data.groupings[cid].size()) * 100 - def totalExclusion = (1-(((ef.collect { it.internalExcluded }.flatten().unique(false).size()) / (data.groupings.collect {k,v->v.size()}.sum() - data.groupings[cid].size()))))*100 + def totalExclusion = 100 + if(data.groupings.size() > 1) { + totalExclusion = (1-(((ef.collect { it.internalExcluded }.flatten().unique(false).size()) / (data.groupings.collect {k,v->v.size()}.sum() - data.groupings[cid].size()))))*100 + } //println "DEBUG: running with ic cutoff: $icCutoff exclusion cutoff: $exclusionCutoff inclusion cutoff: $inclusionCutoff total: coverage: $totalCoverage/$totalInclusionCutoff" if(totalCoverage <= (totalInclusionCutoff*100)) { if(inclusionCutoff <= c.MIN_INCLUSION) {