From 1db79b197cc7786a01f1fc3f78109dd0d7913d43 Mon Sep 17 00:00:00 2001
From: Luke Slater <tinmachin3@gmail.com>
Date: Mon, 14 Jun 2021 01:11:10 +0100
Subject: [PATCH] similarity mode, 0.0.8

---
 klarigi/build.gradle                          |  6 ++-
 klarigi/src/main/groovy/klarigi/App.groovy    | 16 +++++--
 .../groovy/klarigi/InformationContent.groovy  | 48 ++++++++++++++++++-
 .../src/main/groovy/klarigi/Klarigi.groovy    | 12 ++++-
 klarigi/src/main/groovy/klarigi/Scorer.groovy |  7 ++-
 .../src/main/groovy/klarigi/StepDown.groovy   |  5 +-
 6 files changed, 84 insertions(+), 10 deletions(-)

diff --git a/klarigi/build.gradle b/klarigi/build.gradle
index b323a43..f5eaec2 100644
--- a/klarigi/build.gradle
+++ b/klarigi/build.gradle
@@ -18,6 +18,10 @@ plugins {
 
 }
 
+
+sourceCompatibility = '11'
+targetCompatibility = '11'
+
 repositories {
     // Use Maven Central for resolving dependencies.
     mavenCentral()
@@ -82,4 +86,4 @@ jacocoTestReport {
   }
 }
 
-version = '0.0.7'
+version = '0.0.8'
diff --git a/klarigi/src/main/groovy/klarigi/App.groovy b/klarigi/src/main/groovy/klarigi/App.groovy
index 3daa39f..8ec82e1 100644
--- a/klarigi/src/main/groovy/klarigi/App.groovy
+++ b/klarigi/src/main/groovy/klarigi/App.groovy
@@ -17,6 +17,8 @@ class App {
     cliBuilder.with {
       h longOpt: 'help', 'Print this help text and exit.'
 
+      _ longOpt: 'similarity-mode', 'Calculate semantic similarity instead of characterising groups', type: Boolean
+
       d longOpt: 'data', 'The data describing entities and associations. See documentation for format.', args: 1
       o longOpt: 'ontology', 'The ontology to use for explanations (should be the same as the ontology used to describe patients).', args: 1
       _ longOpt: 'turtle', 'Indicates that the ontology is a Turtle ontology (needed for calculating IC...)', type: Boolean
@@ -58,13 +60,17 @@ class App {
     }
 
     def k = new Klarigi(o)
-    if(!o['group'] || (o['group'] && o['group'] == '*')) {
-      k.explainAllClusters(o['output-scores']).each {
-        k.output(it.cluster, it.results, o['latex'], o['print-members'], o['output'])
+    if(!o['similarity-mode']) {
+      if(!o['group'] || (o['group'] && o['group'] == '*')) {
+        k.explainAllClusters(o['output-scores']).each {
+          k.output(it.cluster, it.results, o['latex'], o['print-members'], o['output'])
+        }
+      } else {
+        def r = k.explainCluster(o['group'], o['output-scores'])
+        k.output(o['group'], r, o['latex'], o['print-members'], o['output'])
       }
     } else {
-      def r = k.explainCluster(o['group'], o['output-scores'])
-      k.output(o['group'], r, o['latex'], o['print-members'], o['output'])
+      k.genSim(o['output'])
     }
   }
 }
diff --git a/klarigi/src/main/groovy/klarigi/InformationContent.groovy b/klarigi/src/main/groovy/klarigi/InformationContent.groovy
index 5442c23..10e8b4f 100644
--- a/klarigi/src/main/groovy/klarigi/InformationContent.groovy
+++ b/klarigi/src/main/groovy/klarigi/InformationContent.groovy
@@ -49,6 +49,7 @@ public class InformationContent {
   private engine
   private icConf
   private factory
+  private G graph
 
   InformationContent(ontologyPath) {
     this(ontologyPath, false, false)
@@ -61,7 +62,7 @@ public class InformationContent {
     factory.loadNamespacePrefix("HP", graphURI.toString());
     G graph = new GraphMemory(graphURI)*/
 
-    G graph = new GraphMemory()
+    graph = new GraphMemory()
 
     def dataConf
     if(turtle) {
@@ -104,6 +105,51 @@ public class InformationContent {
     res
   }
 
+	// this should really go to a diff class
+  // Scores every ordered pair of distinct entities in assoc (entity key -> collection of term
+  // identifiers) and returns a nested map: results[a][b] = groupwise similarity of a and b.
+  def compareEntities(assoc) {
+    def smConfPairwise = new SMconf(SMConstants.FLAG_SIM_PAIRWISE_DAG_NODE_RESNIK_1995, icConf)
+    def smConfGroupwise = new SMconf(SMConstants.FLAG_SIM_GROUPWISE_BMA, icConf)
+
+    // Resolve each entity's terms to graph vertices once, not once per compared pair.
+    def vertexSets = assoc.collectEntries { id, terms ->
+      [(id): terms.collect { factory.getURI(it) }.findAll { graph.containsVertex(it) }.toSet()]
+    }
+
+    def results = [:]
+    vertexSets.each { k1, v1 ->
+      results[k1] = [:]
+      vertexSets.each { k2, v2 ->
+        if(k1 == k2) { return } // an entity is never compared with itself
+        if(results[k2]?.containsKey(k1)) {
+          results[k1][k2] = results[k2][k1] // mirrored pair already scored; reuse it
+        } else {
+          results[k1][k2] = engine.compare(smConfGroupwise, smConfPairwise, v1, v2)
+        }
+      }
+    }
+    results
+  }
+
+  /**
+   * Writes pairwise similarity scores as TSV rows to toFile, or to stdout when toFile is unset.
+   */
+  static def WriteSimilarity(results, toFile) {
+    // Flatten the nested map into one "entity<TAB>entity<TAB>score" row per pair.
+    def rows = results.collectMany { source, scores ->
+      scores.collect { target, score -> "$source\t$target\t$score" }
+    }
+    def report = rows.join('\n')
+
+    // A falsy toFile (null or empty) means the report goes to standard output.
+    if(!toFile) {
+      println report
+    } else {
+      new File(toFile).text = report
+    }
+  }
+
   static def Write(ic, path) {
     new File(path).text = ic.collect { k, v -> "$k\t$v" }.join('\n')
   }
diff --git a/klarigi/src/main/groovy/klarigi/Klarigi.groovy b/klarigi/src/main/groovy/klarigi/Klarigi.groovy
index 49c8d41..a2dbfa5 100644
--- a/klarigi/src/main/groovy/klarigi/Klarigi.groovy
+++ b/klarigi/src/main/groovy/klarigi/Klarigi.groovy
@@ -31,6 +31,7 @@ public class Klarigi {
   ]
   def coefficients
   def verbose
+  def icFactory
 
   Klarigi(o) {
     loadData(o['data'])
@@ -78,7 +79,7 @@ public class Klarigi {
       }
     } else {
       try {
-        def icFactory = new InformationContent(ontologyFile, annotFile, resnikIc, turtle)
+        icFactory = new InformationContent(ontologyFile, annotFile, resnikIc, turtle)
         def allClasses = ontoHelper.reasoner.getSubClasses(ontoHelper.dataFactory.getOWLThing(), false).collect { it.getRepresentativeElement().getIRI().toString() }.unique(false)
         allClasses = allClasses.findAll { it != 'http://www.w3.org/2002/07/owl#Nothing' } // heh
         data.ic = icFactory.getInformationContent(allClasses)
@@ -158,6 +159,15 @@ public class Klarigi {
     }
   }
 
+  // Scores all entity pairs and writes them to toFile (stdout when unset); exits if no IC factory.
+  def genSim(toFile) {
+    if(!icFactory) {
+      println "Error: IC class not loaded (--similarity-mode and --ic are not compatible)"
+      System.exit(1)
+    }
+    InformationContent.WriteSimilarity(icFactory.compareEntities(data.associations), toFile)
+  }
+
   def output(cid, results, latex, printMembers, toFile) {
     def cSize = data.groupings[cid].size()
     if(latex) {
diff --git a/klarigi/src/main/groovy/klarigi/Scorer.groovy b/klarigi/src/main/groovy/klarigi/Scorer.groovy
index b9ce494..92c1467 100644
--- a/klarigi/src/main/groovy/klarigi/Scorer.groovy
+++ b/klarigi/src/main/groovy/klarigi/Scorer.groovy
@@ -45,7 +45,12 @@ public class Scorer {
       .collect { k, v ->
         v.nIc = v.ic // TODO this depends on an already normalised IC value...
         v.nInclusion = v.inclusion / data.groupings[cid].size()
-        v.nExclusion = 1 - (v.exclusion / data.groupings.findAll { kk, vv -> kk != cid }.collect { kk, vv -> vv.size() }.sum())
+
+        v.nExclusion = 1
+        if(data.groupings.size() > 1) {
+          v.nExclusion = 1 - (v.exclusion / data.groupings.findAll { kk, vv -> kk != cid }.collect { kk, vv -> vv.size() }.sum())
+        }
+
         v.iri = k 
         v
       }
diff --git a/klarigi/src/main/groovy/klarigi/StepDown.groovy b/klarigi/src/main/groovy/klarigi/StepDown.groovy
index 74b3020..798f167 100644
--- a/klarigi/src/main/groovy/klarigi/StepDown.groovy
+++ b/klarigi/src/main/groovy/klarigi/StepDown.groovy
@@ -11,7 +11,10 @@ public class StepDown {
         } 
         //println ef
         totalCoverage = ((ef.collect { it.internalIncluded }.flatten().unique(false).size()) / data.groupings[cid].size()) * 100
-        def totalExclusion = (1-(((ef.collect { it.internalExcluded }.flatten().unique(false).size()) / (data.groupings.collect {k,v->v.size()}.sum() - data.groupings[cid].size()))))*100
+        def totalExclusion = 100
+        if(data.groupings.size() > 1) {
+          totalExclusion = (1-(((ef.collect { it.internalExcluded }.flatten().unique(false).size()) / (data.groupings.collect {k,v->v.size()}.sum() - data.groupings[cid].size()))))*100
+        }
         //println "DEBUG: running with ic cutoff: $icCutoff exclusion cutoff: $exclusionCutoff inclusion cutoff: $inclusionCutoff total: coverage: $totalCoverage/$totalInclusionCutoff"
         if(totalCoverage <= (totalInclusionCutoff*100)) {
           if(inclusionCutoff <= c.MIN_INCLUSION) {