Skip to content

Commit

Permalink
similarity mode, 0.0.8
Browse files Browse the repository at this point in the history
  • Loading branch information
reality committed Jun 14, 2021
1 parent 75cb53e commit 1db79b1
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 10 deletions.
6 changes: 5 additions & 1 deletion klarigi/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ plugins {

}


sourceCompatibility = '11'
targetCompatibility = '11'

repositories {
// Use Maven Central for resolving dependencies.
mavenCentral()
Expand Down Expand Up @@ -82,4 +86,4 @@ jacocoTestReport {
}
}

version = '0.0.7'
version = '0.0.8'
16 changes: 11 additions & 5 deletions klarigi/src/main/groovy/klarigi/App.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ class App {
cliBuilder.with {
h longOpt: 'help', 'Print this help text and exit.'

_ longOpt: 'similarity-mode', 'Calculate semantic similarity instead of characterising groups', type: Boolean

d longOpt: 'data', 'The data describing entities and associations. See documentation for format.', args: 1
o longOpt: 'ontology', 'The ontology to use for explanations (should be the same as the ontology used to describe patients).', args: 1
_ longOpt: 'turtle', 'Indicates that the ontology is a Turtle ontology (needed for calculating IC...)', type: Boolean
Expand Down Expand Up @@ -58,13 +60,17 @@ class App {
}

def k = new Klarigi(o)
if(!o['group'] || (o['group'] && o['group'] == '*')) {
k.explainAllClusters(o['output-scores']).each {
k.output(it.cluster, it.results, o['latex'], o['print-members'], o['output'])
if(!o['similarity-mode']) {
if(!o['group'] || (o['group'] && o['group'] == '*')) {
k.explainAllClusters(o['output-scores']).each {
k.output(it.cluster, it.results, o['latex'], o['print-members'], o['output'])
}
} else {
def r = k.explainCluster(o['group'], o['output-scores'])
k.output(o['group'], r, o['latex'], o['print-members'], o['output'])
}
} else {
def r = k.explainCluster(o['group'], o['output-scores'])
k.output(o['group'], r, o['latex'], o['print-members'], o['output'])
k.genSim(o['output'])
}
}
}
48 changes: 47 additions & 1 deletion klarigi/src/main/groovy/klarigi/InformationContent.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ public class InformationContent {
private engine
private icConf
private factory
private G graph

InformationContent(ontologyPath) {
this(ontologyPath, false, false)
Expand All @@ -61,7 +62,7 @@ public class InformationContent {
factory.loadNamespacePrefix("HP", graphURI.toString());
G graph = new GraphMemory(graphURI)*/

G graph = new GraphMemory()
graph = new GraphMemory()

def dataConf
if(turtle) {
Expand Down Expand Up @@ -104,6 +105,51 @@ public class InformationContent {
res
}

// TODO: this should really be moved to a different (dedicated similarity) class
/**
 * Compares every entity in {@code assoc} against every other entity using the
 * similarity engine, configured with the FLAG_SIM_GROUPWISE_BMA groupwise
 * measure over the FLAG_SIM_PAIRWISE_DAG_NODE_RESNIK_1995 pairwise measure.
 *
 * @param assoc map of entity id -> collection of annotation identifiers; each
 *              identifier is resolved through {@code factory.getURI} and kept
 *              only if the loaded graph contains it as a vertex
 * @return nested map {@code results[a][b]} holding the engine's score for each
 *         ordered pair of distinct entity ids; the score for (b, a) is copied
 *         from (a, b) rather than recomputed
 */
def compareEntities(assoc) {
  def smConfPairwise = new SMconf(SMConstants.FLAG_SIM_PAIRWISE_DAG_NODE_RESNIK_1995, icConf)
  def smConfGroupwise = new SMconf(SMConstants.FLAG_SIM_GROUPWISE_BMA, icConf)

  // Resolving annotations to graph vertices is independent of the entity being
  // compared against, so do it at most once per entity instead of once per pair.
  // Resolution stays lazy: an entity that is never compared is never resolved.
  def vertexCache = [:]
  def verticesFor = { id, terms ->
    if(!vertexCache.containsKey(id)) {
      vertexCache[id] = terms.collect {
        factory.getURI(it)
      }.findAll { graph.containsVertex(it) }.toSet()
    }
    vertexCache[id]
  }

  def results = [:]
  assoc.each { k1, v1 ->
    if(!results.containsKey(k1)) { results[k1] = [:] }
    assoc.each { k2, v2 ->
      // An entity is not compared with itself.
      if(k1 == k2) { return }
      if(results.containsKey(k2) && results[k2].containsKey(k1)) {
        // The reverse pair was already scored; reuse it.
        results[k1][k2] = results[k2][k1]
      } else {
        results[k1][k2] = engine.compare(smConfGroupwise, smConfPairwise,
          verticesFor(k1, v1),
          verticesFor(k2, v2))
      }
    }
  }

  results
}

/**
 * Emits a nested similarity map as tab-separated lines of the form
 * {@code first<TAB>second<TAB>score}, one line per inner entry.
 *
 * @param results map of id -> (map of id -> score)
 * @param toFile  path of the file to overwrite with the report; when falsy the
 *                report is printed to standard output instead
 */
static def WriteSimilarity(results, toFile) {
  // Flatten the two-level map into one row per (first, second) pair.
  def rows = results.collectMany { first, scores ->
    scores.collect { second, score -> "$first\t$second\t$score" }
  }
  def report = rows.join('\n')

  if(!toFile) {
    println report
    return
  }

  new File(toFile).text = report
}

/**
 * Writes a map to {@code path} as tab-separated {@code key<TAB>value} lines,
 * replacing any existing file content.
 *
 * @param ic   map whose entries are written one per line
 * @param path destination file path
 */
static def Write(ic, path) {
  def rows = ic.collect { term, value -> "$term\t$value" }
  new File(path).text = rows.join('\n')
}
Expand Down
12 changes: 11 additions & 1 deletion klarigi/src/main/groovy/klarigi/Klarigi.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public class Klarigi {
]
def coefficients
def verbose
def icFactory

Klarigi(o) {
loadData(o['data'])
Expand Down Expand Up @@ -78,7 +79,7 @@ public class Klarigi {
}
} else {
try {
def icFactory = new InformationContent(ontologyFile, annotFile, resnikIc, turtle)
icFactory = new InformationContent(ontologyFile, annotFile, resnikIc, turtle)
def allClasses = ontoHelper.reasoner.getSubClasses(ontoHelper.dataFactory.getOWLThing(), false).collect { it.getRepresentativeElement().getIRI().toString() }.unique(false)
allClasses = allClasses.findAll { it != 'http://www.w3.org/2002/07/owl#Nothing' } // heh
data.ic = icFactory.getInformationContent(allClasses)
Expand Down Expand Up @@ -158,6 +159,15 @@ public class Klarigi {
}
}

/**
 * Runs similarity mode: scores all entities in {@code data.associations}
 * against each other and writes the result via
 * {@code InformationContent.WriteSimilarity}.
 *
 * Prints an error and exits the JVM with status 1 when {@code icFactory} has
 * not been set.
 *
 * @param toFile output path handed to {@code WriteSimilarity}; when falsy the
 *               results are printed to standard output
 */
def genSim(toFile) {
  if(!icFactory) {
    // NOTE(review): icFactory appears to be assigned only when this class builds
    // the InformationContent itself, presumably not when IC values are supplied
    // via --ic — confirm against the constructor.
    // The flag is spelled --similarity-mode, matching the CLI definition.
    println "Error: IC class not loaded (--similarity-mode and --ic are not compatible)"
    System.exit(1)
  }
  def results = icFactory.compareEntities(data.associations)
  InformationContent.WriteSimilarity(results, toFile)
}

def output(cid, results, latex, printMembers, toFile) {
def cSize = data.groupings[cid].size()
if(latex) {
Expand Down
7 changes: 6 additions & 1 deletion klarigi/src/main/groovy/klarigi/Scorer.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,12 @@ public class Scorer {
.collect { k, v ->
v.nIc = v.ic // TODO this depends on an already normalised IC value...
v.nInclusion = v.inclusion / data.groupings[cid].size()
v.nExclusion = 1 - (v.exclusion / data.groupings.findAll { kk, vv -> kk != cid }.collect { kk, vv -> vv.size() }.sum())

v.nExclusion = 1
if(data.groupings.size() > 1) {
v.nExclusion = 1 - (v.exclusion / data.groupings.findAll { kk, vv -> kk != cid }.collect { kk, vv -> vv.size() }.sum())
}

v.iri = k
v
}
Expand Down
5 changes: 4 additions & 1 deletion klarigi/src/main/groovy/klarigi/StepDown.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ public class StepDown {
}
//println ef
totalCoverage = ((ef.collect { it.internalIncluded }.flatten().unique(false).size()) / data.groupings[cid].size()) * 100
def totalExclusion = (1-(((ef.collect { it.internalExcluded }.flatten().unique(false).size()) / (data.groupings.collect {k,v->v.size()}.sum() - data.groupings[cid].size()))))*100
def totalExclusion = 100
if(data.groupings.size() > 1) {
totalExclusion = (1-(((ef.collect { it.internalExcluded }.flatten().unique(false).size()) / (data.groupings.collect {k,v->v.size()}.sum() - data.groupings[cid].size()))))*100
}
//println "DEBUG: running with ic cutoff: $icCutoff exclusion cutoff: $exclusionCutoff inclusion cutoff: $inclusionCutoff total: coverage: $totalCoverage/$totalInclusionCutoff"
if(totalCoverage <= (totalInclusionCutoff*100)) {
if(inclusionCutoff <= c.MIN_INCLUSION) {
Expand Down

0 comments on commit 1db79b1

Please sign in to comment.