diff --git a/commands.sh b/commands.sh
new file mode 100644
index 0000000..ed836fb
--- /dev/null
+++ b/commands.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Runs GraphLOD on every dataset; tee mirrors each run's console output into a
+# .txt file in the current directory. bash is required because the linkedgeodata
+# call uses brace expansion, which POSIX sh does not perform.
+
+java -Xmx80g -jar graphlod-0.1.jar --excludedNamespaces \
+    "http://www4.wiwiss.fu-berlin.de/drugbank/resource/drugtype/" \
+    "http://www4.wiwiss.fu-berlin.de/drugbank/resource/references/" \
+    --skipChromatic \
+    --namespace "http://www4.wiwiss.fu-berlin.de/drugbank/" \
+    /data/graphlod/drugbank/drugbank.nt | tee drugbank.txt
+
+java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
+    --namespace "http://www4.wiwiss.fu-berlin.de/dailymed/" \
+    /data/graphlod/dailymed/dailymed_dump.nt | tee dailymed_dump.txt
+
+java -Xmx80g -jar graphlod-0.1.jar --skipChromatic --excludedNamespaces \
+    "http://www4.wiwiss.fu-berlin.de/diseasome/resource/diseaseClass/" \
+    --namespace "http://www4.wiwiss.fu-berlin.de/diseasome/" \
+    /data/graphlod/diseasome/diseasome.nt | tee diseasome.txt
+
+java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
+    --namespace "http://dbpedia.org/resource" \
+    /data/graphlod/dbpedia/persondata_en.nt | tee dbpedia_persondata.txt
+
+java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
+    --namespace "http://dbpedia.org/resource" \
+    /data/graphlod/dbpedia/geo_coordinates_en.nt | tee geo_coordinate.txt
+
+java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
+    --namespace "http://dbpedia.org/resource" \
+    /data/graphlod/dbpedia/homepages_en.nt | tee dbpedia_homepages.txt
+
+# fix mapping: sed 's/"\.$/" \./' mappingbased_properties_en.nt > mappingbased_properties_en_fixed.nt
+
+java -Xmx100g -jar graphlod-0.1.jar --skipChromatic \
+    --namespace "http://dbpedia.org/resource" \
+    mappingbased_properties_en_fixed.nt | tee dbpedia_mapping.txt
+
+java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
+    /data/graphlod/linkedgeodata/2013-04-29-{Ae*,C*,E*,Mili*,H*,P*,S*,T*} \
+    --namespace "http://linkedgeodata.org/" \
+    | tee linkedgeodata.txt
+
diff --git a/src/graphlod/Dataset.java b/src/graphlod/Dataset.java
index a5daa01..729b8dd 100644
--- a/src/graphlod/Dataset.java
+++ b/src/graphlod/Dataset.java
@@ -5,7 +5,6 @@
 import java.io.FileNotFoundException;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.Set;
@@ -17,26 +16,30 @@
 import org.semanticweb.yars.nx.Node;
 import org.semanticweb.yars.nx.parser.NxParser;
 
 public class Dataset {
     private final DirectedGraph g = new DefaultDirectedGraph<>(DefaultEdge.class);
+    // Triples are only considered when subject and object both start with this prefix; "" matches every URI.
+    private final String namespace;
     private final Collection excludedNamespaces;
     private Set removeVertices = new HashSet<>();
 
-    private Dataset(Collection excludedNamespaces) {
+    private Dataset(String namespace, Collection excludedNamespaces) {
+        Validate.notNull(namespace, "namespace must not be null");
+        Validate.notNull(excludedNamespaces, "excludedNamespaces must not be null");
+        this.namespace = namespace;
         this.excludedNamespaces = excludedNamespaces;
     }
 
-    static Dataset fromLines(Iterable lines, Collection excludedNamespaces) {
-        Dataset s = new Dataset(excludedNamespaces);
+    static Dataset fromLines(Iterable lines, String namespace, Collection excludedNamespaces) {
+        Dataset s = new Dataset(namespace, excludedNamespaces);
         s.readTriples(new NxParser(lines));
         s.cleanup();
         return s;
     }
 
-    public static Dataset fromFiles(Collection datasets, Collection excludedNamespaces) {
-        Validate.notNull(datasets);
-        Validate.notNull(excludedNamespaces);
-        Dataset s = new Dataset(excludedNamespaces);
+    public static Dataset fromFiles(Collection datasets, String namespace, Collection excludedNamespaces) {
+        Validate.notNull(datasets, "datasets must not be null");
+        Dataset s = new Dataset(namespace, excludedNamespaces);
 
         for (String dataset : datasets) {
             Validate.isTrue(new File(dataset).exists(), "dataset not found: %s", dataset);
@@ -84,16 +87,22 @@ private void readTriples(NxParser nxp) {
             } else if (propertyUri.equals("http://www.w3.org/2002/07/owl#equivalentClass")) {
                 removeVertices.add(subjectUri);
                 removeVertices.add(objectUri);
+            } else if (!subjectUri.startsWith(namespace)) {
+                removeVertices.add(subjectUri);
+            } else if (!objectUri.startsWith(namespace)) {
+                removeVertices.add(objectUri);
             } else {
                 boolean skip = false;
                 for (String s : excludedNamespaces) {
                     if (subjectUri.startsWith(s)) {
                         removeVertices.add(subjectUri);
                         skip = true;
+                        break;
                     }
                     if (objectUri.startsWith(s)) {
                         removeVertices.add(objectUri);
                         skip = true;
+                        break;
                     }
                 }
                 if (skip) {
diff --git a/src/graphlod/GraphFeatures.java b/src/graphlod/GraphFeatures.java
index c0dee44..dca5bb0 100644
--- a/src/graphlod/GraphFeatures.java
+++ b/src/graphlod/GraphFeatures.java
@@ -76,7 +76,7 @@ public List> getConnectedSets() {
     public List getConnectedSubGraphFeatures(float minSize) {
         List> sets = this.connectivity.connectedSets();
         if (sets.size() <= 1) {
-            return null;
+            return Collections.emptyList();
         }
 
         List connectedSubgraphFeatures = new ArrayList<>();
diff --git a/src/graphlod/GraphLOD.java b/src/graphlod/GraphLOD.java
index 8740ec1..fad48ff 100644
--- a/src/graphlod/GraphLOD.java
+++ b/src/graphlod/GraphLOD.java
@@ -28,9 +28,9 @@ public class GraphLOD {
     private static final Logger logger = Logger.getLogger(GraphLOD.class);
     public static final int MAX_SIZE_FOR_DIAMETER = 500;
 
-    public GraphLOD(Collection datasetLocations, boolean skipChromaticNumber, Collection excludedNamespaces, float minImportantSubgraphSize, int importantDegreeCount) {
+    public GraphLOD(Collection datasetFiles, boolean skipChromaticNumber, String namespace, Collection excludedNamespaces, float minImportantSubgraphSize, int importantDegreeCount) {
         Stopwatch sw = Stopwatch.createStarted();
-        Dataset dataset = Dataset.fromFiles(datasetLocations, excludedNamespaces);
+        Dataset dataset = Dataset.fromFiles(datasetFiles, namespace, excludedNamespaces);
 
         GraphFeatures graphFeatures = new GraphFeatures(dataset.getGraph());
 
@@ -133,7 +133,8 @@ public static void main(final String[] args) {
         ArgumentParser parser = ArgumentParsers.newArgumentParser("GraphLOD")
                 .defaultHelp(true).description("calculates graph features.");
         parser.addArgument("dataset").nargs("+").setDefault(Arrays.asList(DEFAULT_DATASET_LOCATION));
-        parser.addArgument("--excludedNamespace").nargs("*").setDefault(Collections.emptyList());
+        parser.addArgument("--namespace").type(String.class).setDefault("");
+        parser.addArgument("--excludedNamespaces").nargs("*").setDefault(Collections.emptyList());
         parser.addArgument("--skipChromatic").action(Arguments.storeTrue());
         parser.addArgument("--minImportantSubgraphSize").type(Integer.class).action(Arguments.store()).setDefault(20);
         parser.addArgument("--importantDegreeCount").type(Integer.class).action(Arguments.store()).setDefault(5);
@@ -144,13 +145,15 @@ public static void main(final String[] args) {
             parser.handleError(e);
             System.exit(1);
         }
-        List excludedNamespaces = result.getList("excludedNamespace");
         List dataset = result.getList("dataset");
+        String namespace = result.getString("namespace");
+        List excludedNamespaces = result.getList("excludedNamespaces");
         boolean skipChromatic = result.getBoolean("skipChromatic");
         int minImportantSubgraphSize = result.getInt("minImportantSubgraphSize");
         int importantDegreeCount = result.getInt("importantDegreeCount");
 
         System.out.println("reading: " + dataset);
+        System.out.println("namespace: " + namespace);
         System.out.println("skip chromatic: " + skipChromatic);
         System.out.println("excluded namespaces: " + excludedNamespaces);
         System.out.println("min important subgraph size: " + minImportantSubgraphSize);
@@ -159,7 +162,7 @@ public static void main(final String[] args) {
 
         Locale.setDefault(Locale.US);
 
-        new GraphLOD(dataset, skipChromatic, excludedNamespaces, minImportantSubgraphSize, importantDegreeCount);
+        new GraphLOD(dataset, skipChromatic, namespace, excludedNamespaces, minImportantSubgraphSize, importantDegreeCount);
     }
 
 }
diff --git a/test/graphlod/DatasetTest.java b/test/graphlod/DatasetTest.java
index 2bec25e..a999d45 100644
--- a/test/graphlod/DatasetTest.java
+++ b/test/graphlod/DatasetTest.java
@@ -35,7 +35,7 @@ public void setUp() {
     public void literalsDontCount() {
         lines.add(createStatement("a", "p1", "b"));
         lines.add(createLiteralStatement("a", "p1", "some literal"));
-        Dataset dataset = Dataset.fromLines(lines, excluded);
+        Dataset dataset = Dataset.fromLines(lines, "", excluded);
 
         assertThat(dataset.getGraph().vertexSet().size(), equalTo(2));
         assertThat(dataset.getGraph().edgeSet().size(), equalTo(1));
@@ -44,7 +44,7 @@ public void literalsDontCount() {
     @Test
     public void testGetGraph() throws Exception {
         lines.add(createStatement("a", "p1", "b"));
-        Dataset dataset = Dataset.fromLines(lines, excluded);
+        Dataset dataset = Dataset.fromLines(lines, "", excluded);
 
         DirectedGraph graph = dataset.getGraph();
         Edge edge = graph.getEdge(url("a"),url("b"));
@@ -52,13 +52,25 @@ public void testGetGraph() throws Exception {
         assertThat(edge.getTarget(), equalTo((Object)url("b")));
     }
 
+    // URIs outside the requested namespace must not end up in the graph, nor may edges touching them.
+    @Test
+    public void testNamespace() {
+        lines.add(createStatement("a/Thing", "p1", "b/Other"));
+        lines.add(createStatement("a/Thing", "p1", "a/NotOther"));
+
+        Dataset dataset = Dataset.fromLines(lines, "http://a/", Arrays.asList("http://classes/"));
+        assertThat(dataset.getGraph().vertexSet(), containsInAnyOrder(url("a/Thing"),url("a/NotOther")));
+        assertThat(dataset.getGraph().edgeSet().size(), equalTo(1));
+        assertThat(dataset.getGraph().getEdge(url("a/Thing"), url("a/NotOther")), notNullValue());
+    }
+
     @Test
     public void testExcludedNamespace() {
         lines.add(createStatement("classes/Thing", "p1", "b"));
         lines.add(createStatement("a", "p1", "classes/Thing"));
         lines.add(createStatement("a", "p1", "b"));
 
-        Dataset dataset = Dataset.fromLines(lines, Arrays.asList("http://classes/"));
+        Dataset dataset = Dataset.fromLines(lines, "", Arrays.asList("http://classes/"));
         assertThat(dataset.getGraph().vertexSet(), containsInAnyOrder(url("a"),url("b")));
         assertThat(dataset.getGraph().edgeSet().size(), equalTo(1));
 
diff --git a/test/graphlod/GraphFeaturesTest.java b/test/graphlod/GraphFeaturesTest.java
index 61475d1..3a47370 100644
--- a/test/graphlod/GraphFeaturesTest.java
+++ b/test/graphlod/GraphFeaturesTest.java
@@ -28,7 +28,7 @@ public void setup() {
                 createStatement("b", "p1", "c"),
                 createStatement("b", "p1", "d"),
                 createStatement("d", "p1", "b"),
-                createStatement("c", "p1", "e")), new ArrayList());
+                createStatement("c", "p1", "e")), "", new ArrayList());
         features = new GraphFeatures(ds.getGraph());
     }
 
@@ -106,7 +106,7 @@ public void testGetChromaticNumber() throws Exception {
 
     @Test
     public void testGetConnectedGraphFeatures() throws Exception {
-        assertThat(features.getConnectedSubGraphFeatures(0.0f), nullValue());
+        assertThat(features.getConnectedSubGraphFeatures(0.0f), empty());
     }
 
     @Test
diff --git a/test/graphlod/GraphFeaturesTest2.java b/test/graphlod/GraphFeaturesTest2.java
index c46e3ef..6fce48b 100644
--- a/test/graphlod/GraphFeaturesTest2.java
+++ b/test/graphlod/GraphFeaturesTest2.java
@@ -32,10 +32,9 @@ public void setup() {
          */
         Dataset ds = Dataset.fromLines(Arrays.asList(
                 createStatement("a", "p1", "b"),
-
                 createStatement("c", "p1", "d"),
                 createStatement("d", "p1", "e"),
-                createStatement("e", "p1", "c")), new ArrayList());
+                createStatement("e", "p1", "c")), "", new ArrayList());
         features = new GraphFeatures(ds.getGraph());
     }
 