Skip to content

Commit

Permalink
add a whitelist for namespaces #11
Browse files Browse the repository at this point in the history
  • Loading branch information
xchrdw committed Jan 14, 2015
1 parent 4fa631b commit a911e4c
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 21 deletions.
41 changes: 41 additions & 0 deletions commands.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/sh
#
# Runs graphlod-0.1.jar over several linked-data dumps, one JVM invocation per
# dataset, and copies each run's standard output into a per-dataset text file
# via tee.
#
# Options used below:
#   --namespace           URI prefix; subjects/objects that do not start with
#                         it are dropped from the graph.
#   --excludedNamespaces  URI prefixes; subjects/objects starting with one of
#                         them are dropped from the graph.
#   --skipChromatic       passed on every run to skip the chromatic number.
#
# The script is kept POSIX-sh compatible: no brace expansion or other
# bash-only syntax, so it behaves the same when /bin/sh is dash.

# DrugBank
java -Xmx80g -jar graphlod-0.1.jar --excludedNamespaces \
"http://www4.wiwiss.fu-berlin.de/drugbank/resource/drugtype/" \
"http://www4.wiwiss.fu-berlin.de/drugbank/resource/references/" \
--skipChromatic \
--namespace "http://www4.wiwiss.fu-berlin.de/drugbank/" \
/data/graphlod/drugbank/drugbank.nt | tee drugbank.txt

# DailyMed
java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
--namespace "http://www4.wiwiss.fu-berlin.de/dailymed/" \
/data/graphlod/dailymed/dailymed_dump.nt | tee dailymed_dump.txt

# Diseasome
java -Xmx80g -jar graphlod-0.1.jar --skipChromatic --excludedNamespaces \
"http://www4.wiwiss.fu-berlin.de/diseasome/resource/diseaseClass/" \
--namespace "http://www4.wiwiss.fu-berlin.de/diseasome/" \
/data/graphlod/diseasome/diseasome.nt | tee diseasome.txt

# DBpedia: person data
java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
--namespace "http://dbpedia.org/resource" \
/data/graphlod/dbpedia/persondata_en.nt | tee dbpedia_persondata.txt

# DBpedia: geo coordinates
java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
--namespace "http://dbpedia.org/resource" \
/data/graphlod/dbpedia/geo_coordinates_en.nt | tee geo_coordinate.txt

# DBpedia: homepages
java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
--namespace "http://dbpedia.org/resource" \
/data/graphlod/dbpedia/homepages_en.nt | tee dbpedia_homepages.txt

# DBpedia: mapping-based properties.
# The dump has to be repaired first: lines ending in `".` get a space inserted
# between the closing quote and the terminating dot. Run this once beforehand
# (the fixed file is read from the current directory):
# fix mapping: sed 's/"\.$/" \./' mappingbased_properties_en.nt > mappingbased_properties_en_fixed.nt

java -Xmx100g -jar graphlod-0.1.jar --skipChromatic \
--namespace "http://dbpedia.org/resource" \
mappingbased_properties_en_fixed.nt | tee dbpedia_mapping.txt

# LinkedGeoData: selected 2013-04-29 dump files.
# The file globs are written out one by one instead of using
# 2013-04-29-{Ae*,C*,...}: brace expansion is not part of POSIX sh, so under a
# strict /bin/sh the braced word would be passed to java literally and match
# no file. The order below is the order brace expansion would have produced.
java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
/data/graphlod/linkedgeodata/2013-04-29-Ae* \
/data/graphlod/linkedgeodata/2013-04-29-C* \
/data/graphlod/linkedgeodata/2013-04-29-E* \
/data/graphlod/linkedgeodata/2013-04-29-Mili* \
/data/graphlod/linkedgeodata/2013-04-29-H* \
/data/graphlod/linkedgeodata/2013-04-29-P* \
/data/graphlod/linkedgeodata/2013-04-29-S* \
/data/graphlod/linkedgeodata/2013-04-29-T* \
--namespace "http://linkedgeodata.org/" \
| tee linkedgeodata.txt

26 changes: 18 additions & 8 deletions src/graphlod/Dataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import java.io.FileNotFoundException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
Expand All @@ -17,26 +16,31 @@
import org.semanticweb.yars.nx.Node;
import org.semanticweb.yars.nx.parser.NxParser;

import com.google.common.base.Preconditions;

public class Dataset {
private final DirectedGraph<String, DefaultEdge> g = new DefaultDirectedGraph<>(DefaultEdge.class);
private String namespace;
private final Collection<String> excludedNamespaces;
private Set<String> removeVertices = new HashSet<>();

private Dataset(Collection<String> excludedNamespaces) {
private Dataset(String namespace, Collection<String> excludedNamespaces) {
Validate.notNull(namespace, "namespace must not be null");
Validate.notNull(excludedNamespaces, "excludedNamespaces must not be null");
this.namespace = namespace;
this.excludedNamespaces = excludedNamespaces;
}

static Dataset fromLines(Iterable<String> lines, Collection<String> excludedNamespaces) {
Dataset s = new Dataset(excludedNamespaces);
static Dataset fromLines(Iterable<String> lines, String namespace, Collection<String> excludedNamespaces) {
Dataset s = new Dataset(namespace, excludedNamespaces);
s.readTriples(new NxParser(lines));
s.cleanup();
return s;
}

public static Dataset fromFiles(Collection<String> datasets, Collection<String> excludedNamespaces) {
Validate.notNull(datasets);
Validate.notNull(excludedNamespaces);
Dataset s = new Dataset(excludedNamespaces);
public static Dataset fromFiles(Collection<String> datasets, String namespace, Collection<String> excludedNamespaces) {
Validate.notNull(datasets, "datasets must not be null");
Dataset s = new Dataset(namespace, excludedNamespaces);

for (String dataset : datasets) {
Validate.isTrue(new File(dataset).exists(), "dataset not found: %s", dataset);
Expand Down Expand Up @@ -84,16 +88,22 @@ private void readTriples(NxParser nxp) {
} else if (propertyUri.equals("http://www.w3.org/2002/07/owl#equivalentClass")) {
removeVertices.add(subjectUri);
removeVertices.add(objectUri);
} else if (!subjectUri.startsWith(namespace)) {
removeVertices.add(subjectUri);
} else if (!objectUri.startsWith(namespace)) {
removeVertices.add(objectUri);
} else {
boolean skip = false;
for (String s : excludedNamespaces) {
if (subjectUri.startsWith(s)) {
removeVertices.add(subjectUri);
skip = true;
break;
}
if (objectUri.startsWith(s)) {
removeVertices.add(objectUri);
skip = true;
break;
}
}
if (skip) {
Expand Down
2 changes: 1 addition & 1 deletion src/graphlod/GraphFeatures.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public List<Set<String>> getConnectedSets() {
public List<GraphFeatures> getConnectedSubGraphFeatures(float minSize) {
List<Set<String>> sets = this.connectivity.connectedSets();
if (sets.size() <= 1) {
return null;
return Collections.emptyList();
}
List<GraphFeatures> connectedSubgraphFeatures = new ArrayList<>();

Expand Down
13 changes: 8 additions & 5 deletions src/graphlod/GraphLOD.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ public class GraphLOD {
private static final Logger logger = Logger.getLogger(GraphLOD.class);
public static final int MAX_SIZE_FOR_DIAMETER = 500;

public GraphLOD(Collection<String> datasetLocations, boolean skipChromaticNumber, Collection<String> excludedNamespaces, float minImportantSubgraphSize, int importantDegreeCount) {
public GraphLOD(Collection<String> datasetFiles, boolean skipChromaticNumber, String namespace, Collection<String> excludedNamespaces, float minImportantSubgraphSize, int importantDegreeCount) {
Stopwatch sw = Stopwatch.createStarted();
Dataset dataset = Dataset.fromFiles(datasetLocations, excludedNamespaces);
Dataset dataset = Dataset.fromFiles(datasetFiles, namespace, excludedNamespaces);

GraphFeatures graphFeatures = new GraphFeatures(dataset.getGraph());

Expand Down Expand Up @@ -133,7 +133,8 @@ public static void main(final String[] args) {
ArgumentParser parser = ArgumentParsers.newArgumentParser("GraphLOD")
.defaultHelp(true).description("calculates graph features.");
parser.addArgument("dataset").nargs("+").setDefault(Arrays.asList(DEFAULT_DATASET_LOCATION));
parser.addArgument("--excludedNamespace").nargs("*").setDefault(Collections.emptyList());
parser.addArgument("--namespace").type(String.class).setDefault("");
parser.addArgument("--excludedNamespaces").nargs("*").setDefault(Collections.emptyList());
parser.addArgument("--skipChromatic").action(Arguments.storeTrue());
parser.addArgument("--minImportantSubgraphSize").type(Integer.class).action(Arguments.store()).setDefault(20);
parser.addArgument("--importantDegreeCount").type(Integer.class).action(Arguments.store()).setDefault(5);
Expand All @@ -144,13 +145,15 @@ public static void main(final String[] args) {
parser.handleError(e);
System.exit(1);
}
List<String> excludedNamespaces = result.getList("excludedNamespace");
List<String> dataset = result.getList("dataset");
String namespace = result.getString("namespace");
List<String> excludedNamespaces = result.getList("excludedNamespaces");
boolean skipChromatic = result.getBoolean("skipChromatic");
int minImportantSubgraphSize = result.getInt("minImportantSubgraphSize");
int importantDegreeCount = result.getInt("importantDegreeCount");

System.out.println("reading: " + dataset);
System.out.println("namespace: " + namespace);
System.out.println("skip chromatic: " + skipChromatic);
System.out.println("excluded namespaces: " + excludedNamespaces);
System.out.println("min important subgraph size: " + minImportantSubgraphSize);
Expand All @@ -159,7 +162,7 @@ public static void main(final String[] args) {

Locale.setDefault(Locale.US);

new GraphLOD(dataset, skipChromatic, excludedNamespaces, minImportantSubgraphSize, importantDegreeCount);
new GraphLOD(dataset, skipChromatic, namespace, excludedNamespaces, minImportantSubgraphSize, importantDegreeCount);
}

}
16 changes: 13 additions & 3 deletions test/graphlod/DatasetTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public void setUp() {
public void literalsDontCount() {
lines.add(createStatement("a", "p1", "b"));
lines.add(createLiteralStatement("a", "p1", "some literal"));
Dataset dataset = Dataset.fromLines(lines, excluded);
Dataset dataset = Dataset.fromLines(lines, "", excluded);

assertThat(dataset.getGraph().vertexSet().size(), equalTo(2));
assertThat(dataset.getGraph().edgeSet().size(), equalTo(1));
Expand All @@ -44,21 +44,31 @@ public void literalsDontCount() {
@Test
public void testGetGraph() throws Exception {
lines.add(createStatement("a", "p1", "b"));
Dataset dataset = Dataset.fromLines(lines, excluded);
Dataset dataset = Dataset.fromLines(lines, "", excluded);

DirectedGraph<String, DefaultEdge> graph = dataset.getGraph();
Edge edge = graph.getEdge(url("a"),url("b"));
assertThat(edge.getSource(), equalTo((Object)url("a")));
assertThat(edge.getTarget(), equalTo((Object)url("b")));
}

/**
 * Verifies namespace filtering: with the namespace set to {@code http://a/},
 * the statement pointing at {@code b/Other} must not contribute a vertex or
 * an edge, while the statement between the two {@code a/} resources is kept.
 */
@Test
public void testNamespace() {
    lines.add(createStatement("a/Thing", "p1", "b/Other"));
    lines.add(createStatement("a/Thing", "p1", "a/NotOther"));

    Dataset dataset = Dataset.fromLines(lines, "http://a/", Arrays.asList("http://classes/"));
    // Only the two resources inside the namespace may remain.
    assertThat(dataset.getGraph().vertexSet(), containsInAnyOrder(url("a/Thing"),url("a/NotOther")));
    // Exactly the in-namespace edge survives.
    assertThat(dataset.getGraph().edgeSet().size(), equalTo(1));
    assertThat(dataset.getGraph().getEdge(url("a/Thing"), url("a/NotOther")), notNullValue());
}

@Test
public void testExcludedNamespace() {
lines.add(createStatement("classes/Thing", "p1", "b"));
lines.add(createStatement("a", "p1", "classes/Thing"));
lines.add(createStatement("a", "p1", "b"));

Dataset dataset = Dataset.fromLines(lines, Arrays.asList("http://classes/"));
Dataset dataset = Dataset.fromLines(lines, "", Arrays.asList("http://classes/"));
assertThat(dataset.getGraph().vertexSet(), containsInAnyOrder(url("a"),url("b")));
assertThat(dataset.getGraph().edgeSet().size(), equalTo(1));

Expand Down
4 changes: 2 additions & 2 deletions test/graphlod/GraphFeaturesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public void setup() {
createStatement("b", "p1", "c"),
createStatement("b", "p1", "d"),
createStatement("d", "p1", "b"),
createStatement("c", "p1", "e")), new ArrayList<String>());
createStatement("c", "p1", "e")), "", new ArrayList<String>());
features = new GraphFeatures(ds.getGraph());

}
Expand Down Expand Up @@ -106,7 +106,7 @@ public void testGetChromaticNumber() throws Exception {

@Test
public void testGetConnectedGraphFeatures() throws Exception {
assertThat(features.getConnectedSubGraphFeatures(0.0f), nullValue());
assertThat(features.getConnectedSubGraphFeatures(0.0f), empty());
}

@Test
Expand Down
3 changes: 1 addition & 2 deletions test/graphlod/GraphFeaturesTest2.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,9 @@ public void setup() {
*/
Dataset ds = Dataset.fromLines(Arrays.asList(
createStatement("a", "p1", "b"),

createStatement("c", "p1", "d"),
createStatement("d", "p1", "e"),
createStatement("e", "p1", "c")), new ArrayList<String>());
createStatement("e", "p1", "c")), "", new ArrayList<String>());
features = new GraphFeatures(ds.getGraph());
}

Expand Down

0 comments on commit a911e4c

Please sign in to comment.