diff --git a/commands.sh b/commands.sh
index ed836fb..f7d369b 100644
--- a/commands.sh
+++ b/commands.sh
@@ -1,41 +1,42 @@
#!/bin/sh
-java -Xmx80g -jar graphlod-0.1.jar --excludedNamespaces \
+java -Xmx80g -jar graphlod-0.1.jar --name drugbank --excludedNamespaces \
"http://www4.wiwiss.fu-berlin.de/drugbank/resource/drugtype/" \
"http://www4.wiwiss.fu-berlin.de/drugbank/resource/references/" \
--skipChromatic \
--namespace "http://www4.wiwiss.fu-berlin.de/drugbank/" \
/data/graphlod/drugbank/drugbank.nt | tee drugbank.txt
-java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
+java -Xmx80g -jar graphlod-0.1.jar --name dailymed --skipChromatic \
--namespace "http://www4.wiwiss.fu-berlin.de/dailymed/" \
/data/graphlod/dailymed/dailymed_dump.nt | tee dailymed_dump.txt
-java -Xmx80g -jar graphlod-0.1.jar --skipChromatic --excludedNamespaces \
+java -Xmx80g -jar graphlod-0.1.jar --name diseasome --skipChromatic --excludedNamespaces \
"http://www4.wiwiss.fu-berlin.de/diseasome/resource/diseaseClass/" \
--namespace "http://www4.wiwiss.fu-berlin.de/diseasome/" \
/data/graphlod/diseasome/diseasome.nt | tee diseasome.txt
-java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
+java -Xmx80g -jar graphlod-0.1.jar --name dbpedia_person --skipChromatic \
--namespace "http://dbpedia.org/resource" \
/data/graphlod/dbpedia/persondata_en.nt | tee dbpedia_persondata.txt
-java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
+java -Xmx80g -jar graphlod-0.1.jar --name dbpedia_geo_coordinate --skipChromatic \
--namespace "http://dbpedia.org/resource" \
/data/graphlod/dbpedia/geo_coordinates_en.nt | tee geo_coordinate.txt
-java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
+java -Xmx80g -jar graphlod-0.1.jar --name dbpedia_homepages --skipChromatic \
--namespace "http://dbpedia.org/resource" \
/data/graphlod/dbpedia/homepages_en.nt | tee dbpedia_homepages.txt
# fix mapping: sed 's/"\.$/" \./' mappingbased_properties_en.nt > mappingbased_properties_en_fixed.nt
-java -Xmx100g -jar graphlod-0.1.jar --skipChromatic \
+java -Xmx100g -jar graphlod-0.1.jar --name dbpedia_mapping --skipChromatic \
--namespace "http://dbpedia.org/resource" \
mappingbased_properties_en_fixed.nt | tee dbpedia_mapping.txt
-java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
+java -Xmx80g -jar graphlod-0.1.jar --name linkedgeodata --skipChromatic \
/data/graphlod/linkedgeodata/2013-04-29-{Ae*,C*,E*,Mili*,H*,P*,S*,T*} \
--namespace "http://linkedgeodata.org/" \
| tee linkedgeodata.txt
+zip result.zip *.txt *.csv # bundle every report (*.txt) and CSV output of the runs above into one archive
diff --git a/pom.xml b/pom.xml
index d6c8962..ee63ba7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -55,6 +55,11 @@
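             <artifactId>commons-lang3</artifactId>
             <version>3.3.2</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-csv</artifactId>
+            <version>1.0</version>
+        </dependency>
         <dependency>
             <groupId>com.google.guava</groupId>
             <artifactId>guava</artifactId>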
diff --git a/src/graphlod/CollectionUtils.java b/src/graphlod/CollectionUtils.java
index 40caae5..08ec9a8 100644
--- a/src/graphlod/CollectionUtils.java
+++ b/src/graphlod/CollectionUtils.java
@@ -31,7 +31,7 @@ public static > T min(Collection collection) {
}
T result = collection.iterator().next();
for (T element : collection) {
- result = result.compareTo(element) > 0 ? result : element;
+ result = result.compareTo(element) < 0 ? result : element;
}
return result;
}
@@ -42,7 +42,7 @@ public static > T max(Collection collection) {
}
T result = collection.iterator().next();
for (T element : collection) {
- result = result.compareTo(element) < 0 ? result : element;
+ result = result.compareTo(element) > 0 ? result : element;
}
return result;
}
diff --git a/src/graphlod/Dataset.java b/src/graphlod/Dataset.java
index 729b8dd..1257b9f 100644
--- a/src/graphlod/Dataset.java
+++ b/src/graphlod/Dataset.java
@@ -72,7 +72,7 @@ private void readTriples(NxParser nxp) {
}
if (subjectUri.equals(objectUri)) {
- continue; // TODO: why that?
+ continue;
}
if (propertyUri.equals("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")) {
diff --git a/src/graphlod/GraphCsvOutput.java b/src/graphlod/GraphCsvOutput.java
new file mode 100644
index 0000000..154b3e9
--- /dev/null
+++ b/src/graphlod/GraphCsvOutput.java
@@ -0,0 +1,79 @@
+package graphlod;
+
+
+import java.io.IOException;
+import java.io.Writer;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVPrinter;
+
+import com.google.common.base.Charsets;
+import com.google.common.base.Verify;
+
+/**
+ * Writes one CSV record of summary features per graph to the file {@code <name>_graphs.csv}.
+ * Columns: graph, vertices, edges, diameter, avgindegree, maxindegree, avgoutdegree, maxoutdegree.
+ */
+public class GraphCsvOutput {
+
+    private final CSVPrinter writer;
+    private final int maxSizeForDiameter;
+
+    /**
+     * Opens {@code name + "_graphs.csv"} (UTF-8) and creates a CSV printer configured with the header columns.
+     *
+     * @param name prefix of the output file name
+     * @param maxSizeForDiameter graphs with at least this many vertices get -1 instead of a computed diameter
+     * @throws RuntimeException wrapping the IOException if the file cannot be opened
+     */
+    public GraphCsvOutput(String name, int maxSizeForDiameter) {
+        this.maxSizeForDiameter = maxSizeForDiameter;
+        Writer out;
+        try {
+            Path path = Paths.get(name + "_graphs.csv");
+            out = Files.newBufferedWriter(path, Charsets.UTF_8);
+            writer = CSVFormat.DEFAULT.withHeader("graph", "vertices", "edges", "diameter", "avgindegree", "maxindegree", "avgoutdegree", "maxoutdegree").print(out);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Appends one record with the graph's id, vertex and edge counts, diameter and degree statistics.
+     *
+     * @param graph graph whose features are written
+     * @throws RuntimeException wrapping the IOException if the record cannot be written
+     */
+    public void writeGraph(GraphFeatures graph) {
+        // The diameter is only requested below the size limit; -1 marks "not computed".
+        double diameter = graph.getVertexCount() < maxSizeForDiameter ? graph.getDiameter() : -1;
+        try {
+            writer.printRecord(graph.getId(), graph.getVertexCount(), graph.getEdgeCount(), diameter,
+                    CollectionUtils.average(graph.getIndegrees()), CollectionUtils.max(graph.getIndegrees()),
+                    CollectionUtils.average(graph.getOutdegrees()), CollectionUtils.max(graph.getOutdegrees()));
+        } catch (IOException e) {
+            // Fail the same way the constructor and close() do, rather than printing the
+            // stack trace and carrying on with an incomplete CSV file.
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Closes the underlying CSV printer.
+     *
+     * @throws RuntimeException wrapping the IOException if closing fails
+     */
+    public void close() {
+        try {
+            writer.close();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+}
diff --git a/src/graphlod/GraphFeatures.java b/src/graphlod/GraphFeatures.java
index dca5bb0..dd11ed1 100644
--- a/src/graphlod/GraphFeatures.java
+++ b/src/graphlod/GraphFeatures.java
@@ -30,8 +30,10 @@ public class GraphFeatures {
private Set vertices;
private final Set edges;
private AsUndirectedGraph undirectedG;
+ private String id;
- public GraphFeatures(DirectedGraph graph) {
+ public GraphFeatures(String id, DirectedGraph graph) {
+ this.id = id;
this.graph = graph;
this.vertices = this.graph.vertexSet();
this.edges = this.graph.edgeSet();
@@ -79,7 +81,7 @@ public List getConnectedSubGraphFeatures(float minSize) {
return Collections.emptyList();
}
List connectedSubgraphFeatures = new ArrayList<>();
-
+ int i = 0;
for (Set set : sets) {
if (set.size() < minSize) {
continue;
@@ -94,7 +96,8 @@ public List getConnectedSubGraphFeatures(float minSize) {
subgraph.addEdge(vertex, (String) edge.getTarget(), edge);
}
}
- connectedSubgraphFeatures.add(new GraphFeatures(subgraph));
+ connectedSubgraphFeatures.add(new GraphFeatures("subgraph" + i, subgraph));
+ i++;
}
Collections.sort(connectedSubgraphFeatures, new Comparator() {
@Override
@@ -132,6 +135,13 @@ public List getIndegrees() {
return this.indegrees;
}
+ public List getIndegrees2() {
+ if(this.indegrees2 == null) {
+ getIndegrees();
+ }
+ return this.indegrees2;
+ }
+
public List getOutdegrees() {
if (this.outdegrees == null) {
this.outdegrees = new ArrayList<>();
@@ -145,6 +155,14 @@ public List getOutdegrees() {
return this.outdegrees;
}
+
+ public List getOutdegrees2() {
+ if(this.outdegrees2 == null) {
+ getOutdegrees();
+ }
+ return this.outdegrees2;
+ }
+
public ArrayList getEdgeCounts() {
ArrayList edgeCounts = new ArrayList<>();
for (String vertex : this.vertices) {
@@ -165,6 +183,14 @@ public int getChromaticNumber() {
return ChromaticNumber.findGreedyChromaticNumber(this.undirectedG);
}
+ public Set getVertices() {
+ return vertices;
+ }
+
+ public String getId() {
+ return id;
+ }
+
static class Degree implements Comparable {
public String vertex;
public int degree;
diff --git a/src/graphlod/GraphLOD.java b/src/graphlod/GraphLOD.java
index fad48ff..9ea6018 100644
--- a/src/graphlod/GraphLOD.java
+++ b/src/graphlod/GraphLOD.java
@@ -28,11 +28,17 @@ public class GraphLOD {
private static final Logger logger = Logger.getLogger(GraphLOD.class);
public static final int MAX_SIZE_FOR_DIAMETER = 500;
- public GraphLOD(Collection datasetFiles, boolean skipChromaticNumber, String namespace, Collection excludedNamespaces, float minImportantSubgraphSize, int importantDegreeCount) {
+ public GraphCsvOutput graphCsvOutput;
+ public VertexCsvOutput vertexCsvOutput;
+
+ public GraphLOD(String name, Collection datasetFiles, boolean skipChromaticNumber, String namespace, Collection excludedNamespaces, float minImportantSubgraphSize, int importantDegreeCount) {
+ graphCsvOutput = new GraphCsvOutput(name, MAX_SIZE_FOR_DIAMETER);
+ vertexCsvOutput = new VertexCsvOutput(name);
+
Stopwatch sw = Stopwatch.createStarted();
Dataset dataset = Dataset.fromFiles(datasetFiles, namespace, excludedNamespaces);
- GraphFeatures graphFeatures = new GraphFeatures(dataset.getGraph());
+ GraphFeatures graphFeatures = new GraphFeatures("main_graph", dataset.getGraph());
System.out.println("Loading the dataset took " + sw + " to execute.");
@@ -85,6 +91,7 @@ public GraphLOD(Collection datasetFiles, boolean skipChromaticNumber, St
System.out.printf("Subgraph: %s vertices\n", subGraph.getVertexCount());
analyzeConnectedGraph(subGraph, importantDegreeCount);
}
+
System.out.println("Analysing the subgraphs took " + sw + " to execute.");
}
@@ -109,6 +116,8 @@ public GraphLOD(Collection datasetFiles, boolean skipChromaticNumber, St
System.out.println("Chromatic Number: " + cN);
System.out.println("Getting the Chromatic Number took " + sw + " to execute.");
}
+ graphCsvOutput.close();
+ vertexCsvOutput.close();
}
private void analyzeConnectedGraph(GraphFeatures graph, int importantDegreeCount) {
@@ -118,6 +127,8 @@ private void analyzeConnectedGraph(GraphFeatures graph, int importantDegreeCount
} else {
System.out.println("\tGraph too big to show diameter");
}
+ graphCsvOutput.writeGraph(graph);
+ vertexCsvOutput.writeGraph(graph);
System.out.println("\thighest indegrees:");
System.out.println("\t\t" + StringUtils.join(graph.maxInDegrees(importantDegreeCount), "\n\t\t"));
@@ -133,6 +144,7 @@ public static void main(final String[] args) {
ArgumentParser parser = ArgumentParsers.newArgumentParser("GraphLOD")
.defaultHelp(true).description("calculates graph features.");
parser.addArgument("dataset").nargs("+").setDefault(Arrays.asList(DEFAULT_DATASET_LOCATION));
+ parser.addArgument("--name").type(String.class).setDefault("");
parser.addArgument("--namespace").type(String.class).setDefault("");
parser.addArgument("--excludedNamespaces").nargs("*").setDefault(Collections.emptyList());
parser.addArgument("--skipChromatic").action(Arguments.storeTrue());
@@ -145,7 +157,12 @@ public static void main(final String[] args) {
parser.handleError(e);
System.exit(1);
}
+
List dataset = result.getList("dataset");
+ String name = result.getString("name");
+ if(name.isEmpty()) {
+ name = dataset.get(0);
+ }
String namespace = result.getString("namespace");
List excludedNamespaces = result.getList("excludedNamespaces");
boolean skipChromatic = result.getBoolean("skipChromatic");
@@ -153,6 +170,7 @@ public static void main(final String[] args) {
int importantDegreeCount = result.getInt("importantDegreeCount");
System.out.println("reading: " + dataset);
+ System.out.println("name: " + name);
System.out.println("namespace: " + namespace);
System.out.println("skip chromatic: " + skipChromatic);
System.out.println("excluded namespaces: " + excludedNamespaces);
@@ -162,7 +180,7 @@ public static void main(final String[] args) {
Locale.setDefault(Locale.US);
- new GraphLOD(dataset, skipChromatic, namespace, excludedNamespaces, minImportantSubgraphSize, importantDegreeCount);
+ new GraphLOD(name, dataset, skipChromatic, namespace, excludedNamespaces, minImportantSubgraphSize, importantDegreeCount);
}
}
diff --git a/src/graphlod/VertexCsvOutput.java b/src/graphlod/VertexCsvOutput.java
new file mode 100644
index 0000000..4c82dd8
--- /dev/null
+++ b/src/graphlod/VertexCsvOutput.java
@@ -0,0 +1,79 @@
+package graphlod;
+
+
+import java.io.IOException;
+import java.io.Writer;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVPrinter;
+
+import com.google.common.base.Charsets;
+import com.google.common.base.Verify;
+
+/**
+ * Writes one CSV record per vertex of a graph to the file {@code <name>_vertices.csv}.
+ * Columns: graph, vertex, indegree, outdegree.
+ */
+public class VertexCsvOutput {
+
+    CSVPrinter writer;
+
+    /**
+     * Opens {@code name + "_vertices.csv"} (UTF-8) and creates a CSV printer configured with the header columns.
+     *
+     * @param name prefix of the output file name
+     * @throws RuntimeException wrapping the IOException if the file cannot be opened
+     */
+    public VertexCsvOutput(String name) {
+        Writer out;
+        try {
+            Path path = Paths.get(name + "_vertices.csv");
+            out = Files.newBufferedWriter(path, Charsets.UTF_8);
+            writer = CSVFormat.DEFAULT.withHeader("graph", "vertex", "indegree", "outdegree").print(out);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Appends one record (graph id, vertex, indegree, outdegree) per vertex of the graph.
+     *
+     * @param graph graph whose per-vertex degrees are written
+     * @throws RuntimeException wrapping the IOException if a record cannot be written
+     */
+    public void writeGraph(GraphFeatures graph) {
+        List<GraphFeatures.Degree> inDegrees = graph.getIndegrees2();
+        // Must be the outdegree list: reading the indegree list twice repeats the indegree column.
+        List<GraphFeatures.Degree> outDegrees = graph.getOutdegrees2();
+
+        for (int i = 0; i < inDegrees.size(); i++) {
+            GraphFeatures.Degree in = inDegrees.get(i);
+            GraphFeatures.Degree out = outDegrees.get(i);
+            // Both lists are expected to list vertices in the same order (TODO confirm in GraphFeatures).
+            Verify.verify(in.vertex.equals(out.vertex));
+            try {
+                writer.printRecord(graph.getId(), in.vertex, in.degree, out.degree);
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        }
+    }
+
+    /**
+     * Closes the underlying CSV printer.
+     *
+     * @throws RuntimeException wrapping the IOException if closing fails
+     */
+    public void close() {
+        try {
+            writer.close();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+}
diff --git a/test/graphlod/CollectionUtilsTest.java b/test/graphlod/CollectionUtilsTest.java
index 7daa737..1a680f7 100644
--- a/test/graphlod/CollectionUtilsTest.java
+++ b/test/graphlod/CollectionUtilsTest.java
@@ -1,11 +1,13 @@
package graphlod;
+import java.util.Arrays;
import java.util.Collection;
import org.junit.Test;
import com.google.common.collect.Lists;
+import static org.hamcrest.Matchers.is;
import static org.junit.Assert.*;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsInAnyOrder;
@@ -16,4 +18,17 @@ public class CollectionUtilsTest {
public void testMaxValues() throws Exception {
assertThat(CollectionUtils.maxValues(Lists.newArrayList(5,2,9,2), 2), contains(5,9));
}
+
+ @Test
+ public void testMax() {
+ assertThat(CollectionUtils.max(Arrays.asList(1, 5, 3)), is(5));
+ }
+ @Test
+ public void testMin() {
+ assertThat(CollectionUtils.min(Arrays.asList(1, 5, 3)), is(1));
+ }
+ @Test
+ public void testAvg() {
+ assertThat(CollectionUtils.average(Arrays.asList(1, 5, 3)), is(3.0));
+ }
}
\ No newline at end of file
diff --git a/test/graphlod/GraphFeaturesTest.java b/test/graphlod/GraphFeaturesTest.java
index 3a47370..eb9f933 100644
--- a/test/graphlod/GraphFeaturesTest.java
+++ b/test/graphlod/GraphFeaturesTest.java
@@ -29,7 +29,7 @@ public void setup() {
createStatement("b", "p1", "d"),
createStatement("d", "p1", "b"),
createStatement("c", "p1", "e")), "", new ArrayList());
- features = new GraphFeatures(ds.getGraph());
+ features = new GraphFeatures("" , ds.getGraph());
}
diff --git a/test/graphlod/GraphFeaturesTest2.java b/test/graphlod/GraphFeaturesTest2.java
index 6fce48b..1cd6afc 100644
--- a/test/graphlod/GraphFeaturesTest2.java
+++ b/test/graphlod/GraphFeaturesTest2.java
@@ -35,7 +35,7 @@ public void setup() {
createStatement("c", "p1", "d"),
createStatement("d", "p1", "e"),
createStatement("e", "p1", "c")), "", new ArrayList());
- features = new GraphFeatures(ds.getGraph());
+ features = new GraphFeatures("", ds.getGraph());
}
@Test