Skip to content

Commit

Permalink
export graphs and vertices as csv #1
Browse files Browse the repository at this point in the history
  • Loading branch information
xchrdw committed Jan 15, 2015
1 parent a911e4c commit db9e27c
Show file tree
Hide file tree
Showing 11 changed files with 192 additions and 19 deletions.
17 changes: 9 additions & 8 deletions commands.sh
Original file line number Diff line number Diff line change
@@ -1,41 +1,42 @@
#!/bin/sh

java -Xmx80g -jar graphlod-0.1.jar --excludedNamespaces \
java -Xmx80g -jar graphlod-0.1.jar --name drugbank --excludedNamespaces \
"http://www4.wiwiss.fu-berlin.de/drugbank/resource/drugtype/" \
"http://www4.wiwiss.fu-berlin.de/drugbank/resource/references/" \
--skipChromatic \
--namespace "http://www4.wiwiss.fu-berlin.de/drugbank/" \
/data/graphlod/drugbank/drugbank.nt | tee drugbank.txt

java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
java -Xmx80g -jar graphlod-0.1.jar --name dailymed --skipChromatic \
--namespace "http://www4.wiwiss.fu-berlin.de/dailymed/" \
/data/graphlod/dailymed/dailymed_dump.nt | tee dailymed_dump.txt

java -Xmx80g -jar graphlod-0.1.jar --skipChromatic --excludedNamespaces \
java -Xmx80g -jar graphlod-0.1.jar --name diseasome --skipChromatic --excludedNamespaces \
"http://www4.wiwiss.fu-berlin.de/diseasome/resource/diseaseClass/" \
--namespace "http://www4.wiwiss.fu-berlin.de/diseasome/" \
/data/graphlod/diseasome/diseasome.nt | tee diseasome.txt

java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
java -Xmx80g -jar graphlod-0.1.jar --name dbpedia_person --skipChromatic \
--namespace "http://dbpedia.org/resource" \
/data/graphlod/dbpedia/persondata_en.nt | tee dbpedia_persondata.txt

java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
java -Xmx80g -jar graphlod-0.1.jar --name dbpedia_geo_coordinate --skipChromatic \
--namespace "http://dbpedia.org/resource" \
/data/graphlod/dbpedia/geo_coordinates_en.nt | tee geo_coordinate.txt

java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
java -Xmx80g -jar graphlod-0.1.jar --name dbpedia_homepages --skipChromatic \
--namespace "http://dbpedia.org/resource" \
/data/graphlod/dbpedia/homepages_en.nt | tee dbpedia_homepages.txt

# fix mapping: sed 's/"\.$/" \./' mappingbased_properties_en.nt > mappingbased_properties_en_fixed.nt

java -Xmx100g -jar graphlod-0.1.jar --skipChromatic \
java -Xmx100g -jar graphlod-0.1.jar --name dbpedia_mapping --skipChromatic \
--namespace "http://dbpedia.org/resource" \
mappingbased_properties_en_fixed.nt | tee dbpedia_mapping.txt

java -Xmx80g -jar graphlod-0.1.jar --skipChromatic \
java -Xmx80g -jar graphlod-0.1.jar --name linkedgeodata --skipChromatic \
/data/graphlod/linkedgeodata/2013-04-29-{Ae*,C*,E*,Mili*,H*,P*,S*,T*} \
--namespace "http://linkedgeodata.org/" \
| tee linkedgeodata.txt

zip result.txt *.txt *.csv
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@
<artifactId>commons-lang3</artifactId>
<version>3.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
Expand Down
4 changes: 2 additions & 2 deletions src/graphlod/CollectionUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public static <T extends Comparable<T>> T min(Collection<T> collection) {
}
T result = collection.iterator().next();
for (T element : collection) {
result = result.compareTo(element) > 0 ? result : element;
result = result.compareTo(element) < 0 ? result : element;
}
return result;
}
Expand All @@ -42,7 +42,7 @@ public static <T extends Comparable<T>> T max(Collection<T> collection) {
}
T result = collection.iterator().next();
for (T element : collection) {
result = result.compareTo(element) < 0 ? result : element;
result = result.compareTo(element) > 0 ? result : element;
}
return result;
}
Expand Down
2 changes: 1 addition & 1 deletion src/graphlod/Dataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ private void readTriples(NxParser nxp) {
}

if (subjectUri.equals(objectUri)) {
continue; // TODO: why that?
continue;
}

if (propertyUri.equals("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")) {
Expand Down
53 changes: 53 additions & 0 deletions src/graphlod/GraphCsvOutput.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package graphlod;


import java.io.IOException;
import java.io.Writer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Set;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;

import com.google.common.base.Charsets;
import com.google.common.base.Verify;

/**
 * Writes one CSV record of summary statistics per graph to the file
 * {@code <name>_graphs.csv} (UTF-8, {@link CSVFormat#DEFAULT}).
 *
 * <p>Columns, in order: graph, vertices, edges, diameter, avgindegree,
 * maxindegree, avgoutdegree, maxoutdegree.
 *
 * <p>The file is opened by the constructor and must be released with
 * {@link #close()}; the class implements {@link AutoCloseable} so it can be
 * used in a try-with-resources statement. All I/O failures are reported as
 * {@link RuntimeException} wrapping the original {@link IOException}.
 */
public class GraphCsvOutput implements AutoCloseable {

    private final CSVPrinter writer;
    private final int maxSizeForDiameter;

    /**
     * Creates (or overwrites) {@code <name>_graphs.csv} and writes the header row.
     *
     * @param name               prefix of the output file name
     * @param maxSizeForDiameter graphs with at least this many vertices get
     *                           {@code -1} in the diameter column instead of a
     *                           computed diameter
     * @throws RuntimeException if the file cannot be opened or the header
     *                          cannot be written
     */
    public GraphCsvOutput(String name, int maxSizeForDiameter) {
        this.maxSizeForDiameter = maxSizeForDiameter;
        Path path = Paths.get(name + "_graphs.csv");
        Writer out = null;
        try {
            out = Files.newBufferedWriter(path, Charsets.UTF_8);
            writer = CSVFormat.DEFAULT.withHeader("graph", "vertices", "edges", "diameter", "avgindegree", "maxindegree", "avgoutdegree", "maxoutdegree").print(out);
        } catch (IOException e) {
            // The printer never took ownership of the stream, so release it here
            // to avoid leaking the file handle.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
            }
            throw new RuntimeException(e);
        }
    }

    /**
     * Appends one record describing {@code graph}.
     *
     * <p>The diameter is only requested from the graph when its vertex count is
     * below the configured limit; otherwise {@code -1} is written.
     *
     * @param graph the graph whose statistics are written
     * @throws RuntimeException if the record cannot be written (previously the
     *                          error was only printed, silently losing the row)
     */
    public void writeGraph(GraphFeatures graph) {
        double diameter = graph.getVertexCount() < maxSizeForDiameter ? graph.getDiameter() : -1;
        try {
            writer.printRecord(graph.getId(), graph.getVertexCount(), graph.getEdgeCount(), diameter,
                    CollectionUtils.average(graph.getIndegrees()), CollectionUtils.max(graph.getIndegrees()),
                    CollectionUtils.average(graph.getOutdegrees()), CollectionUtils.max(graph.getOutdegrees()));
        } catch (IOException e) {
            // Same failure policy as the constructor and close().
            throw new RuntimeException(e);
        }
    }

    /**
     * Flushes and closes the underlying CSV file.
     *
     * @throws RuntimeException if closing fails
     */
    @Override
    public void close() {
        try {
            writer.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

}
32 changes: 29 additions & 3 deletions src/graphlod/GraphFeatures.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@ public class GraphFeatures {
private Set<String> vertices;
private final Set<DefaultEdge> edges;
private AsUndirectedGraph<String, DefaultEdge> undirectedG;
private String id;

public GraphFeatures(DirectedGraph<String, DefaultEdge> graph) {
public GraphFeatures(String id, DirectedGraph<String, DefaultEdge> graph) {
this.id = id;
this.graph = graph;
this.vertices = this.graph.vertexSet();
this.edges = this.graph.edgeSet();
Expand Down Expand Up @@ -79,7 +81,7 @@ public List<GraphFeatures> getConnectedSubGraphFeatures(float minSize) {
return Collections.emptyList();
}
List<GraphFeatures> connectedSubgraphFeatures = new ArrayList<>();

int i = 0;
for (Set<String> set : sets) {
if (set.size() < minSize) {
continue;
Expand All @@ -94,7 +96,8 @@ public List<GraphFeatures> getConnectedSubGraphFeatures(float minSize) {
subgraph.addEdge(vertex, (String) edge.getTarget(), edge);
}
}
connectedSubgraphFeatures.add(new GraphFeatures(subgraph));
connectedSubgraphFeatures.add(new GraphFeatures("subgraph" + i, subgraph));
i++;
}
Collections.sort(connectedSubgraphFeatures, new Comparator<GraphFeatures>() {
@Override
Expand Down Expand Up @@ -132,6 +135,13 @@ public List<Integer> getIndegrees() {
return this.indegrees;
}

/**
 * Returns the cached list of per-vertex in-degree entries.
 *
 * <p>If the cache field is still {@code null}, {@link #getIndegrees()} is
 * invoked first. NOTE(review): presumably that call fills {@code indegrees2}
 * as a side effect - confirm against its body.
 *
 * @return the {@code indegrees2} field after the optional initialisation call
 */
public List<Degree> getIndegrees2() {
    if (this.indegrees2 != null) {
        return this.indegrees2;
    }
    getIndegrees();
    return this.indegrees2;
}

public List<Integer> getOutdegrees() {
if (this.outdegrees == null) {
this.outdegrees = new ArrayList<>();
Expand All @@ -145,6 +155,14 @@ public List<Integer> getOutdegrees() {
return this.outdegrees;
}


/**
 * Returns the cached list of per-vertex out-degree entries.
 *
 * <p>If the cache field is still {@code null}, {@link #getOutdegrees()} is
 * invoked first. NOTE(review): presumably that call fills {@code outdegrees2}
 * as a side effect - confirm against its body.
 *
 * @return the {@code outdegrees2} field after the optional initialisation call
 */
public List<Degree> getOutdegrees2() {
    if (this.outdegrees2 != null) {
        return this.outdegrees2;
    }
    getOutdegrees();
    return this.outdegrees2;
}

public ArrayList<Integer> getEdgeCounts() {
ArrayList<Integer> edgeCounts = new ArrayList<>();
for (String vertex : this.vertices) {
Expand All @@ -165,6 +183,14 @@ public int getChromaticNumber() {
return ChromaticNumber.findGreedyChromaticNumber(this.undirectedG);
}

/**
 * Returns the vertex set held by this instance.
 *
 * <p>The field itself is returned, not a copy; the constructor initialises
 * it from {@code graph.vertexSet()}.
 *
 * @return the {@code vertices} field
 */
public Set<String> getVertices() {
    return this.vertices;
}

/**
 * Returns the identifier that was passed to the constructor for this graph.
 *
 * @return the graph id
 */
public String getId() {
    return this.id;
}

static class Degree implements Comparable<Degree> {
public String vertex;
public int degree;
Expand Down
24 changes: 21 additions & 3 deletions src/graphlod/GraphLOD.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,17 @@ public class GraphLOD {
private static final Logger logger = Logger.getLogger(GraphLOD.class);
public static final int MAX_SIZE_FOR_DIAMETER = 500;

public GraphLOD(Collection<String> datasetFiles, boolean skipChromaticNumber, String namespace, Collection<String> excludedNamespaces, float minImportantSubgraphSize, int importantDegreeCount) {
public GraphCsvOutput graphCsvOutput;
public VertexCsvOutput vertexCsvOutput;

public GraphLOD(String name, Collection<String> datasetFiles, boolean skipChromaticNumber, String namespace, Collection<String> excludedNamespaces, float minImportantSubgraphSize, int importantDegreeCount) {
graphCsvOutput = new GraphCsvOutput(name, MAX_SIZE_FOR_DIAMETER);
vertexCsvOutput = new VertexCsvOutput(name);

Stopwatch sw = Stopwatch.createStarted();
Dataset dataset = Dataset.fromFiles(datasetFiles, namespace, excludedNamespaces);

GraphFeatures graphFeatures = new GraphFeatures(dataset.getGraph());
GraphFeatures graphFeatures = new GraphFeatures("main_graph", dataset.getGraph());

System.out.println("Loading the dataset took " + sw + " to execute.");

Expand Down Expand Up @@ -85,6 +91,7 @@ public GraphLOD(Collection<String> datasetFiles, boolean skipChromaticNumber, St
System.out.printf("Subgraph: %s vertices\n", subGraph.getVertexCount());
analyzeConnectedGraph(subGraph, importantDegreeCount);
}

System.out.println("Analysing the subgraphs took " + sw + " to execute.");
}

Expand All @@ -109,6 +116,8 @@ public GraphLOD(Collection<String> datasetFiles, boolean skipChromaticNumber, St
System.out.println("Chromatic Number: " + cN);
System.out.println("Getting the Chromatic Number took " + sw + " to execute.");
}
graphCsvOutput.close();
vertexCsvOutput.close();
}

private void analyzeConnectedGraph(GraphFeatures graph, int importantDegreeCount) {
Expand All @@ -118,6 +127,8 @@ private void analyzeConnectedGraph(GraphFeatures graph, int importantDegreeCount
} else {
System.out.println("\tGraph too big to show diameter");
}
graphCsvOutput.writeGraph(graph);
vertexCsvOutput.writeGraph(graph);

System.out.println("\thighest indegrees:");
System.out.println("\t\t" + StringUtils.join(graph.maxInDegrees(importantDegreeCount), "\n\t\t"));
Expand All @@ -133,6 +144,7 @@ public static void main(final String[] args) {
ArgumentParser parser = ArgumentParsers.newArgumentParser("GraphLOD")
.defaultHelp(true).description("calculates graph features.");
parser.addArgument("dataset").nargs("+").setDefault(Arrays.asList(DEFAULT_DATASET_LOCATION));
parser.addArgument("--name").type(String.class).setDefault("");
parser.addArgument("--namespace").type(String.class).setDefault("");
parser.addArgument("--excludedNamespaces").nargs("*").setDefault(Collections.emptyList());
parser.addArgument("--skipChromatic").action(Arguments.storeTrue());
Expand All @@ -145,14 +157,20 @@ public static void main(final String[] args) {
parser.handleError(e);
System.exit(1);
}

List<String> dataset = result.getList("dataset");
String name = result.getString("name");
if(name.isEmpty()) {
name = dataset.get(0);
}
String namespace = result.getString("namespace");
List<String> excludedNamespaces = result.getList("excludedNamespaces");
boolean skipChromatic = result.getBoolean("skipChromatic");
int minImportantSubgraphSize = result.getInt("minImportantSubgraphSize");
int importantDegreeCount = result.getInt("importantDegreeCount");

System.out.println("reading: " + dataset);
System.out.println("name: " + name);
System.out.println("namespace: " + namespace);
System.out.println("skip chromatic: " + skipChromatic);
System.out.println("excluded namespaces: " + excludedNamespaces);
Expand All @@ -162,7 +180,7 @@ public static void main(final String[] args) {

Locale.setDefault(Locale.US);

new GraphLOD(dataset, skipChromatic, namespace, excludedNamespaces, minImportantSubgraphSize, importantDegreeCount);
new GraphLOD(name, dataset, skipChromatic, namespace, excludedNamespaces, minImportantSubgraphSize, importantDegreeCount);
}

}
55 changes: 55 additions & 0 deletions src/graphlod/VertexCsvOutput.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package graphlod;


import java.io.IOException;
import java.io.Writer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;

import com.google.common.base.Charsets;
import com.google.common.base.Verify;

/**
 * Writes one CSV record per vertex to the file {@code <name>_vertices.csv}
 * (UTF-8, {@link CSVFormat#DEFAULT}).
 *
 * <p>Columns, in order: graph, vertex, indegree, outdegree.
 *
 * <p>The file is opened by the constructor and must be released with
 * {@link #close()}; the class implements {@link AutoCloseable} so it can be
 * used in a try-with-resources statement. All I/O failures are reported as
 * {@link RuntimeException} wrapping the original {@link IOException}.
 */
public class VertexCsvOutput implements AutoCloseable {

    private final CSVPrinter writer;

    /**
     * Creates (or overwrites) {@code <name>_vertices.csv} and writes the header row.
     *
     * @param name prefix of the output file name
     * @throws RuntimeException if the file cannot be opened or the header
     *                          cannot be written
     */
    public VertexCsvOutput(String name) {
        Path path = Paths.get(name + "_vertices.csv");
        Writer out = null;
        try {
            out = Files.newBufferedWriter(path, Charsets.UTF_8);
            writer = CSVFormat.DEFAULT.withHeader("graph", "vertex", "indegree", "outdegree").print(out);
        } catch (IOException e) {
            // The printer never took ownership of the stream, so release it here
            // to avoid leaking the file handle.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
            }
            throw new RuntimeException(e);
        }
    }

    /**
     * Appends one record per vertex of {@code graph}, pairing each in-degree
     * entry with the out-degree entry at the same list position.
     *
     * @param graph the graph whose vertices are written
     * @throws RuntimeException if a record cannot be written
     * @throws com.google.common.base.VerifyException if the in- and out-degree
     *         lists differ in length or do not list the same vertex at the
     *         same position
     */
    public void writeGraph(GraphFeatures graph) {
        List<GraphFeatures.Degree> inDegrees = graph.getIndegrees2();
        // Fixed: this previously re-read the in-degree list, so the "outdegree"
        // column duplicated the in-degree and the vertex check below was vacuous.
        List<GraphFeatures.Degree> outDegrees = graph.getOutdegrees2();

        Verify.verify(inDegrees.size() == outDegrees.size(),
                "in-degree list has %s entries but out-degree list has %s",
                inDegrees.size(), outDegrees.size());

        try {
            for (int i = 0; i < inDegrees.size(); i++) {
                GraphFeatures.Degree in = inDegrees.get(i);
                GraphFeatures.Degree out = outDegrees.get(i);
                // NOTE(review): relies on both lists being built in the same
                // vertex order - confirm in GraphFeatures.
                Verify.verify(in.vertex.equals(out.vertex),
                        "degree lists out of sync at index %s: %s vs %s",
                        i, in.vertex, out.vertex);
                writer.printRecord(graph.getId(), in.vertex, in.degree, out.degree);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Flushes and closes the underlying CSV file.
     *
     * @throws RuntimeException if closing fails
     */
    @Override
    public void close() {
        try {
            writer.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

}
15 changes: 15 additions & 0 deletions test/graphlod/CollectionUtilsTest.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package graphlod;

import java.util.Arrays;
import java.util.Collection;

import org.junit.Test;

import com.google.common.collect.Lists;

import static org.hamcrest.Matchers.is;
import static org.junit.Assert.*;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsInAnyOrder;
Expand All @@ -16,4 +18,17 @@ public class CollectionUtilsTest {
public void testMaxValues() throws Exception {
assertThat(CollectionUtils.maxValues(Lists.newArrayList(5,2,9,2), 2), contains(5,9));
}

@Test
public void testMax() {
assertThat(CollectionUtils.max(Arrays.asList(1, 5, 3)), is(5));
}
@Test
public void testMin() {
assertThat(CollectionUtils.min(Arrays.asList(1, 5, 3)), is(1));
}
@Test
public void testAvg() {
assertThat(CollectionUtils.average(Arrays.asList(1, 5, 3)), is(3.0));
}
}
Loading

0 comments on commit db9e27c

Please sign in to comment.