
Commit

Initial commit.
mfalaize committed Oct 8, 2016
1 parent 55bc907 commit ade8eb3
Showing 4 changed files with 263 additions and 13 deletions.
15 changes: 3 additions & 12 deletions .gitignore
@@ -1,12 +1,3 @@
-*.class
-
-# Mobile Tools for Java (J2ME)
-.mtj.tmp/
-
-# Package Files #
-*.jar
-*.war
-*.ear
-
-# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
-hs_err_pid*
+.idea/
+*.iml
+target/
6 changes: 5 additions & 1 deletion README.md
@@ -1,2 +1,6 @@
 # schema-fetcher
-XSD schema fetcher to persist heavy schema to your disk
+XSD schema fetcher to persist heavy schemas to your disk
+
+### Usage
+
+`java -jar schema-fetcher.jar C:\export\xsd http://schemas.opengis.net/gml/3.1.1/base/gml.xsd`
53 changes: 53 additions & 0 deletions pom.xml
@@ -0,0 +1,53 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.mfalaize</groupId>
<artifactId>schema-fetcher</artifactId>
<version>1.0.0-SNAPSHOT</version>
<packaging>jar</packaging>

<name>schema-fetcher</name>
<description>XSD schema fetcher to persist heavy schemas to your disk</description>
<url>https://github.com/mfalaize/schema-fetcher</url>
<inceptionYear>2016</inceptionYear>
<licenses>
<license>
<name>Apache v2</name>
<url>http://www.apache.org/licenses/LICENSE-2.0</url>
</license>
</licenses>
<developers>
<developer>
<name>Maxime FALAIZE</name>
<email>[email protected]</email>
</developer>
</developers>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>fr.mfalaize.utils.SchemaFetcher</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>
202 changes: 202 additions & 0 deletions src/main/java/fr/mfalaize/utils/SchemaFetcher.java
@@ -0,0 +1,202 @@
package fr.mfalaize.utils;

import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import javax.xml.xpath.*;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;

/**
* XSD schema URL fetcher. An improved version of the solution provided by mschwehl in the Stack Overflow question
* http://stackoverflow.com/questions/13394950/download-xsd-with-all-imports.
*
* @author Maxime FALAIZE <[email protected]>
*/
public class SchemaFetcher {

private String rootPath;

// Key = schema URL, value = the fetched Schema object
private Map<String, Schema> cache = new HashMap<>();

/**
* Fetch all schemas from URL.
*
* @param args Required: the first argument is the absolute filesystem path to write the files to (without a trailing separator);
*             every following argument is a schema URL to fetch.
* @throws XPathExpressionException
* @throws IOException
* @throws URISyntaxException
*/
public static void main(String[] args) throws XPathExpressionException, IOException, URISyntaxException {
if (args == null || args.length <= 1) {
throw new IllegalArgumentException("Unexpected use of SchemaFetcher. You must provide the root absolute path as the first argument, " +
"followed by at least one schema URL.");
}
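// Everything after the first argument (the output directory) is treated as a schema URL to fetch.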
String[] urls = new String[args.length - 1];
System.arraycopy(args, 1, urls, 0, args.length - 1);
new SchemaFetcher(args[0]).fetchAll(urls);
}

public SchemaFetcher(String rootPath) {
this.rootPath = rootPath;
}

public void fetchAll(String... urls) throws IOException, XPathExpressionException, URISyntaxException {
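// Fetch each root schema; every schema reached through includes/imports ends up in the shared cache.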
for (String url : urls) {
Schema schema = new Schema(new URL(url));
schema.fetchAll();
}

writeFiles();

System.out.println("Done!");
}

private void writeFiles() throws IOException {
for (String urlCache : cache.keySet()) {
Schema element = cache.get(urlCache);
File f = new File(rootPath + File.separator + element.fileName);

System.out.println("Writing " + f.getAbsolutePath());

String contentTemp = element.content;

for (String schemaLocation : element.includesAndImports.keySet()) {
Schema schema = element.includesAndImports.get(schemaLocation);
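// Only rewrite references to remote (HTTP) schemas; other schemaLocation values are left untouched.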
if (schema.isHTTP()) {
contentTemp = contentTemp.replace(
"schemaLocation=\"" + schemaLocation,
"schemaLocation=\"" + schema.fileName);
}
}

try (FileOutputStream fos = new FileOutputStream(f)) {
fos.write(contentTemp.getBytes(StandardCharsets.UTF_8));
}
}
}

class Schema {

private URL url;
private String content;
private String fileName;

// Key = schemaLocation, value = the included/imported Schema
public Map<String, Schema> includesAndImports = new HashMap<>();

public Schema(URL url) throws URISyntaxException {
this.url = url;
generateFileName();
}

public void fetchAll() throws IOException, XPathExpressionException, URISyntaxException {
System.out.println("Fetching " + url.toString());

try (Scanner scanner = new Scanner(url.openStream(), StandardCharsets.UTF_8.name())) {
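// Read the whole document as a single token ("\\A" anchors at the start of the input, "\\Z" just before the final terminator).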
if (isHTTP()) {
content = scanner.useDelimiter("\\A").next();
} else {
content = scanner.useDelimiter("\\Z").next();
}

InputSource source = new InputSource(new StringReader(content));

List<Node> includesImportsList = getXpathNodes(source, "/*[local-name()='schema']/*[local-name()='include' or local-name()='import']");

for (Node element : includesImportsList) {
Node sl = element.getAttributes().getNamedItem("schemaLocation");
if (sl == null) {
System.out.println(url + " defines one include/import but no schemaLocation");
continue;
}

String schemaLocation = sl.getNodeValue();

URL url = buildUrl(schemaLocation, this.url);

Schema schema = new Schema(url);
includesAndImports.put(schemaLocation, schema);
}
}

cache.put(url.toString(), this);
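// Recurse into includes/imports that are not yet cached; registering this schema first keeps circular references from looping forever.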
for (Schema includeOrImport : includesAndImports.values()) {
if (!cache.containsKey(includeOrImport.url.toString())) {
includeOrImport.fetchAll();
}
}
}

private URL buildUrl(String schemaLocation, URL parent) throws MalformedURLException, URISyntaxException {

if (schemaLocation.startsWith("http") || schemaLocation.startsWith("file")) {
return new URL(schemaLocation);
}

// relative URL
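// Resolve against the parent schema's directory, then strip any "." and ".." segments via normalize().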
URI parentUri = parent.toURI().getPath().endsWith("/") ? parent.toURI().resolve("..") : parent.toURI().resolve(".");
URL url = new URL(parentUri.toURL().toString() + schemaLocation);
return new URL(url.toURI().normalize().toString());
}

/**
* Try to use the schema file name from the URL. If that name is already taken by another schema in the cache, prepend parent path segments until it is unique.
* Removes http:// and replaces / and \ with _
*/
public void generateFileName() throws URISyntaxException {
URI parent = url.toURI().resolve(".");

boolean uniqueFileName;
do {
uniqueFileName = true;
fileName = parent.relativize(url.toURI()).toString();
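// Flatten the relative path into a single file name so every schema lands in the same output directory.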

if (isHTTP()) {
fileName = fileName.replace("http://", "");
fileName = fileName.replace("/", "_");
} else {
fileName = fileName.replace("/", "_");
fileName = fileName.replace("\\", "_");
}

for (Schema schema : cache.values()) {
if (schema.fileName != null && schema.fileName.equals(fileName)
&& !schema.url.toString().equals(url.toString())) {
// File name already exists for another schema: go up one directory level and regenerate
uniqueFileName = false;
parent = parent.resolve("..");
}
}
} while (!uniqueFileName);
}

public boolean isHTTP() {
return url.getProtocol().startsWith("http");
}

private List<Node> getXpathNodes(InputSource source, String path) throws XPathExpressionException {
List<Node> returnList = new ArrayList<>();

XPathFactory xPathfactory = XPathFactory.newInstance();
XPath xpath = xPathfactory.newXPath();
XPathExpression expr = xpath.compile(path);

NodeList nodeList = (NodeList) expr.evaluate(source, XPathConstants.NODESET);
for (int i = 0; i < nodeList.getLength(); i++) {
Node n = nodeList.item(i);
returnList.add(n);
}

return returnList;
}
}
}
