diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 763537b..366dcb8 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -1 +1,2 @@ Andreas Textor +Florian Kleedorfer diff --git a/README.md b/README.md index ba12b34..c739f14 100644 --- a/README.md +++ b/README.md @@ -631,13 +631,13 @@ model: ``` There is no way to serialize this model in RDF/Turtle while using the inline blank node syntax `[ ]` -for the anonymous node `_:b0`. In this case, the anonymousNodeIdGenerator is called to determine -the name of the blank node in the output. +for the anonymous node `_:b0`. If, as in this example, the node in question already has a label, the label is re-used. +Otherwise, the anonymousNodeIdGenerator is used to generate it. -`(r, i) -> "_:gen" + i` +`(r, i) -> "gen" + i` @@ -691,7 +691,11 @@ elements in RDF lists. \* Adapted from [EditorConfig](https://editorconfig.org/#file-format-details) ## Release Notes - +* 1.2.12: + * Bugfix: Handle RDF lists that start with a non-anonymous node + * Bugfix: Handle blank node cycles + * Bugfix: Ensure constant blank node ordering + * Bugfix: Set Locale for NumberFormat to US * 1.2.11: * Bugfix: `rdf:type` is not printed as `a` when used as an object * Update all dependencies, including Apache Jena to 4.10.0 diff --git a/build.gradle.kts b/build.gradle.kts index f6932cf..4bbe7f3 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -75,7 +75,7 @@ tasks.test { } jacoco { - toolVersion = "0.8.7" + toolVersion = "0.8.12" } tasks.jacocoTestReport { diff --git a/src/main/java/de/atextor/turtle/formatter/FormattingStyle.java b/src/main/java/de/atextor/turtle/formatter/FormattingStyle.java index e319fe0..afb9e7c 100644 --- a/src/main/java/de/atextor/turtle/formatter/FormattingStyle.java +++ b/src/main/java/de/atextor/turtle/formatter/FormattingStyle.java @@ -13,8 +13,10 @@ import java.net.URI; import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; import java.text.NumberFormat; import java.util.List; +import java.util.Locale; import 
java.util.Set; import java.util.function.BiFunction; @@ -102,7 +104,7 @@ public class FormattingStyle { public Charset charset = Charset.UTF_8; @Builder.Default - public NumberFormat doubleFormat = new DecimalFormat( "0.####E0" ); + public NumberFormat doubleFormat = new DecimalFormat("0.####E0" , DecimalFormatSymbols.getInstance(Locale.US)); @Builder.Default public EndOfLineStyle endOfLine = EndOfLineStyle.LF; @@ -210,7 +212,7 @@ public class FormattingStyle { ); @Builder.Default - public BiFunction anonymousNodeIdGenerator = ( resource, integer ) -> "_:gen" + integer; + public BiFunction anonymousNodeIdGenerator = ( resource, integer ) -> "gen" + integer; public enum Alignment { LEFT, diff --git a/src/main/java/de/atextor/turtle/formatter/RDFNodeComparatorFactory.java b/src/main/java/de/atextor/turtle/formatter/RDFNodeComparatorFactory.java new file mode 100644 index 0000000..55b85e8 --- /dev/null +++ b/src/main/java/de/atextor/turtle/formatter/RDFNodeComparatorFactory.java @@ -0,0 +1,55 @@ +package de.atextor.turtle.formatter; + +import de.atextor.turtle.formatter.blanknode.BlankNodeMetadata; +import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.rdf.model.Resource; +import org.apache.jena.shared.PrefixMapping; + +import java.util.Comparator; +import java.util.Optional; + +public class RDFNodeComparatorFactory { + + private final PrefixMapping prefixMapping; + private final BlankNodeMetadata blankNodeOrdering; + private final RDFNodeComparator rdfNodeComparator = new RDFNodeComparator(); + + public RDFNodeComparatorFactory(PrefixMapping prefixMapping, BlankNodeMetadata blankNodeOrdering) { + this.prefixMapping = prefixMapping; + this.blankNodeOrdering = blankNodeOrdering; + } + + public RDFNodeComparatorFactory(PrefixMapping prefixMapping) { + this(prefixMapping, null); + } + + public RDFNodeComparator comparator() { + return rdfNodeComparator; + } + + private class RDFNodeComparator implements Comparator { + @Override public int compare(RDFNode 
left, RDFNode right) { + if (left.isURIResource()){ + if (right.isURIResource()){ + return prefixMapping.shortForm(left.asResource().getURI()).compareTo(prefixMapping.shortForm(right.asResource().getURI())); + } else if (right.isAnon()) { + return -1 ; // uris first + } + } else if (left.isAnon()) { + if (right.isAnon()) { + if (blankNodeOrdering != null) { + return Optional.ofNullable(blankNodeOrdering.getOrder(left.asResource().asNode())) + .orElse(Long.MAX_VALUE) + .compareTo(Optional.ofNullable( + blankNodeOrdering.getOrder(right.asResource().asNode())) + .orElse(Long.MAX_VALUE)); + } + } else if (right.isResource()) { + return 1; // uris first + } + } + //fall-through for all other cases, especially if we don't have a blank node ordering + return left.toString().compareTo(right.toString()); + } + } +} diff --git a/src/main/java/de/atextor/turtle/formatter/TurtleFormatter.java b/src/main/java/de/atextor/turtle/formatter/TurtleFormatter.java index 0fa3f32..54bdd99 100644 --- a/src/main/java/de/atextor/turtle/formatter/TurtleFormatter.java +++ b/src/main/java/de/atextor/turtle/formatter/TurtleFormatter.java @@ -1,5 +1,7 @@ package de.atextor.turtle.formatter; +import de.atextor.turtle.formatter.blanknode.BlankNodeMetadata; +import de.atextor.turtle.formatter.blanknode.BlankNodeOrderAwareTurtleParser; import lombok.AllArgsConstructor; import lombok.Value; import lombok.With; @@ -42,6 +44,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static java.util.function.Predicate.not; + public class TurtleFormatter implements Function, BiConsumer { public static final String OUTPUT_ERROR_MESSAGE = "Could not write to stream"; @@ -99,11 +103,11 @@ public TurtleFormatter( final FormattingStyle style ) { Integer.MAX_VALUE ).thenComparing( Map.Entry::getKey ); - objectOrder = Comparator.comparingInt( object -> + objectOrder = Comparator.comparingInt( object -> style.objectOrder.contains( object ) ? 
style.objectOrder.indexOf( object ) : Integer.MAX_VALUE - ).thenComparing( RDFNode::toString ); + ); } private static List statements( final Model model ) { @@ -114,6 +118,15 @@ private static List statements( final Model model, final Property pre return model.listStatements( null, predicate, object ).toList(); } + /** + * Serializes the specified model as TTL according to the {@link TurtleFormatter}'s {@link FormattingStyle}. + * + *
+ * Note: Using this method, ordering of blank nodes may differ between multiple runs using identical data. + * + * @param model the model to serialize. + * @return the formatted TTL serialization of the model + */ @Override public String apply( final Model model ) { final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); @@ -121,6 +134,30 @@ public String apply( final Model model ) { return outputStream.toString(); } + /** + * Format the specified TTL content according to the {@link TurtleFormatter}'s {@link FormattingStyle}. + * + * @param content RDF content in TTL format. + * @return the formatted content + */ + public String applyToContent( final String content ) { + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + process( content, outputStream ); + return outputStream.toString(); + } + + private void process(String content, ByteArrayOutputStream outputStream) { + if ( style.charset == FormattingStyle.Charset.UTF_8_BOM ) { + writeByteOrderMark( outputStream ); + } + BlankNodeOrderAwareTurtleParser.ParseResult result = BlankNodeOrderAwareTurtleParser.parseModel(content); + Model model = result.getModel(); + BlankNodeMetadata blankNodeMetadata = result.getBlankNodeMetadata(); + final PrefixMapping prefixMapping = buildPrefixMapping( model ); + RDFNodeComparatorFactory RDFNodeComparatorFactory = new RDFNodeComparatorFactory(prefixMapping, blankNodeMetadata); + doFormat(model, outputStream, prefixMapping, RDFNodeComparatorFactory, blankNodeMetadata); + } + private void writeByteOrderMark( final OutputStream outputStream ) { try { outputStream.write( new byte[]{ (byte) 0xEF, (byte) 0xBB, (byte) 0xBF } ); @@ -129,6 +166,16 @@ private void writeByteOrderMark( final OutputStream outputStream ) { } } + /** + * Serializes the specified model as TTL according to the {@link TurtleFormatter}'s {@link FormattingStyle} + * and writes it to the specified outputStream. + * + *
+ * Note: Using this method, ordering of blank nodes may differ between multiple runs using identical data. + * + * @param model the model to serialize. + * @param outputStream the stream to write to + */ @Override public void accept( final Model model, final OutputStream outputStream ) { if ( style.charset == FormattingStyle.Charset.UTF_8_BOM ) { @@ -136,33 +183,37 @@ public void accept( final Model model, final OutputStream outputStream ) { } final PrefixMapping prefixMapping = buildPrefixMapping( model ); + RDFNodeComparatorFactory RDFNodeComparatorFactory = new RDFNodeComparatorFactory(prefixMapping); + doFormat(model, outputStream, prefixMapping, RDFNodeComparatorFactory, BlankNodeMetadata.gotNothing()); + } + private void doFormat(Model model, OutputStream outputStream, PrefixMapping prefixMapping, + RDFNodeComparatorFactory RDFNodeComparatorFactory, BlankNodeMetadata blankNodeMetadata) { final Comparator predicateOrder = Comparator.comparingInt( property -> style.predicateOrder.contains( property ) ? style.predicateOrder.indexOf( property ) : Integer.MAX_VALUE ).thenComparing( property -> prefixMapping.shortForm( property.getURI() ) ); - - final State initialState = buildInitialState( model, outputStream, prefixMapping, predicateOrder ); - + final State initialState = buildInitialState(model, outputStream, prefixMapping, predicateOrder, + RDFNodeComparatorFactory, blankNodeMetadata); final State prefixesWritten = writePrefixes( initialState ); - - final Comparator subjectComparator = - Comparator.comparing( statement -> statement.getSubject().isURIResource() ? 
- prefixMapping.shortForm( statement.getSubject().getURI() ) : statement.getSubject().toString() ); - - final List statements = determineStatements( model, subjectComparator ); + final List statements = determineStatements(model, RDFNodeComparatorFactory); final State namedResourcesWritten = writeNamedResources( prefixesWritten, statements ); final State allResourcesWritten = writeAnonymousResources( namedResourcesWritten ); final State finalState = style.insertFinalNewline ? allResourcesWritten.newLine() : allResourcesWritten; - LOG.debug( "Written {} resources, with {} named anonymous resources", finalState.visitedResources.size(), finalState.identifiedAnonymousResources.size() ); } - private State writeAnonymousResources( final State state ) { + private State writeAnonymousResources( final State state) { State currentState = state; - for ( final Resource resource : state.identifiedAnonymousResources.keySet() ) { + List sortedAnonymousIdentifiedResources = state + .identifiedAnonymousResources + .keySet() + .stream() + .sorted( state.getRDFNodeComparatorFactory().comparator()) + .toList(); + for ( final Resource resource : sortedAnonymousIdentifiedResources) { if ( !resource.listProperties().hasNext() ) { continue; } @@ -189,15 +240,17 @@ private State writeNamedResources( final State state, final List stat return currentState; } - private List determineStatements( final Model model, final Comparator subjectComparator ) { + private List determineStatements( final Model model, final RDFNodeComparatorFactory rdfNodeComparatorFactory) { final Stream wellKnownSubjects = style.subjectOrder.stream().flatMap( subjectType -> - statements( model, RDF.type, subjectType ).stream().sorted( subjectComparator ) ); + statements( model, RDF.type, subjectType ) + .stream() + .sorted( Comparator.comparing( Statement::getSubject, rdfNodeComparatorFactory.comparator()))); final Stream otherSubjects = statements( model ).stream() .filter( statement -> !( 
statement.getPredicate().equals( RDF.type ) && statement.getObject().isResource() && style.subjectOrder.contains( statement.getObject().asResource() ) ) ) - .sorted( subjectComparator ); + .sorted( Comparator.comparing(Statement::getSubject, rdfNodeComparatorFactory.comparator())); return Stream.concat( wellKnownSubjects, otherSubjects ) .filter( statement -> !( statement.getSubject().isAnon() @@ -206,14 +259,21 @@ private List determineStatements( final Model model, final Comparator } private State buildInitialState( final Model model, final OutputStream outputStream, - final PrefixMapping prefixMapping, final Comparator predicateOrder ) { - - State currentState = new State( outputStream, model, predicateOrder, prefixMapping ); + final PrefixMapping prefixMapping, final Comparator predicateOrder, + RDFNodeComparatorFactory RDFNodeComparatorFactory, BlankNodeMetadata blankNodeMetadata) { + State currentState = new State( outputStream, model, predicateOrder, prefixMapping, RDFNodeComparatorFactory, blankNodeMetadata); int i = 0; - for ( final Resource r : anonymousResourcesThatNeedAnId( model ) ) { - final String s = style.anonymousNodeIdGenerator.apply( r, i ); + Set blankNodeLabelsInInput = blankNodeMetadata.getAllBlankNodeLabels(); + for ( final Resource r : anonymousResourcesThatNeedAnId( model, currentState) ) { + // use original label if present + String s = blankNodeMetadata.getLabel(r.asNode()); + if (s == null) { + // not a labeled blank node in the input: generate an id, retrying while it collides with an id that is already assigned or with any blank node label used in the input (either collision alone would emit two distinct nodes under one label) + do { + s = style.anonymousNodeIdGenerator.apply(r, i++); + } while (currentState.identifiedAnonymousResources.containsValue(s) || blankNodeLabelsInInput.contains(s)); + } currentState = currentState.withIdentifiedAnonymousResource( r, s ); - i++; } return currentState; } @@ -222,18 +282,60 @@ private State buildInitialState( final Model model, final OutputStream outputStr * Anonymous resources that are referred to more than once need to be given an internal id and * can 
not be serialized using [ ] notation. * - * @param model the input model + * @param model the input model + * @param currentState the state * @return the set of anonymous resources that are referred to more than once */ - private Set anonymousResourcesThatNeedAnId( final Model model ) { - return model.listObjects().toList().stream() + private Set anonymousResourcesThatNeedAnId( final Model model, State currentState) { + Set identifiedResources = new HashSet<>(currentState.identifiedAnonymousResources.keySet()); //needed for cycle detection + Set candidates = model.listObjects().toList().stream() .filter( RDFNode::isResource ) .map( RDFNode::asResource ) - .filter( RDFNode::isAnon ) - .filter( object -> statements( model, null, object ).size() > 1 ) - .collect( Collectors.toSet() ); + .filter( RDFNode::isAnon ).collect(Collectors.toSet()); + candidates.removeAll(currentState.getBlankNodeMetadata().getLabeledBlankNodes()); + List candidatesInOrder = + Stream.concat( + currentState.getBlankNodeMetadata().getLabeledBlankNodes() + .stream() + .sorted( currentState.getRDFNodeComparatorFactory().comparator()), + candidates + .stream() + .sorted( currentState.getRDFNodeComparatorFactory().comparator())) + .toList(); + for (Resource candidate: candidatesInOrder) { + if (identifiedResources.contains (candidate)){ + continue; + } + if (statements(model, null, candidate).size() > 1 || hasBlankNodeCycle( model, candidate, identifiedResources ) ) { + identifiedResources.add(candidate); + } + } + identifiedResources.removeAll(currentState.identifiedAnonymousResources.keySet()); + return identifiedResources; + } + + private boolean hasBlankNodeCycle(Model model, Resource start, Set identifiedResources) { + if (!start.isAnon()){ + return false; + } + return hasBlankNodeCycle(model, start, start, identifiedResources, new HashSet<>()); } + private boolean hasBlankNodeCycle(Model model, Resource resource, Resource target, Set identifiedResources, Set visited) { + if 
(visited.contains(resource)){ + return false; + } + visited.add(resource); + return model.listStatements(resource, null, (RDFNode) null) + .toList().stream() + .map(Statement::getObject) + .filter(RDFNode::isAnon) + .map(RDFNode::asResource) + .filter(not (identifiedResources::contains)) + .anyMatch(o -> target.equals(o) || hasBlankNodeCycle(model, o, target, identifiedResources, visited)); + } + + private PrefixMapping buildPrefixMapping( final Model model ) { final Map prefixMap = style.knownPrefixes.stream() .filter( knownPrefix -> model.getNsPrefixURI( knownPrefix.prefix() ) == null ) @@ -351,7 +453,7 @@ private State writeClosingSquareBracket( final State state ) { private boolean isList( final RDFNode node, final State state ) { return node.equals( RDF.nil ) || - ( node.isResource() && state.model.contains( node.asResource(), RDF.rest, (RDFNode) null ) ); + ( node.isAnon() && state.model.contains( node.asResource(), RDF.rest, (RDFNode) null ) ); } private State writeResource( final Resource resource, final State state ) { @@ -407,7 +509,7 @@ yield writeRdfNode( element, wouldElementExceedLineLength ? 
private State writeAnonymousResource( final Resource resource, final State state ) { if ( state.identifiedAnonymousResources.containsKey( resource ) ) { - return state.write( state.identifiedAnonymousResources.getOrDefault( resource, "" ) ); + return state.write( "_:" + state.identifiedAnonymousResources.getOrDefault( resource, "" ) ); } if ( !state.model.contains( resource, null, (RDFNode) null ) ) { @@ -656,7 +758,8 @@ private State writeProperty( final Resource subject, final Property predicate, f int index = 0; State currentState = predicateWrittenOnce; - for ( final RDFNode object : objects.stream().sorted( objectOrder ).toList() ) { + for ( final RDFNode object : objects.stream().sorted( objectOrder.thenComparing( + state.getRDFNodeComparatorFactory().comparator())).toList() ) { final boolean lastObject = index == objects.size() - 1; final State predicateWritten = useComma ? currentState : writeProperty( predicate, currentState ); @@ -776,6 +879,10 @@ private class State { PrefixMapping prefixMapping; + RDFNodeComparatorFactory RDFNodeComparatorFactory; + + BlankNodeMetadata blankNodeMetadata; + int indentationLevel; int alignment; @@ -784,11 +891,11 @@ private class State { public State( final OutputStream outputStream, final Model model, final Comparator predicateOrder, - final PrefixMapping prefixMapping ) { - this( outputStream, model, Set.of(), Map.of(), predicateOrder, prefixMapping, 0, 0, "" ); + final PrefixMapping prefixMapping, final RDFNodeComparatorFactory RDFNodeComparatorFactory, BlankNodeMetadata blankNodeMetadata) { + this( outputStream, model, Set.of(), Map.of(), predicateOrder, prefixMapping, RDFNodeComparatorFactory, blankNodeMetadata, 0, 0, "" ); } - public State withIdentifiedAnonymousResource( final Resource anonymousResource, final String id ) { + public State withIdentifiedAnonymousResource( final Resource anonymousResource, final String id) { final Map newMap = new HashMap<>( identifiedAnonymousResources ); newMap.put( 
anonymousResource, id ); return withIdentifiedAnonymousResources( newMap ); diff --git a/src/main/java/de/atextor/turtle/formatter/blanknode/BlankNodeMetadata.java b/src/main/java/de/atextor/turtle/formatter/blanknode/BlankNodeMetadata.java new file mode 100644 index 0000000..9f0c5b6 --- /dev/null +++ b/src/main/java/de/atextor/turtle/formatter/blanknode/BlankNodeMetadata.java @@ -0,0 +1,79 @@ +package de.atextor.turtle.formatter.blanknode; + +import org.apache.jena.graph.Node; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.rdf.model.Resource; +import org.apache.jena.rdf.model.Statement; +import org.apache.jena.vocabulary.RDF; + +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * A lookup table for each blank node's order in a TTL file. + */ +public class BlankNodeMetadata { + private final Map blankNodeIndex = new HashMap<>(); + private final Map blankNodeLabels = new HashMap<>(); + private final Set labeledBlankNodes = new HashSet<>(); + private long nextIndex = 0; + + public BlankNodeMetadata() { + } + + public void linkGraphNodesToModelResources(Model model ){ + this.labeledBlankNodes.addAll(model.listStatements() + .toList() + .stream() + .flatMap(s -> Stream.of(s.getSubject(), s.getObject())) + .filter(RDFNode::isAnon) + .filter(a -> this.blankNodeLabels.containsKey(a.asNode())) + .map(RDFNode::asResource) + .collect(Collectors.toSet())); + + } + + public static BlankNodeMetadata gotNothing() { + return new BlankNodeMetadata(); + } + + /** + * Returns the order of the specified node, if it has been added previously via + * {@link #registerNewBlankNode(Node)}, or null. 
+ * @param node the node to look up + * @return the 0-based order of the {@code node} (or null if it has not been registered) + */ + public Long getOrder(Node node) { + return blankNodeIndex.get(node); + } + + /** + * If the specified {@code node} is a labeled blank node, the label is returned. + * @param node + * @return the label or null. + */ + public String getLabel(Node node) { + return blankNodeLabels.get(node); + } + + void registerNewBlankNode(Node blankNode) { + if (blankNode.isBlank() && ! blankNodeIndex.containsKey(blankNode)){ + this.blankNodeIndex.put(blankNode, nextIndex++); + } + } + + void registerNewBlankNode(Node blankNode, String label) { + registerNewBlankNode(blankNode); + this.blankNodeLabels.put(blankNode, label); + } + + public Set getLabeledBlankNodes() { + return Collections.unmodifiableSet(this.labeledBlankNodes); + } + + public Set getAllBlankNodeLabels() { + return Collections.unmodifiableSet(new HashSet<>(this.blankNodeLabels.values())); + } +} diff --git a/src/main/java/de/atextor/turtle/formatter/blanknode/BlankNodeOrderAwareTurtleParser.java b/src/main/java/de/atextor/turtle/formatter/blanknode/BlankNodeOrderAwareTurtleParser.java new file mode 100644 index 0000000..2478411 --- /dev/null +++ b/src/main/java/de/atextor/turtle/formatter/blanknode/BlankNodeOrderAwareTurtleParser.java @@ -0,0 +1,109 @@ +package de.atextor.turtle.formatter.blanknode; + +import org.apache.jena.atlas.web.ContentType; +import org.apache.jena.graph.Graph; +import org.apache.jena.graph.Node; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.LangBuilder; +import org.apache.jena.riot.RDFParser; +import org.apache.jena.riot.RDFParserRegistry; +import org.apache.jena.riot.ReaderRIOT; +import org.apache.jena.riot.ReaderRIOTFactory; +import org.apache.jena.riot.lang.LabelToNode; +import org.apache.jena.riot.lang.LangRIOT; +import 
org.apache.jena.riot.lang.RiotParsers; +import org.apache.jena.riot.system.ParserProfile; +import org.apache.jena.riot.system.ParserProfileWrapper; +import org.apache.jena.riot.system.StreamRDF; +import org.apache.jena.riot.tokens.Token; +import org.apache.jena.riot.tokens.TokenType; +import org.apache.jena.sparql.util.Context; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.Reader; + +public class BlankNodeOrderAwareTurtleParser { + /** + * Parses the TTL content and returns a {@link ParseResult}, containing the + * new {@link Model} and a {@link BlankNodeMetadata} object that makes the ordering of the + * blank nodes in the original content accessible for further processing. + * @param content RDF in TTL format + * @return the parse result and the blank node ordering + */ + public static ParseResult parseModel(String content) { + BlankNodeMetadata bnodeMetadata = new BlankNodeMetadata(); + + Lang TTL_bn = LangBuilder.create("TTL_BN", "text/bogus") + .build(); + RDFParserRegistry.registerLangTriples(TTL_bn, new ReaderRIOTFactory() { + @Override public ReaderRIOT create(Lang language, ParserProfile profile) { + ParserProfile profileWrapper = new ParserProfileWrapper(profile) { + @Override public Node createBlankNode(Node scope, String label, long line, long col) { + Node blank = get().createBlankNode(scope, label, line, col); + bnodeMetadata.registerNewBlankNode(blank, label); + return blank; + } + + @Override public Node createBlankNode(Node scope, long line, long col) { + Node blank = get().createBlankNode(scope, line, col); + bnodeMetadata.registerNewBlankNode(blank); + return blank; + } + + @Override + public Node create(Node currentGraph, Token token) { + // Dispatches to the underlying ParserFactory operation + long line = token.getLine(); + long col = token.getColumn(); + String str = token.getImage(); + if (token.getType() == TokenType.BNODE) { + return createBlankNode(currentGraph, str, line, col); + } + return 
get().create(currentGraph, token); + } + + }; + return new ReaderRIOT() { + @Override public void read(InputStream in, String baseURI, ContentType ct, StreamRDF output, + Context context) { + LangRIOT parser = RiotParsers.createParser(in, Lang.TTL, output, profileWrapper); + parser.parse(); + } + + @Override public void read(Reader reader, String baseURI, ContentType ct, StreamRDF output, + Context context) { + LangRIOT parser = RiotParsers.createParser(reader, Lang.TTL, output, profileWrapper); + parser.parse(); + } + }; + } + }); + Graph graph = RDFParser.source(new ByteArrayInputStream(content.getBytes(java.nio.charset.StandardCharsets.UTF_8))).labelToNode(LabelToNode.createUseLabelAsGiven()).lang( + TTL_bn).toGraph(); // content is encoded explicitly as UTF-8 (the encoding Turtle mandates) instead of the platform default charset + RDFParserRegistry.removeRegistration(TTL_bn); + Model model = ModelFactory.createModelForGraph(graph); + bnodeMetadata.linkGraphNodesToModelResources(model); + return new ParseResult(model, bnodeMetadata); + } + + public static class ParseResult { + private final Model model; + private final BlankNodeMetadata blankNodeMetadata; + + public ParseResult(Model model, BlankNodeMetadata blankNodeMetadata) { + this.model = model; + this.blankNodeMetadata = blankNodeMetadata; + } + + public Model getModel() { + return model; + } + + public BlankNodeMetadata getBlankNodeMetadata() { + return blankNodeMetadata; + } + } +} diff --git a/src/test/java/de/atextor/turtle/formatter/TurtleFormatterTest.java b/src/test/java/de/atextor/turtle/formatter/TurtleFormatterTest.java index 521ad70..234fc64 100644 --- a/src/test/java/de/atextor/turtle/formatter/TurtleFormatterTest.java +++ b/src/test/java/de/atextor/turtle/formatter/TurtleFormatterTest.java @@ -1,21 +1,39 @@ package de.atextor.turtle.formatter; +import org.apache.jena.atlas.io.AWriter; +import org.apache.jena.atlas.io.IO; +import org.apache.jena.graph.Graph; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.Property; import org.apache.jena.rdf.model.RDFNode; import 
org.apache.jena.rdf.model.Resource; import org.apache.jena.rdf.model.Statement; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFParser; +import org.apache.jena.riot.lang.LabelToNode; +import org.apache.jena.riot.out.NodeFormatter; +import org.apache.jena.riot.out.NodeFormatterNT; +import org.apache.jena.riot.system.StreamRDF; +import org.apache.jena.riot.system.StreamRDFOps; +import org.apache.jena.riot.writer.StreamWriterTriX; +import org.apache.jena.riot.writer.WriterStreamRDFPlain; import org.apache.jena.vocabulary.RDF; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.InputStream; +import java.io.StringWriter; import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Stream; import static org.apache.jena.rdf.model.ResourceFactory.createProperty; import static org.apache.jena.rdf.model.ResourceFactory.createResource; @@ -719,6 +737,301 @@ void testRdfTypeAsObjectIsValid() { assertThat( result.trim() ).isEqualTo( modelString.trim() ); } + @Test + void testRdfListNonAnonymous(){ + final String modelString = """ + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + @prefix qudt: . + @prefix sh: . + + qudt:IntegerUnionList a rdf:List ; + rdfs:label "Integer union list" ; + rdf:first [ + sh:datatype xsd:nonNegativeInteger ; + ] ; + rdf:rest ( [ + sh:datatype xsd:positiveInteger ; + ] [ + sh:datatype xsd:integer ; + ] ) . 
+ """; + final Model model = modelFromString( modelString ); + + final FormattingStyle style = FormattingStyle.DEFAULT; + final TurtleFormatter formatter = new TurtleFormatter( style ); + final String result = formatter.apply( model ); + assertThat( result.trim() ).isEqualTo( modelString.trim() ); + } + + @Test + void testRdfListAnonymous(){ + final String modelString = """ + @prefix rdf: . + @prefix ex: . + + ex:something a ex:Thing ; + ex:hasList ( ex:one ex:two ) . + """; + final Model model = modelFromString( modelString ); + + final FormattingStyle style = FormattingStyle.DEFAULT; + final TurtleFormatter formatter = new TurtleFormatter( style ); + final String result = formatter.apply( model ); + assertThat( result.trim() ).isEqualTo( modelString.trim() ); + } + + @ParameterizedTest + @MethodSource + void testConsistentBlankNodeOrdering(String content){ + final FormattingStyle style = FormattingStyle.DEFAULT; + final TurtleFormatter formatter = new TurtleFormatter( style ); + for (int i = 0; i < 1; i++) { + final String result = formatter.applyToContent(content); + assertThat(result.trim()).isEqualTo(content.trim()); + } + } + + static Stream testConsistentBlankNodeOrdering(){ + return Stream.of( + """ + @prefix rdf: . + @prefix ex: . + + [ + a ex:Something ; + ] . + + [ + a ex:SomethingElse ; + ] . + """, + + """ + @prefix rdf: . + @prefix ex: . + + ex:aThing ex:has [ + a ex:Something ; + ] ; + ex:has [ + a ex:SomethingElse ; + ] . + """ + , + """ + @prefix ex: . + + _:blank1 ex:has [ + ex:has _:blank1 ; + ] .""" + ).map(s -> Arguments.of(s)); + } + + @Test + void testPreviouslyIdentifiedBlankNode(){ + String content = """ + @prefix ex: . + + _:gen0 ex:has [ + ex:has _:gen0 ; + ]."""; + String expected = """ + @prefix ex: . 
+ + _:gen0 ex:has [ + ex:has _:gen0 ; + ] ."""; + final FormattingStyle style = FormattingStyle.DEFAULT; + final TurtleFormatter formatter = new TurtleFormatter( style ); + for (int i = 0; i < 20; i++) { + final String result = formatter.applyToContent(content); + assertThat(result.trim()).isEqualTo(expected); + } + } + + @Test + void testBlankNodeCycle(){ + String content = """ + @prefix ex: . + + _:blank1 ex:has _:blank2 . + + _:blank2 ex:has _:blank1 . + """; + String expected = """ + @prefix ex: . + + _:blank1 ex:has [ + ex:has _:blank1 ; + ] ."""; + final FormattingStyle style = FormattingStyle.DEFAULT; + final TurtleFormatter formatter = new TurtleFormatter( style ); + for (int i = 0; i < 20; i++) { + final String result = formatter.applyToContent(content); + assertThat(result.trim()).isEqualTo(expected); + } + } + + @Test + void testNoBlankNodeCycle(){ + String content = """ + @prefix ex: . + + _:one ex:has ex:A . + ex:A ex:has _:one . + + """; + String expected = """ + @prefix ex: . + + ex:A ex:has [ + ex:has ex:A ; + ] ."""; + final FormattingStyle style = FormattingStyle.DEFAULT; + final TurtleFormatter formatter = new TurtleFormatter( style ); + for (int i = 0; i < 20; i++) { + final String result = formatter.applyToContent(content); + assertThat(result.trim()).isEqualTo(expected); + } + } + + @Test + void testNoBlankNodeCycle2Blanks(){ + String content = """ + @prefix ex: . + + _:one ex:has ex:A . + ex:A ex:has _:two . + _:two ex:has _:three . + _:three ex:has _:one . + + """; + String expected = """ + @prefix ex: . + + ex:A ex:has [ + ex:has [ + ex:has [ + ex:has ex:A ; + ] ; + ] ; + ] ."""; + final FormattingStyle style = FormattingStyle.DEFAULT; + final TurtleFormatter formatter = new TurtleFormatter( style ); + for (int i = 0; i < 20; i++) { + final String result = formatter.applyToContent(content); + assertThat(result.trim()).isEqualTo(expected); + } + } + + @Test + void testBlankNodeCycle1ResBetween(){ + String content = """ + @prefix ex: . 
+ + _:one ex:has ex:A . + ex:A ex:has _:two . + _:two ex:has _:one . + + """; + String expected = """ + @prefix ex: . + + ex:A ex:has [ + ex:has [ + ex:has ex:A ; + ] ; + ] ."""; + final FormattingStyle style = FormattingStyle.DEFAULT; + final TurtleFormatter formatter = new TurtleFormatter( style ); + final String result = formatter.applyToContent(content); + assertThat(result.trim()).isEqualTo(expected); + } + + @Test + void testBlankNodeCycle2ResBetween(){ + String content = """ + @prefix ex: . + + _:one ex:has ex:A . + ex:A ex:has _:two . + _:two ex:has ex:B . + ex:B ex:has _:one . + + """; + String expected = """ + @prefix ex: . + + ex:A ex:has [ + ex:has ex:B ; + ] . + + ex:B ex:has [ + ex:has ex:A ; + ] ."""; + final FormattingStyle style = FormattingStyle.DEFAULT; + final TurtleFormatter formatter = new TurtleFormatter( style ); + final String result = formatter.applyToContent(content); + assertThat(result.trim()).isEqualTo(expected); + } + + @Test + void testBlankNodeTriangle1(){ + String content = """ + @prefix : . + _:b1 :foo _:b2, _:b3. + _:b2 :foo _:b3. + + """; + String expected = """ + @prefix : . + + [ + :foo [ + :foo _:b3 ; + ] ; + :foo _:b3 ; + ] ."""; + final FormattingStyle style = FormattingStyle.DEFAULT; + final TurtleFormatter formatter = new TurtleFormatter(style); + for (int i = 0; i < 20; i++) { + final String result = formatter.applyToContent(content); + assertThat(result.trim()).isEqualTo(expected); + } + } + + @Test + void testBlankNodeTriangleWithBlankNodeTriple(){ + String content = """ + @prefix : . + [] :foo [] . + _:b1 :foo _:b2, _:b3. + _:b2 :foo _:b3. + + """; + String expected = """ + @prefix : . + + [ + :foo []; + ] . 
+ + [ + :foo [ + :foo _:b3 ; + ] ; + :foo _:b3 ; + ] ."""; + final FormattingStyle style = FormattingStyle.DEFAULT; + final TurtleFormatter formatter = new TurtleFormatter(style); + for (int i = 0; i < 20; i++) { + final String result = formatter.applyToContent(content); + assertThat(result.trim()).isEqualTo(expected); + } + } + private Model modelFromString( final String content ) { final Model model = ModelFactory.createDefaultModel(); final InputStream stream = new ByteArrayInputStream( content.getBytes( StandardCharsets.UTF_8 ) );