diff --git a/frontend/src/semantic-search/modelToQuery.ts b/frontend/src/semantic-search/modelToQuery.ts index bb64719b..847b8ae2 100644 --- a/frontend/src/semantic-search/modelToQuery.ts +++ b/frontend/src/semantic-search/modelToQuery.ts @@ -1,3 +1,18 @@ +/** + * SemanticSearchView and its subviews work with an underlying data model, + * which is rich enough to represent both the recursive user interface and the + * recursive SPARQL query that is generated from it. In the current module, we + * implement the SPARQL code generation from this common data model. + * + * We generate a useful, logically complete subset of SPARQL that lends itself + * well to being constructed from the user interface. Specifically, we use + * inverse and sequence property paths, `EXISTS` and `NOT EXISTS`, `UNION`, + * pattern concatenation, and a handful of functions and operators. + * + * The default export of the module, `modelToQuery`, is the real interface. + * Some other functions are exported as well, but only for unit-testing purposes. + */ + import { map, find, uniqueId, partial, groupBy } from 'lodash'; import * as _ from 'lodash'; @@ -9,9 +24,12 @@ import Node from '../common-rdf/node'; import queryTemplate from './query-template'; -interface nsTable { - [abbreviation: string]: string; -} +// SPARQL is essentially a bi-modal query language: query conditions can be +// either patterns (consisting of triples and delimited by curly braces) or +// expressions (built up from function calls and operators). Expressions can be +// injected into patterns using `FILTER` statements. Conversely, patterns can be +// nested inside expressions using the `EXISTS` function. The next three types +// enable us to distinguish between the two modes. interface TaggedExpression { tag: 'expression'; @@ -25,11 +43,18 @@ interface TaggedPattern { type TaggedSyntax = TaggedExpression | TaggedPattern; +/** + * Return type of `_.groupBy(TaggedSyntax[], 'tag')`, useful for and/or.
+ */ interface Branches { expression?: TaggedExpression[]; pattern?: TaggedPattern[]; }; +interface nsTable { + [abbreviation: string]: string; +} + const defaultNs = { rdfs: rdfs(), owl: owl(), @@ -37,6 +62,10 @@ const defaultNs = { item: item(), }; +/** + * Serialize an IRI either as `<iri>` or as `ns:short`, depending + * on available namespaces. + */ export function serializeIri(iri: string, ns: nsTable): string { let short = ''; find(ns, function(namespace, abbreviation) { @@ -50,6 +79,9 @@ export function serializeIri(iri: string, ns: nsTable): string { return short || `<${iri}>`; } +/** + * Serialize a SPARQL-supported literal with a type in the `xsd` namespace. + */ export function serializeLiteral( literal: string, datatype: string, ns: nsTable ): string { @@ -59,6 +91,7 @@ export function serializeLiteral( case xsd.string: return `"${literal}"`; } + // Assume number since that's the only other type SPARQL supports. return literal; } @@ -66,6 +99,15 @@ function nextVariable(): string { return uniqueId('?x'); } +/** + * In the context of a predicate path, we write the IRI of an inverse property + * as `^direct`. This is safer than always writing an inverse property as + * itself, firstly because this is also how related items are saved to the + * backend, and secondly because our frontend reasoner synthetically generates + * some "pretend" inverse properties from their direct counterparts when no + * existing inverse is found. The latter mechanism is implemented in + * `../utilities/relation-utilities.ts`.
+ */ export function serializePredicate(predicate: Node, ns: nsTable): string { const inverse = predicate.get(owl.inverseOf) as Node[]; if (inverse && inverse.length) return `^${serializeIri(inverse[0].id, ns)}`; @@ -84,6 +126,12 @@ function tagPattern(pattern: string): TaggedPattern { return { tag: 'pattern', pattern }; } +/** + * Serialize one atomic building block of an expression: either a function call + * with only literal or variable arguments, or a binary operator expression + * with likewise operands. See `./dropdown-constants.ts` for the possible + * filter models. + */ export function serializeExpression(filter: Model, args: string[]): TaggedExpression { const func = filter.get('function') || ''; const op = filter.get('operator'); @@ -95,6 +143,7 @@ function patternAsExpression({ pattern }: TaggedPattern): TaggedExpression { return tagExpression(`EXISTS {\n${pattern}}`); } +// Below, we generate two helpers for `combineAnd` and `combineOr`. function joinTagged(key: K) { return function(constituents: Branches[K], glue: string): string { return map(constituents, key).join(glue); @@ -103,6 +152,11 @@ function joinTagged(key: K) { const joinE = joinTagged('expression'); const joinP = joinTagged('pattern'); +/** + * Apply logical AND to combine a bunch of SPARQL snippets which have already + * been pre-grouped by mode (expression/pattern). The resulting SPARQL snippet + * may be either a pattern or an expression, depending on what went in. + */ export function combineAnd({ expression, pattern }: Branches): TaggedSyntax { let exp = expression ? `${joinE(expression, ' && ')}` : ''; if (expression && expression.length > 1) exp = `(${exp})`; @@ -114,6 +168,11 @@ export function combineAnd({ expression, pattern }: Branches): TaggedSyntax { return tagPattern(pat); } +/** + * Apply logical OR to combine a bunch of SPARQL snippets which have already + * been pre-grouped by mode (expression/pattern). 
The resulting SPARQL snippet + * may be either a pattern or an expression, depending on what went in. + */ export function combineOr({ expression, pattern }: Branches): TaggedSyntax { if (expression) { const patExp = expression.concat( @@ -125,11 +184,17 @@ export function combineOr({ expression, pattern }: Branches): TaggedSyntax { return tagPattern(`{\n${joinP(pattern, '} UNION {\n')}}\n`); } +// Lookup table to save an `if`/`else` down the line. const combine = { and: combineAnd, or: combineOr, }; +/** + * Apply logical NOT to a SPARQL snippet which may be of either mode + * (expression/pattern). The result is always an expression; patterns need to + * be converted to expressions first because they cannot be negated directly. + */ function negate(syntax: TaggedSyntax): TaggedExpression { return tagExpression( syntax.tag === 'expression' ? @@ -138,6 +203,12 @@ function negate(syntax: TaggedSyntax): TaggedExpression { ); } +/** + * Core recursive pattern/expression builder, representing an entire chain + * (row) from the UI, including any subchains that branch out from it. The + * recursion is depth-first and pre-order, so that the smallest constituents + * determine the mode of their containing constituents. + */ export function serializeChain( entry: Model, variableIn: string, ns: nsTable, index: number = 0 ): TaggedSyntax { @@ -146,11 +217,18 @@ export function serializeChain( const predicates: Node[] = []; const args: string[] = []; let variableOut: string = variableIn; + // Conceptually, a chain consists of zero or more property traversals, + // optionally recursing on logical operators. Eventually, chains always + // terminate with a filter. `tail` will contain the SPARQL syntax that + // results either from recursion or termination. The purpose of the loop + // below is to accumulate the properties to traverse until the `tail` is + // found.
let tail: TaggedSyntax; while (index < chain.length) { const model = chain.at(index); const scheme = model.get('scheme'); if (scheme === 'logic') { + // Logic, recurse. const branches = model.get('branches'); const action = model.get('action'); if (branches) { @@ -161,6 +239,7 @@ export function serializeChain( break; } } else if (scheme === 'filter') { + // Filter, build expression as `tail`. const value = model.get('value'); const datatype = model.get('range').at(0).id; args.push(variableOut); @@ -171,9 +250,16 @@ export function serializeChain( ); tail = serializeExpression(model.get('filter'), args); } else if (model.get('traversal')) { + // Add another property to traverse. predicates.push(model.get('selection')); if (variableOut === variableIn) variableOut = nextVariable(); } + // You may wonder why there is no final `else` clause. The reason is + // that some models in a chain only serve a purpose for the UI. Those + // are (1) the models corresponding to an "expect type" choice, which + // are currently left implicit in the SPARQL query (since the traversed + // properties already imply a type), and (2) filter selections, which + // are always followed by another model with `scheme === 'filter'`. ++index; } if (!tail) throw new RangeError( @@ -188,6 +274,10 @@ export function serializeChain( ); } +/** + * Recursion helper for `serializeChain` that accumulates the results when + * branching out over multiple subchains by and/or. + */ function serializeBranchout( branches: Collection, action: string, variableIn: string, ns: nsTable ): TaggedSyntax { @@ -199,8 +289,14 @@ function serializeBranchout( return combine[action](segments); } +// Callback used with `_.map` below to convert `nsTable` to the format that +// `../sparql/query-templates/preamble-template.hbs` requires. const explodeNs = (prefix, label) => ({ label, prefix }); +/** + * Convert the data model into a complete `CONSTRUCT` query including prefix + * headers. 
+ */ export default function modelToQuery( entry: Model, ns: nsTable = defaultNs ): string {