Skip to content

Commit

Permalink
Document modelToQuery (#455)
Browse files Browse the repository at this point in the history
  • Loading branch information
jgonggrijp committed May 31, 2021
1 parent 9da69ae commit 295a152
Showing 1 changed file with 99 additions and 3 deletions.
102 changes: 99 additions & 3 deletions frontend/src/semantic-search/modelToQuery.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
/**
* SemanticSearchView and its subviews work with an underlying data model,
* which is rich enough to represent both the recursive user interface and the
* recursive SPARQL query that is generated from it. In the current module, we
* implement the SPARQL code generation from this common data model.
*
* We generate a useful, logically complete subset of SPARQL that lends itself
* well to being constructed from the user interface. Specifically, we use
* inverse and sequence property paths, `EXISTS` and `NOT EXISTS`, `UNION`,
* pattern concatenation, and a handful of functions and operators.
*
* The default export of the module, `modelToQuery`, is the real interface.
* Some other functions are exported as well, but only for unit-testing purposes.
*/

import { map, find, uniqueId, partial, groupBy } from 'lodash';
import * as _ from 'lodash';

Expand All @@ -9,9 +24,12 @@ import Node from '../common-rdf/node';

import queryTemplate from './query-template';

/**
 * Namespace lookup table: each key is a short abbreviation and each value is
 * the namespace string that the abbreviation stands for.
 */
interface nsTable {
[abbreviation: string]: string;
}
// SPARQL is essentially a bi-modal query language: query conditions can be
// either patterns (consisting of triples and delimited by curly braces) or
// expressions (built up from function calls and operators). Expressions can be
// injected into patterns using `FILTER` statements. Conversely, patterns can be
// nested inside expressions using the `EXISTS` function. The next three types
// enable us to distinguish between the two modes.

interface TaggedExpression {
tag: 'expression';
Expand All @@ -25,18 +43,29 @@ interface TaggedPattern {

/**
 * Any SPARQL snippet handled in this module: either an expression or a
 * pattern. The `tag` property is the discriminant, so checking
 * `syntax.tag === 'expression'` tells the two modes apart.
 */
type TaggedSyntax = TaggedExpression | TaggedPattern;

/**
 * Return type of `_.groupBy(TaggedSyntax[], 'tag')`, useful for and/or.
 *
 * The snippets are bucketed by mode. Both keys are optional because
 * `groupBy` only creates a key when at least one element carries that tag;
 * `combineAnd` and `combineOr` therefore have to cope with either bucket
 * being `undefined`.
 */
interface Branches {
expression?: TaggedExpression[];
pattern?: TaggedPattern[];
};

/**
 * Table of known namespaces, keyed by abbreviation (for example `rdfs`). The
 * value is the namespace that the abbreviation stands for. Passed as the `ns`
 * argument to the serialization functions in this module.
 */
interface nsTable {
[abbreviation: string]: string;
}

/**
 * Namespace table that `modelToQuery` falls back to when the caller does not
 * supply its own `ns` argument.
 *
 * Each value is obtained by calling the identically named vocabulary function
 * without arguments.
 * NOTE(review): presumably such a call yields the base IRI of the vocabulary;
 * confirm against the modules these functions are imported from.
 */
const defaultNs = {
rdfs: rdfs(),
owl: owl(),
readit: readit(),
item: item(),
};

/**
* Serialize an IRI either as `<http://full.url>` or as `ns:short`, depending
* on available namespaces.
*/
export function serializeIri(iri: string, ns: nsTable): string {
let short = '';
find(ns, function(namespace, abbreviation) {
Expand All @@ -50,6 +79,9 @@ export function serializeIri(iri: string, ns: nsTable): string {
return short || `<${iri}>`;
}

/**
* Serialize a SPARQL-supported literal with a type in the `xsd` namespace.
*/
export function serializeLiteral(
literal: string, datatype: string, ns: nsTable
): string {
Expand All @@ -59,13 +91,23 @@ export function serializeLiteral(
case xsd.string:
return `"${literal}"`;
}
// Assume number since that's the only other type SPARQL supports.
return literal;
}

/**
 * Produce a fresh SPARQL variable name. The name consists of the fixed prefix
 * `?x` followed by whatever suffix lodash's `uniqueId` appends, so repeated
 * calls do not hand out the same variable twice.
 */
function nextVariable(): string {
    const variablePrefix = '?x';
    return uniqueId(variablePrefix);
}

/**
* In the context of a predicate path, we write the IRI of an inverse property
* as `^direct`. This is safer than always writing an inverse property as
* itself, firstly because this is also how related items are saved to the
* backend, and secondly because our frontend reasoner synthetically generates
* some "pretend" inverse properties from their direct counterparts when no
* existing inverse is found. The latter mechanism is implemented in
* `../utilities/relation-utilities.ts`.
*/
export function serializePredicate(predicate: Node, ns: nsTable): string {
const inverse = predicate.get(owl.inverseOf) as Node[];
if (inverse && inverse.length) return `^${serializeIri(inverse[0].id, ns)}`;
Expand All @@ -84,6 +126,12 @@ function tagPattern(pattern: string): TaggedPattern {
return { tag: 'pattern', pattern };
}

/**
* Serialize one atomic building block of an expression: either a function call
* with only literal or variable arguments, or a binary operator expression
* with likewise operands. See `./dropdown-constants.ts` for the possible
* filter models.
*/
export function serializeExpression(filter: Model, args: string[]): TaggedExpression {
const func = filter.get('function') || '';
const op = filter.get('operator');
Expand All @@ -95,6 +143,7 @@ function patternAsExpression({ pattern }: TaggedPattern): TaggedExpression {
return tagExpression(`EXISTS {\n${pattern}}`);
}

// Below, we generate two helpers for `combineAnd` and `combineOr`.
function joinTagged<K extends keyof Branches>(key: K) {
return function(constituents: Branches[K], glue: string): string {
return map(constituents, key).join(glue);
Expand All @@ -103,6 +152,11 @@ function joinTagged<K extends keyof Branches>(key: K) {
// `joinE(constituents, glue)` plucks the `expression` property from every
// constituent and joins the resulting strings with `glue`.
const joinE = joinTagged('expression');
// `joinP(constituents, glue)` does the same with the `pattern` property.
const joinP = joinTagged('pattern');

/**
* Apply logical AND to combine a bunch of SPARQL snippets which have already
* been pre-grouped by mode (expression/pattern). The resulting SPARQL snippet
* may be either a pattern or an expression, depending on what went in.
*/
export function combineAnd({ expression, pattern }: Branches): TaggedSyntax {
let exp = expression ? `${joinE(expression, ' && ')}` : '';
if (expression && expression.length > 1) exp = `(${exp})`;
Expand All @@ -114,6 +168,11 @@ export function combineAnd({ expression, pattern }: Branches): TaggedSyntax {
return tagPattern(pat);
}

/**
* Apply logical OR to combine a bunch of SPARQL snippets which have already
* been pre-grouped by mode (expression/pattern). The resulting SPARQL snippet
* may be either a pattern or an expression, depending on what went in.
*/
export function combineOr({ expression, pattern }: Branches): TaggedSyntax {
if (expression) {
const patExp = expression.concat(
Expand All @@ -125,11 +184,17 @@ export function combineOr({ expression, pattern }: Branches): TaggedSyntax {
return tagPattern(`{\n${joinP(pattern, '} UNION {\n')}}\n`);
}

// Lookup table to save an `if`/`else` down the line. `serializeBranchout`
// indexes it with the `action` string of a branch-out, so the keys must match
// the action names exactly. An action without an entry here would look up
// `undefined` and fail at the call site.
const combine = {
and: combineAnd,
or: combineOr,
};

/**
* Apply logical NOT to a SPARQL snippet which may be of either mode
* (expression/pattern). The result is always an expression; patterns need to
* be converted to an expression first because they cannot be negated directly.
*/
function negate(syntax: TaggedSyntax): TaggedExpression {
return tagExpression(
syntax.tag === 'expression' ?
Expand All @@ -138,6 +203,12 @@ function negate(syntax: TaggedSyntax): TaggedExpression {
);
}

/**
* Core recursive pattern/expression builder, representing an entire chain
* (row) from the UI, including any subchains that branch out from it. The
* recursion is depth-first and pre-order, so that the smallest constituents
* determine the mode of their containing constituents.
*/
export function serializeChain(
entry: Model, variableIn: string, ns: nsTable, index: number = 0
): TaggedSyntax {
Expand All @@ -146,11 +217,18 @@ export function serializeChain(
const predicates: Node[] = [];
const args: string[] = [];
let variableOut: string = variableIn;
// Conceptually, a chain consists of zero or more property traversals,
// optionally recursing on logical operators. Eventually, chains always
// terminate with a filter. `tail` will contain the SPARQL syntax that
// results either from recursion or termination. The purpose of the loop
// below is to accumulate the properties to traverse until the `tail` is
// found.
let tail: TaggedSyntax;
while (index < chain.length) {
const model = chain.at(index);
const scheme = model.get('scheme');
if (scheme === 'logic') {
// Logic, recurse.
const branches = model.get('branches');
const action = model.get('action');
if (branches) {
Expand All @@ -161,6 +239,7 @@ export function serializeChain(
break;
}
} else if (scheme === 'filter') {
// Filter, build expression as `tail`.
const value = model.get('value');
const datatype = model.get('range').at(0).id;
args.push(variableOut);
Expand All @@ -171,9 +250,16 @@ export function serializeChain(
);
tail = serializeExpression(model.get('filter'), args);
} else if (model.get('traversal')) {
// Add another property to traverse.
predicates.push(model.get('selection'));
if (variableOut === variableIn) variableOut = nextVariable();
}
// You may wonder why there is no final `else` clause. The reason is
// that some models in a chain only serve a purpose for the UI. Those
// are (1) the models corresponding to an "expect type" choice, which
// are currently left implicit in the SPARQL query (since the traversed
// properties already imply a type), and (2) filter selections, which
// are always followed by another model with `scheme === 'filter'`.
++index;
}
if (!tail) throw new RangeError(
Expand All @@ -188,6 +274,10 @@ export function serializeChain(
);
}

/**
* Recursion helper for `serializeChain` that accumulates the results when
* branching out over multiple subchains by and/or.
*/
function serializeBranchout(
branches: Collection, action: string, variableIn: string, ns: nsTable
): TaggedSyntax {
Expand All @@ -199,8 +289,14 @@ function serializeBranchout(
return combine[action](segments);
}

// Iteratee for `_.map` over an `nsTable`. It receives the namespace (value)
// first and its abbreviation (key) second, and repackages the pair as the
// `{ label, prefix }` record that
// `../sparql/query-templates/preamble-template.hbs` requires.
const explodeNs = function(prefix, label) {
    return { label: label, prefix: prefix };
};

/**
* Convert the data model into a complete `CONSTRUCT` query including prefix
* headers.
*/
export default function modelToQuery(
entry: Model, ns: nsTable = defaultNs
): string {
Expand Down

0 comments on commit 295a152

Please sign in to comment.