Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add optional argument of table schema source #348

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
org.apache.logging.log4j/log4j-core {:mvn/version "2.20.0"}
org.apache.logging.log4j/log4j-slf4j2-impl {:mvn/version "2.20.0"}}}
:dev {
:extra-deps {com.clojure-goes-fast/clj-async-profiler {:mvn/version "1.0.4"}}
:extra-deps {com.clojure-goes-fast/clj-async-profiler {:mvn/version "1.0.4"}
org.clojure/tools.namespace {:mvn/version "1.4.4"}}

:jvm-opts ["-Djdk.attach.allowAttachSelf" ;; for jdk9+
]
Expand Down
21 changes: 16 additions & 5 deletions src/csv2rdf/csvw.clj
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,13 @@
- `:annotated` a custom mode, not part of the standard, which is
like `:minimal`, but it also includes RDF data from the CSVW metadata
json file."
([tabular-source metadata-source] (csv->rdf tabular-source metadata-source {}))
([tabular-source metadata-source {:keys [mode] :as options}]
([tabular-source metadata-source] (csv->rdf tabular-source metadata-source nil {}))
([tabular-source
metadata-source
table-schema-source
{:keys [mode]}]
(let [mode (or mode :standard)
{:keys [tables] :as metadata} (processing/get-metadata tabular-source metadata-source)
{:keys [tables] :as metadata} (processing/get-metadata tabular-source metadata-source table-schema-source)
table-group-dialect (:dialect metadata)
output-tables (remove properties/suppress-output? tables)
{:keys [statements] :as ctx} (table-group-context mode metadata)
Expand All @@ -73,8 +76,16 @@
"Run csv->rdf for the given tabular/metadata sources and options then write the resulting
statements to the given destination. destination must implement
grafter-2.rdf.protocols/ITripleWriteable."
[tabular-source metadata-source destination options]
(gproto/add destination (csv->rdf tabular-source metadata-source options)))
[tabular-source
metadata-source
table-schema-source
destination
options]
(gproto/add destination (csv->rdf
tabular-source
metadata-source
table-schema-source
options)))

(defn csv->rdf->file
"Run csv->rdf for the given tabular/metadata source and options then write the resulting
Expand Down
13 changes: 10 additions & 3 deletions src/csv2rdf/main.clj
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
[["-t" "--tabular TABULAR" "Location of the tabular file"]
["-u" "--user-metadata METADATA" "Location of the metadata file"]
["-o" "--output-file OUTPUT" "Output file to write to"]
["-s" "--table-schema TABLE-SCHEMA" "A directory containing any referenced table schema files"]
["-m" "--mode MODE" "CSVW mode to run"
:validate [#(contains? #{:minimal :standard :annotated} %)]
:default :standard
Expand Down Expand Up @@ -51,9 +52,14 @@
:summary summary}))
options)))

(defn- write-output [writer {:keys [rdf-format tabular-source metadata-source mode]}]
(defn- write-output [writer {:keys [rdf-format tabular-source metadata-source mode table-schema-source]}]
(let [dest (gio/rdf-writer writer :format rdf-format :prefixes nil)]
(csvw/csv->rdf->destination tabular-source metadata-source dest {:mode mode})))
(csvw/csv->rdf->destination
tabular-source
metadata-source
table-schema-source
dest
{:mode mode})))

(defmulti display-error
"Displays an exception in the UI"
Expand All @@ -71,9 +77,10 @@

(defn- inner-main [args]
(let [options (parse-cli-options args)
{:keys [mode tabular user-metadata output-file]} options
{:keys [mode tabular user-metadata output-file table-schema]} options
opts {:tabular-source (some-> tabular parse-source)
:metadata-source (some-> user-metadata parse-source)
:table-schema-source (some-> table-schema ((fn [t] (URI. (str "file://" t)))))
:rdf-format (or (some-> output-file formats/->rdf-format) RDFFormat/TURTLE)
:mode mode}
output-file (some-> output-file io/file)]
Expand Down
8 changes: 4 additions & 4 deletions src/csv2rdf/metadata.clj
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
[csv2rdf.source :as source]
[clojure.spec.alpha :as s]))

(defn parse-metadata-json [base-uri json]
(let [context (make-context base-uri)]
(defn parse-metadata-json [base-uri json table-schema-source]
(let [context (make-context base-uri table-schema-source)]
(cond
(table-group/looks-like-table-group-json? json)
(properties/set-table-group-parent-references (table-group/parse-table-group-json context json))
Expand All @@ -18,9 +18,9 @@

:else (make-error context "Expected top-level of metadata document to describe a table or table group"))))

(defn parse-table-group-from-source [source]
(defn parse-table-group-from-source [source table-schema-source]
(let [json (source/get-json source)]
(parse-metadata-json (source/->uri source) json)))
(parse-metadata-json (source/->uri source) json table-schema-source)))

(s/fdef parse-table-group-from-source
:args (s/cat :source (s/and ::source/uriable ::source/json-source)))
Expand Down
13 changes: 8 additions & 5 deletions src/csv2rdf/metadata/context.clj
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,14 @@
(defn document-uri ^URI [context]
(:document-uri context))

(defn make-context [metadata-uri]
{:base-uri metadata-uri :document-uri metadata-uri :path [] :language nil})
(defn make-context
  "Builds the initial metadata-parsing context for the document at
  metadata-uri.

  The returned map uses metadata-uri as both :base-uri and :document-uri,
  starts with an empty :path and a nil :language, and carries
  table-schema-source under :table-schema-source. The one-argument arity
  passes nil for table-schema-source."
  ([metadata-uri]
   (make-context metadata-uri nil))
  ([metadata-uri table-schema-source]
   ;; array-map keeps the same key order as the equivalent map literal.
   (array-map :base-uri metadata-uri
              :document-uri metadata-uri
              :path []
              :language nil
              :table-schema-source table-schema-source)))

(defn language-code-or-default [{:keys [language] :as context}]
(or language "und"))
Expand All @@ -29,9 +35,6 @@
(defn append-path [context path-element]
(update context :path conj path-element))

(defn resolve-uri [{:keys [^URI base-uri] :as context} ^URI uri]
(util/resolve-uri base-uri uri))

(defn with-document-uri [context ^URI new-document-uri]
(assoc context :document-uri new-document-uri))

4 changes: 2 additions & 2 deletions src/csv2rdf/metadata/schema.clj
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
(ns csv2rdf.metadata.schema
(:require [csv2rdf.metadata.validator :refer [make-warning make-error invalid chain array-of type-eq strict variant
(:require [csv2rdf.metadata.validator :refer [make-warning make-error array-of type-eq strict variant
type-error-message]]
[csv2rdf.json :as mjson]
[csv2rdf.metadata.context :refer [append-path]]
[csv2rdf.metadata.types :refer [object-of object-property link-property column-reference id]]
[csv2rdf.metadata.types :refer [link-property column-reference id]]
[csv2rdf.metadata.inherited :refer [metadata-of]]
[csv2rdf.metadata.column :as column]
[clojure.string :as string]
Expand Down
11 changes: 8 additions & 3 deletions src/csv2rdf/metadata/table.clj
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
(ns csv2rdf.metadata.table
(:require [csv2rdf.metadata.validator :refer [array-of bool type-eq strict]]
[csv2rdf.metadata.types :refer [link-property note table-direction object-property id contextual-object]]
[csv2rdf.metadata.types :refer [link-property note table-direction table-schema-object-property id contextual-object]]
[csv2rdf.metadata.inherited :refer [metadata-of]]
[csv2rdf.metadata.schema :as schema]
[csv2rdf.metadata.transformation :as transformation]
Expand All @@ -20,7 +20,7 @@
:notes (array-of note)
:suppressOutput bool
:tableDirection table-direction
:tableSchema (object-property schema/schema)
:tableSchema (table-schema-object-property schema/schema)
:transformations (array-of transformation/transformation)
:id id
:type (type-eq "Table")}}))
Expand All @@ -45,7 +45,12 @@
{:url table-uri
:tableSchema schema})

(defn ^{:metadata-spec "5.4.3"} validate-compatible [validating? {^URI uri1 :url schema1 :tableSchema :as table1} {^URI uri2 :url schema2 :tableSchema :as table2}]
(defn ^{:metadata-spec "5.4.3"} validate-compatible
  "Checks two table descriptions for compatibility.

  The :url of each table is normalised and compared; a mismatch is only
  reported through logging/log-warning, whatever the value of validating?.
  The :tableSchema of each table is then passed, together with validating?,
  to schema/validate-compatible, whose result is returned."
  [validating? table1 table2]
  (let [{^URI left-uri :url left-schema :tableSchema} table1
        {^URI right-uri :url right-schema :tableSchema} table2
        same-url? (= (.normalize left-uri) (.normalize right-uri))]
    (if same-url?
      nil
      (logging/log-warning
        (format "Table URIs %s and %s not equal after normalisation" left-uri right-uri)))
    (schema/validate-compatible validating? left-schema right-schema)))
Expand Down
35 changes: 24 additions & 11 deletions src/csv2rdf/metadata/types.clj
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
(ns csv2rdf.metadata.types
(:require [csv2rdf.metadata.validator :refer [make-warning default-if-invalid variant invalid array-of kvps optional-key
required-key any map-of one-of string invalid? warn-invalid
chain try-parse-with where make-error uri ignore-invalid
required-key map-of one-of string invalid? warn-invalid
chain where make-error uri ignore-invalid
type-error-message with-error-handler]]
[csv2rdf.metadata.context :refer [resolve-uri append-path language-code-or-default
[csv2rdf.metadata.context :refer [append-path language-code-or-default
base-key language-key id-key update-from-local-context with-document-uri]]
[csv2rdf.json-ld :refer [expand-uri-string]]
[csv2rdf.json :refer [array? object?] :as mjson]
Expand Down Expand Up @@ -60,17 +60,20 @@

(def default-uri (URI. ""))

(defn ^{:metadata-spec "6.3"} normalise-link-property
"Normalises a link property URI by resolving it against the current base URI."
[context uri]
(resolve-uri context uri))

(defn ^{:metadata-spec "5.1.2"} link-property
([context x] (link-property context x warn-invalid))
([context x error-fn]
(let [v (chain (default-if-invalid (with-error-handler (variant {:string uri}) error-fn) default-uri) normalise-link-property)]
(let [v (chain (default-if-invalid (with-error-handler (variant {:string uri}) error-fn) default-uri)
#(util/resolve-uri (:base-uri %1) %2))]
(v context x))))

(defn ^{:metadata-spec "5.1.2"} link-property-from-table-schema-source
  "Link property validator that prefers the context's :table-schema-source
  as the base for resolution.

  x is validated as a string URI; if that fails, error-fn handles the
  failure and default-uri is used instead. The resulting URI is resolved
  with util/resolve-uri against :table-schema-source when the context has
  one, otherwise against :base-uri. The three-argument arity takes error-fn
  explicitly; the two-argument arity uses warn-invalid.

  NOTE(review): when :table-schema-source denotes a directory, relative
  links presumably only resolve inside it if the URI ends with a slash -
  confirm against util/resolve-uri and the code that builds the source URI."
  ([context x]
   (link-property-from-table-schema-source context x warn-invalid))
  ([context x error-fn]
   (let [string-uri (with-error-handler (variant {:string uri}) error-fn)
         uri-or-default (default-if-invalid string-uri default-uri)
         resolve-link (fn [ctx link]
                        (let [base (or (:table-schema-source ctx) (:base-uri ctx))]
                          (util/resolve-uri base link)))
         validator (chain uri-or-default resolve-link)]
     (validator context x))))

(defn id
"An id is a link property whose value cannot begin with _:"
[context x]
Expand Down Expand Up @@ -123,7 +126,7 @@

(defn ^{:metadata-spec "5.8.2"} expand-description-object-type-uri
"If type is the name of a description object defined in the metadata specification (e.g. Table, Schema),
returns the corresponding id URI for the type. Otherwise returns nil."
returns the corresponding id URI for the type. Otherwise, returns nil."
[type]
(if (contains? description-object-types type)
(util/set-fragment csvw type)))
Expand Down Expand Up @@ -164,7 +167,7 @@
(URI. expanded)
(catch URISyntaxException ex
(make-error context (format "Invalid URI '%s'" s))))]
(resolve-uri context uri))))
(util/resolve-uri (:base-uri context) uri))))
(make-error context (type-error-message #{:string} (mjson/get-json-type x)))))

(defn type-one-of [allowed-types]
Expand Down Expand Up @@ -397,3 +400,13 @@
(variant {:string (chain link-property (linked-object-property object-validator))
:object object-validator
:default {}}))

(defn ^{:metadata-spec "5.1.5"} table-schema-object-property
  "Returns a validator for a table schema property, built with variant:
    - a string value is resolved by link-property-from-table-schema-source
      (which prefers the context's :table-schema-source over :base-uri) and
      the result is passed to (linked-object-property object-validator);
    - an object value, i.e. a schema written inline in the metadata
      document, goes straight to object-validator;
    - :default is an empty map."
  [object-validator]
  (let [load-linked-schema (linked-object-property object-validator)
        resolve-then-load (chain link-property-from-table-schema-source
                                 load-linked-schema)]
    (variant {:string resolve-then-load
              :object object-validator
              :default {}})))
2 changes: 1 addition & 1 deletion src/csv2rdf/tabular/metadata.clj
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@
(if-let [metadata-doc (resolve-associated-metadata uri metadata-link)]
(do
(.close stream)
(meta/parse-metadata-json uri metadata-doc))
(meta/parse-metadata-json uri metadata-doc nil))
(let [dialect (dialect/get-default-dialect headers)
options (dialect/dialect->options dialect)
rows (reader/make-row-seq stream options)]
Expand Down
10 changes: 5 additions & 5 deletions src/csv2rdf/tabular/processing.clj
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
(table/validate-compatible validating? user-table table-metadata)
(table/compatibility-merge user-table table-metadata)))

(defn- from-metadata-source [metadata-source]
(let [{:keys [tables] :as user-table-group} (meta/parse-table-group-from-source metadata-source)
(defn- from-metadata-source [metadata-source table-schema-source]
(let [{:keys [tables] :as user-table-group} (meta/parse-table-group-from-source metadata-source table-schema-source)
validating? false
merged-tables (mapv (fn [table] (validate-merge-table validating? table)) tables)
merged-table-group (assoc user-table-group :tables merged-tables)]
Expand All @@ -27,13 +27,13 @@
(defn ^{:tabular-spec "6.1"} get-metadata
"Retrieves and resolves the metadata given either a tabular data source or metadata source. If user metadata
is provided, each referenced table definition is validated against the corresponding tabular data file."
[tabular-source metadata-source]
[tabular-source metadata-source table-schema-source]
(cond
(and (some? tabular-source) (some? metadata-source))
(from-metadata-source (meta/overriding-metadata tabular-source metadata-source))
(from-metadata-source (meta/overriding-metadata tabular-source metadata-source) table-schema-source)

(some? metadata-source)
(from-metadata-source metadata-source)
(from-metadata-source metadata-source table-schema-source)

(some? tabular-source)
(from-tabular-source tabular-source)
Expand Down
4 changes: 2 additions & 2 deletions test/csv2rdf/metadata_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"tableSchema" {"columns" [{"name" "col1"}
{"name" "col2"}]}
"tables" [{"url" "http://example.com/table.csv"}]}
parsed (parse-metadata-json (URI. "http://example.com/metadata.json") json)
parsed (parse-metadata-json (URI. "http://example.com/metadata.json") json nil)
table (get-in parsed [:tables 0])
dialect (properties/dialect table)
schema (properties/table-schema table)]
Expand All @@ -28,7 +28,7 @@
"dialect" {"quoteChar" "{"}
"tableSchema" {"columns" [{"name" "col1"}
{"name" "col2"}]}}
parsed (parse-metadata-json (URI. "http://example.com/metadata.json") json)
parsed (parse-metadata-json (URI. "http://example.com/metadata.json") json nil)
table (get-in parsed [:tables 0])
dialect (properties/dialect table)
schema (properties/table-schema table)]
Expand Down
2 changes: 1 addition & 1 deletion test/csv2rdf/w3c_csvw_suite_test/impl.clj
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
(logging/with-logger logger
(with-open [destination (repo/->connection repo)]
(try
(csv->rdf->destination tabular-source metadata-source destination options)
(csv->rdf->destination tabular-source metadata-source nil destination options)
{:errors [] :warnings @(:warnings logger) :result (into [] (gio/statements destination))}
(catch Exception ex
{:errors [(.getMessage ex)] :warnings @(:warnings logger) :result nil}))))))