Merge branch 'develop' into feature/interface-clarifications

UUDigitalHumanitieslab · Mar 15, 2022 · 2895e63 · 2895e63
2 parents a8cf83a + 57fc2e9
commit 2895e63
Show file tree

Hide file tree

Showing 63 changed files with 4,318 additions and 2,475 deletions.
diff --git a/LICENSE b/LICENSE
@@ -1,29 +1,7 @@
-BSD 3-Clause License
-
-Copyright (c) 2018, UU Digital Humanities Lab
+Copyright (c) 2018-2022, UU Digital Humanities Lab
 All rights reserved.
 
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-* Neither the name of the copyright holder nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+This work is licensed under the Creative Commons
+Attribution-NonCommercial-ShareAlike 4.0 International License. To view a copy
+of this license, visit http://creativecommons.org/licenses/by-nc-sa/4.0/ or
+send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
diff --git a/backend/scripts/sources_to_elasticsearch.py b/backend/scripts/sources_to_elasticsearch.py
@@ -82,6 +82,7 @@ def title_author_to_index():
             }}, index=settings.ES_ALIASNAME)
         if document['hits']['total']['value']==0:
             print("serial {} not found in the index".format(serial))
+            continue
         source_id = document['hits']['hits'][0]['_id']
         es.update(index=settings.ES_ALIASNAME, id=source_id, body={
             "doc": {

diff --git a/documentation/Local-Debian-Read-It-setup.md b/documentation/Local-Debian-Read-It-setup.md
@@ -0,0 +1,144 @@
+This is a recipe for installing Read-It on Debian 10 Buster. It uses [Linux containers via LXD](https://linuxcontainers.org/lxd/getting-started-cli/) to keep things isolated from the host system, or you can skip the LXD part and move to installing the prerequisites. Modify as needed.
+
+# LXD Container setup
+
+Install LXD, add your user to the `lxd` group, and set up LXD with `lxd init`.
+
+Refresh group membership by logging out and back in (or via `su - yourname`). Then:
+
+    lxc launch images:debian/10 Readit
+    lxc start Readit
+
+Login as root:
+
+`lxc exec Readit -- /bin/bash`
+
+In the Debian container:
+
+    apt-get install sudo
+    adduser yourname
+    usermod -aG sudo yourname
+
+Copy files you need into the container. For example:
+
+`lxc file push -r ~/.ssh Readit/home/yourname/`
+
+Login as user `yourname`:
+
+`lxc exec Readit -- su --login yourname`
+
+Proxy container ports that you want to be available on your host system:
+
+    lxc config device add Readit postgresport proxy listen=tcp:0.0.0.0:5433 connect=tcp:127.0.0.1:5432
+    lxc config device add Readit elasticport proxy listen=tcp:0.0.0.0:9201 connect=tcp:127.0.0.1:9200
+    lxc config device add Readit fuseki proxy listen=tcp:0.0.0.0:3030 connect=tcp:127.0.0.1:3030
+    lxc config device add Readit mainApp proxy listen=tcp:0.0.0.0:8000 connect=tcp:127.0.0.1:8000
+
+# Install prerequisites on Debian 10
+
+As root:
+
+`apt-get install wget curl git git-flow ssh nano vim gnupg apt-transport-https lsb-release unzip python3 python3-pip pkg-config postgresql postgresql-client libpq-dev openjdk-11-jre`
+
+Notes:
+
+- Fuseki lists Java 8 as a requirement, but `openjdk-11-jre`, which is available by default, seems to work fine.
+- This installs the default version of `python3` available for Debian 10, which is 3.7 rather than 3.6, but this seems to work as well for Read-It.
+
+## Configure PostgreSQL
+
+    systemctl status postgresql.service
+    systemctl start postgresql.service
+    passwd postgres
+
+Allow logging in to PostgreSQL with role `postgres` from your `yourname` account:
+
+`nano /etc/postgresql/11/main/pg_ident.conf`:
+
+    yourname_postgres  postgres                postgres
+    yourname_postgres  yourname                postgres
+
+`nano /etc/postgresql/11/main/pg_hba.conf`:
+
+> local   all             postgres                                peer map=yourname_postgres
+
+`systemctl restart postgresql`
+
+Check that you can login as `postgres` from `yourname`:
+
+`psql -U postgres`
+
+Set a postgres role password (for TCP port-based connections):
+
+postgres=# `\password`
+
+## Install virtualenv:
+
+`pip3 install virtualenv`
+
+Make sure ~/.local/bin is on the PATH for virtualenv:
+
+`source ~/.profile`
+
+## Install ElasticSearch:
+
+    wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | apt-key add -
+    echo "deb https://artifacts.elastic.co/packages/7.x/apt stable main" > /etc/apt/sources.list.d/elastic-7.x.list
+    apt-get update && apt-get install elasticsearch
+
+`systemctl start elasticsearch`
+
+Check the installation:
+
+`curl http://127.0.0.1:9200`
+
+## Install NodeJS and NPM:
+
+    curl -sL https://deb.nodesource.com/setup_14.x | bash -
+    apt-get install nodejs
+    nodejs --version
+    npm --version
+
+## Install Yarn:
+
+    curl -sL https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add -
+    echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list
+    apt-get update && apt-get install yarn
+
+## Install Chrome
+
+    wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
+    apt-get install ./google-chrome-stable_current_amd64.deb
+
+    CHROME_MAIN_VERSION=`google-chrome-stable --version | sed -E 's/(^Google Chrome |\.[0-9]+ )//g'`
+    CHROMEDRIVER_VERSION=`curl -s "https://chromedriver.storage.googleapis.com/LATEST_RELEASE_$CHROME_MAIN_VERSION"`
+    curl "https://chromedriver.storage.googleapis.com/${CHROMEDRIVER_VERSION}/chromedriver_linux64.zip" -O
+    unzip chromedriver_linux64.zip -d /usr/local/bin
+
+# Install Fuseki
+
+As non-root `yourname` account:
+
+    mkdir -p ~/.local/bin
+    wget https://mirror.serverion.com/apache/jena/binaries/apache-jena-fuseki-3.17.0.tar.gz -O - | tar xz -C ~/.local/bin
+
+# Setup Read-It
+
+Follow the steps as described in the READMEs. For a quick development setup: 
+
+- Create an ElasticSearch index [as described here](https://github.com/UUDigitalHumanitieslab/readit-interface/blob/develop/backend/README.md#setting-up-elasticsearch).
+
+- Run Fuseki:
+
+        mkdir ~/readit_data
+        ~/.local/bin/apache-jena-fuseki-3.17.0/fuseki-server --loc=/home/yourname/readit_data --update --localhost /readit
+
+    - At http://localhost:3030/manage.html, add a dataset named `readit-test`
+
+- Download and install -- basically:
+
+        git clone git@github.com:UUDigitalHumanitieslab/readit-interface.git
+        cd readit-interface
+        python bootstrap.py # Use "psql -U postgres" as psql prompt
+
+- `yarn start` (development mode)
diff --git a/documentation/SPARQL-API.md b/documentation/SPARQL-API.md
@@ -0,0 +1,100 @@
+# 1. Endpoints
+| graph                                   | query endpoint             | update endpoint             |
+|-----------------------------------------|----------------------------|-----------------------------|
+| https://read-it.hum.uu.nl/item/         | /sparql/item/query         | /sparql/item/update         |
+| https://read-it.hum.uu.nl/vocab#        | /sparql/vocab/query        |                             |
+| https://read-it.hum.uu.nl/source/       | /sparql/source/query       | /sparql/source/update       |
+| https://read-it.hum.uu.nl/ontology#     | /sparql/ontology/query     |                             |
+| https://read-it.hum.uu.nl/nlp-ontology# | /sparql/nlp-ontology/query | /sparql/nlp-ontology/update |
+
+Example full url for endpoint: https://read-it.hum.uu.nl/sparql/item/query
+
+# 2. Query protocol
+
+## Request
+- GET request to a query endpoint.
+    - Obtain the triples in the corresponding graph.
+- POST request to a query endpoint.  
+    - Obtain query results.
+    - Only the corresponding graph will be queried.
+    - Request body should contain a `query` parameter containing a SPARQL query. `DESCRIBE` queries are not supported.  
+    - An `Accept` header can be included to specify the desired output format, see below for options.  
+
+## Output formats
+| `Accept` header                 | Format                                                                                | Applicable query types     |
+|---------------------------------|---------------------------------------------------------------------------------------|------------------------|
+| application/sparql-results+json (default) | [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/)  | ASK, SELECT            |
+| application/sparql-results+xml  | [SPARQL 1.1 Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/)       | ASK, SELECT            |
+| text/turtle                     | [Turtle](https://www.w3.org/TR/turtle/)                                               | ASK, SELECT, CONSTRUCT |
+
+## Examples
+Retrieve 5 triples from the source graph:
+#### cURL
+```
+curl -d "query=SELECT ?s ?p ?o WHERE {?s ?p ?o } LIMIT 5"  -X POST -H "Accept: text/turtle" https://read-it.hum.uu.nl/sparql/source/query
+```
+
+#### Python
+Uses the `requests` package.
+```
+import requests
+q = 'SELECT ?s ?p ?o WHERE {?s ?p ?o } LIMIT 5'
+endpoint = 'https://read-it.hum.uu.nl/sparql/source/query'
+response = requests.post(endpoint,
+                         data={'query': q},
+                         headers={'Accept': 'text/turtle'})
+results = response.text
+print(results)
+```
+
+# 3. Authentication
+Make a POST request to https://read-it.hum.uu.nl/rest-auth/login/ (note the trailing slash) with your username and password in the body to obtain an authentication token.
+
+## Examples
+#### cURL
+```
+curl -d "username=yourusername&password=yourpassword" -X POST https://read-it.hum.uu.nl/rest-auth/login/
+```
+
+#### Python
+```
+import requests
+login = requests.post('https://read-it.hum.uu.nl/rest-auth/login/',
+                      data={'username': 'yourusername',
+                            'password': 'yourpassword'})
+key = login.json()['key']
+
+```
+
+# 4. Update protocol
+
+## Request
+- POST request to an update endpoint.
+    - Only the corresponding graph can be updated.
+    - Only specific users are allowed to UPDATE. Contact admins for information.
+    - Request body should contain an `update` parameter containing valid SPARQL UPDATE.
+    - An `Authorization` header should be included containing your `Token <your_token>`. See above.
+    - The following operations are not supported, because they make no sense in the context of a single graph, or are unsafe:
+`LOAD`, `CLEAR`,`DROP`, `ADD`, `MOVE`, `COPY`, `CREATE`. 
+
+## Examples
+
+#### Python (includes authentication)
+```
+import requests
+# Authentication
+login = requests.post('https://read-it.hum.uu.nl/rest-auth/login/',
+                      data={'username': 'yourusername',
+                            'password': 'yourpassword'})
+key = login.json()['key']
+
+# Update
+q = 'INSERT DATA { <https://read-it.hum.uu.nl/nlp-ontology#content> a <http://www.w3.org/2000/01/rdf-schema#Class> }'
+endpoint = 'https://read-it.hum.uu.nl/sparql/nlp-ontology/update'
+response = requests.post(endpoint,
+                         data={'update': q},
+                         headers={'Authorization': 'Token {}'.format(key)})
+success_msg = response.text
+print(success_msg)
+print(response.status_code)
+```
diff --git a/frontend/gulpfile.ts b/frontend/gulpfile.ts
@@ -6,11 +6,15 @@ import { Readable } from 'stream';
 import { src, dest, symlink, parallel, series, watch as watchApi } from 'gulp';
 import * as vinylStream from 'vinyl-source-stream';
 import * as vinylBuffer from 'vinyl-buffer';
+import * as nodeSass from 'node-sass';
 import * as log from 'fancy-log';
 import * as exorcist from 'exorcist';
 import globbedBrowserify from 'gulp-browserify-watchify-glob';
 import * as loadPlugins from 'gulp-load-plugins';
 const plugins = loadPlugins();
+// Must come after `const plugins = loadPlugins();` because it binds plugins.sass to node-sass.
+const sass = plugins.sass(nodeSass);
+
 
 import * as browserify from 'browserify';
 import * as tsify from 'tsify';
@@ -28,6 +32,7 @@ import * as through2 from 'through2';
 import chalk from 'chalk';
 import * as i18nextParser from 'i18next-parser';
 
+
 type LibraryProps = {
     module: string,
     browser?: string,
@@ -66,10 +71,10 @@ const sourceDir = `src`,
     jsModuleType = `commonjs`,
     tsOptions = {
         target: `es5`,
-        lib: ['DOM', 'ES5', 'ES6', 'DOM.Iterable', 'ScriptHost'],
+        lib: ['DOM', 'ES5', 'ES6', 'es2020.string', 'DOM.Iterable', 'ScriptHost'],
         resolveJsonModule: true,
         paths: { configModuleName: indexConfig },
-        baseUrl: '.',
+        baseUrl: '.'
     },
     aliasOptions = {
         aliases: { [configModuleName]: `./${indexConfig}` },
@@ -332,7 +337,7 @@ export function style() {
     if (production) postcssPlugins.push(cssnano());
     return src(mainStylesheet)
         .pipe(ifNotProd(plugins.sourcemaps.init()))
-        .pipe(plugins.sass({ includePaths: [nodeDir] }))
+        .pipe(sass({ includePaths: [nodeDir] }))
         .pipe(plugins.postcss(postcssPlugins))
         .pipe(plugins.rename(cssBundleName))
         .pipe(ifNotProd(plugins.sourcemaps.write('.')))