diff --git a/README.md b/README.md index d859288..a231f11 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,11 @@ If you prefer a containerized approach, see a [variation of this script for Dock ### Install -Download the script and grant file permissions to execute: `wget https://github.com/felixlohmeier/openrefine-batch/raw/master/openrefine-batch.sh && chmod +x openrefine-batch.sh` +Download the script and grant file permissions to execute: +``` +wget https://github.com/felixlohmeier/openrefine-batch/raw/master/openrefine-batch.sh +chmod +x openrefine-batch.sh +``` That's all. The script will automatically download copies of OpenRefine and the python client on first run and will tell you if something (python, java) is missing. @@ -288,8 +292,14 @@ A variation of the shell script orchestrates a [docker container for OpenRefine] **Install** -1. Install [Docker](https://docs.docker.com/engine/installation/#on-linux) and **a)** [configure Docker to start on boot](https://docs.docker.com/engine/installation/linux/linux-postinstall/#configure-docker-to-start-on-boot) or **b)** start Docker on demand each time you use the script: `sudo systemctl start docker` -2. Download the script and grant file permissions to execute: `wget https://github.com/felixlohmeier/openrefine-batch/raw/master/openrefine-batch-docker.sh && chmod +x openrefine-batch-docker.sh` +1. Install [Docker](https://docs.docker.com/engine/installation/#on-linux) + * **a)** [configure Docker to start on boot](https://docs.docker.com/engine/installation/linux/linux-postinstall/#configure-docker-to-start-on-boot) + * or **b)** start Docker on demand each time you use the script: `sudo systemctl start docker` +2. 
Download the script and grant file permissions to execute: +``` +wget https://github.com/felixlohmeier/openrefine-batch/raw/master/openrefine-batch-docker.sh +chmod +x openrefine-batch-docker.sh +``` **Usage** @@ -298,10 +308,36 @@ mkdir input cp INPUTFILES input/ mkdir config cp CONFIGFILES config/ -sudo ./openrefine-batch-docker.sh -a input/ -b config/ -c OUTPUT/ +./openrefine-batch-docker.sh -a input/ -b config/ -c OUTPUT/ ``` -Why `sudo`? Non-root users can only access the Unix socket of the Docker daemon by using `sudo`. If you created a Docker group in [Post-installation steps for Linux](https://docs.docker.com/engine/installation/linux/linux-postinstall/) then you may call the script without `sudo`. +The script may ask you for sudo privileges. Why `sudo`? Non-root users can only access the Unix socket of the Docker daemon by using `sudo`. If you created a Docker group in [Post-installation steps for Linux](https://docs.docker.com/engine/installation/linux/linux-postinstall/) then the script will not need `sudo`. + +**Example** + +[Example Powerhouse Museum](examples/powerhouse-museum) + +download example data + +``` +wget https://github.com/opencultureconsulting/openrefine-batch/archive/master.zip +unzip master.zip openrefine-batch-master/examples/* +mv openrefine-batch-master/examples .
+rm -f master.zip +``` + +execute openrefine-batch-docker.sh + +``` +./openrefine-batch-docker.sh \ +-a examples/powerhouse-museum/input/ \ +-b examples/powerhouse-museum/config/ \ +-c examples/powerhouse-museum/output/ \ +-f tsv \ +-i processQuotes=false \ +-i guessCellValueTypes=true \ +-RX +``` ### Todo diff --git a/openrefine-batch-docker.sh b/openrefine-batch-docker.sh index da51a69..d580579 100755 --- a/openrefine-batch-docker.sh +++ b/openrefine-batch-docker.sh @@ -1,23 +1,32 @@ #!/bin/bash -# openrefine-batch-docker.sh, Felix Lohmeier, v1.12, 2019-07-29 +# openrefine-batch-docker.sh, Felix Lohmeier, v1.13, 2019-08-06 # https://github.com/felixlohmeier/openrefine-batch # check system requirements -DOCKER="$(which docker 2> /dev/null)" +DOCKER="$(command -v docker 2> /dev/null)" if [ -z "$DOCKER" ] ; then echo 1>&2 "This action requires you to have 'docker' installed and present in your PATH. You can download it for free at http://www.docker.com/" exit 1 fi DOCKERINFO="$(docker info 2>/dev/null | grep 'Server Version')" -if [ -z "$DOCKERINFO" ] ; then - echo 1>&2 "This action requires you to start the docker daemon. Try 'sudo systemctl start docker' or 'sudo start docker'. If the docker daemon is already running then maybe some security privileges are missing to run docker commands. Try to run the script with 'sudo ./openrefine-batch-docker.sh ...'" - exit 1 +if [ -z "$DOCKERINFO" ] +then + echo "command 'docker info' failed, trying again with sudo..." + DOCKERINFO="$(sudo docker info 2>/dev/null | grep 'Server Version')" + if [ -z "$DOCKERINFO" ] ; then + echo 1>&2 "This action requires you to start the docker daemon. Try 'sudo systemctl start docker' or 'sudo start docker'.
If the docker daemon is already running then maybe some security privileges are missing to run docker commands." + exit 1 + fi + echo "OK" # printed only after the sudo probe returned a server version + docker=(sudo docker) # later docker calls expand this array, so they run through sudo +else + docker=(docker) fi # help screen function usage () { cat < "${outputdir}/projects.tmp" + "${docker[@]}" run --rm --link ${uuid} felixlohmeier/openrefine-client:v0.3.4 -H ${uuid} -l > "${outputdir}/projects.tmp" projectids=($(cut -c 2-14 "${outputdir}/projects.tmp")) projectnames=($(cut -c 17- "${outputdir}/projects.tmp")) cat "${outputdir}/projects.tmp" && rm "${outputdir:?}/projects.tmp" @@ -280,11 +289,11 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then rsync -a --exclude='*.project/history' "${crossdir}"/*.project "${outputdir}" # restart server to advertise copied projects echo "restart OpenRefine server to advertise copied projects..." - docker stop -t=5000 ${uuid} - docker rm ${uuid} - sudo docker run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data - until sudo docker run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done - docker attach ${uuid} & + "${docker[@]}" stop -t=5000 ${uuid} + "${docker[@]}" rm ${uuid} + "${docker[@]}" run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data + until "${docker[@]}" run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.4 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done + "${docker[@]}" attach ${uuid} & echo "" fi @@ -303,7 +312,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then for jsonfile in "${jsonfiles[@]}" ; do echo "transform ${jsonfile}..."
# run client with apply command - sudo docker run --rm --link ${uuid} -v ${configdir}:/data:z felixlohmeier/openrefine-client -H ${uuid} -f ${jsonfile} ${projectids[i]} + "${docker[@]}" run --rm --link ${uuid} -v ${configdir}:/data:z felixlohmeier/openrefine-client:v0.3.4 -H ${uuid} -f ${jsonfile} ${projectids[i]} # allocated system resources ps -o start,etime,%mem,%cpu,rss -C java --sort=start memoryload+=($(ps --no-headers -o rss -C java)) @@ -311,11 +320,11 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then # restart server to clear memory if [ "$restarttransform" = "true" ]; then echo "save project and restart OpenRefine server..." - docker stop -t=5000 ${uuid} - docker rm ${uuid} - sudo docker run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data - until sudo docker run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done - docker attach ${uuid} & + "${docker[@]}" stop -t=5000 ${uuid} + "${docker[@]}" rm ${uuid} + "${docker[@]}" run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data + until "${docker[@]}" run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.4 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done + "${docker[@]}" attach ${uuid} & fi echo "" done @@ -334,7 +343,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then filename=${projectnames[i]%.*} echo "export to file ${filename}.${exportformat}..."
# run client with export command - sudo docker run --rm --link ${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine-client -H ${uuid} -E --output="${filename}.${exportformat}" "${templating[@]}" ${projectids[i]} + "${docker[@]}" run --rm --link ${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine-client:v0.3.4 -H ${uuid} -E --output="${filename}.${exportformat}" "${templating[@]}" ${projectids[i]} # show allocated system resources ps -o start,etime,%mem,%cpu,rss -C java --sort=start memoryload+=($(ps --no-headers -o rss -C java)) @@ -344,11 +353,11 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then # restart server to clear memory if [ "$restartfile" = "true" ]; then echo "restart OpenRefine server..." - docker stop -t=5000 ${uuid} - docker rm ${uuid} - sudo docker run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data - until sudo docker run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done - docker attach ${uuid} & + "${docker[@]}" stop -t=5000 ${uuid} + "${docker[@]}" rm ${uuid} + "${docker[@]}" run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data + until "${docker[@]}" run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.4 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done + "${docker[@]}" attach ${uuid} & fi echo "" diff --git a/openrefine-batch.sh b/openrefine-batch.sh index 043b6c1..e078939 100755 --- a/openrefine-batch.sh +++ b/openrefine-batch.sh @@ -1,5 +1,5 @@ #!/bin/bash -# openrefine-batch.sh, Felix Lohmeier, v1.12, 2019-07-29 +# openrefine-batch.sh, Felix Lohmeier, v1.13, 2019-08-06 # https://github.com/felixlohmeier/openrefine-batch # declare download URLs for OpenRefine and OpenRefine client