Review notes (text before the first "diff --git" header is ignored by git apply / patch):
- nginx.conf: X-Real-IP now forwards $remote_addr; the added file sent the literal string "ip_address".
- Dockerfile: "apt-get install git" now passes -y, matching the nginx install next to it, so a
  non-interactive build cannot abort on a confirmation prompt.
- docker-compose.yaml: the DB_HOST build arg now interpolates $DB_HOST like the REDIS_HOST and
  DOCKER_KAFKA_HOST args beside it.
  NOTE(review): the new Dockerfile declares no "ARG DB_HOST", so this arg looks unused - confirm intent.
- README.md: small wording fixes in the added sections.
- NOTE(review): the credentials in the comment block removed from baskerville.yaml remain in git history;
  rotate them if they were ever real.
- NOTE(review): this patch was rebuilt from a whitespace-collapsed copy. Line breaks follow the hunk
  counts, but indentation and the position of blank context lines are best guesses, and the index
  hashes were left untouched; regenerate the patch from the repository before applying it.

diff --git a/README.md b/README.md
index b43fdfb..bbd9987 100644
--- a/README.md
+++ b/README.md
@@ -130,4 +130,28 @@ docker-compose exec kafka bash
 
 # the following will consume / display a few messages, just to make sure all is well
 /opt/bitnami/kafka/bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic deflect.logs --offset 6131 --partition 0
-```
\ No newline at end of file
+```
+
+### Dashboard
+Notes for the Baskerville dashboard:
+- The Dockerfile is heavy, as it is a multistage Dockerfile. It uses Nginx internally but, since we already have an nginx service,
+it would be nice to have a common volume for the front-end to be served and proper networking for the backend to be served also (only for the web-sockets)
+- It has Baskerville as a dependency (with all that this entails, like esretriever, iforest, pyspark, etc., which means different pyspark versions with conflicts and a lot of build time)
+
+
+
+### Misc
+In case baskerville_preprocessing and baskerville_postprocessing fail to start because the `baskerville` database does not exist:
+```bash
+docker-compose exec postgres bash
+psql
+CREATE DATABASE baskerville;
+\q
+exit
+
+docker-compose restart baskerville_preprocessing baskerville_postprocessing
+```
+
+### Firewall
+- Open port 29092 for Kafka connections
+
diff --git a/containers/baskerville_dashboard/Dockerfile b/containers/baskerville_dashboard/Dockerfile
new file mode 100644
index 0000000..1eb87f6
--- /dev/null
+++ b/containers/baskerville_dashboard/Dockerfile
@@ -0,0 +1,97 @@
+# GLOBAL ARGS:
+ARG DOCKER_KAFKA_HOST
+ARG DASHBOARD_BRANCH
+# for front-end
+ARG API_BASE_URL
+ARG SOCKET_URL
+# for backend:
+ARG BASKERVILLE_BRANCH
+ARG REDIS_HOST
+
+#FROM nginx:1.17.1-alpine AS NGINX
+FROM node:14.8.0-alpine AS FRONTEND
+ARG DASHBOARD_BRANCH
+ARG API_BASE_URL
+ARG SOCKET_URL
+
+ENV DASHBOARD_BRANCH $DASHBOARD_BRANCH
+ENV API_BASE_URL $API_BASE_URL
+ENV SOCKET_URL $SOCKET_URL
+
+RUN apk update \
+#  && apk upgrade \
+  && apk add git \
+  && mkdir /app && cd /app \
+  && git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \
+  && cd baskerville_dashboard/front-end \
+  && npm install \
+  && npm install -g @angular/cli@11.1.0
+
+RUN echo $SOCKET_URL, $API_BASE_URL && echo "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"
+
+# this builds the front-end with provided configuration and copies the result in /var/www for nginx
+RUN cd /app/baskerville_dashboard/front-end \
+  && npm run config \
+  && ng build --prod
+
+FROM openjdk:8 AS OJDK8
+FROM python:3.6 AS BACKEND
+
+ARG DOCKER_KAFKA_HOST
+ARG BASKERVILLE_BRANCH
+ARG DASHBOARD_BRANCH
+ARG REDIS_HOST
+
+ENV DOCKER_KAFKA_HOST $DOCKER_KAFKA_HOST
+ENV DASHBOARD_BRANCH $DASHBOARD_BRANCH
+ENV BASKERVILLE_BRANCH $BASKERVILLE_BRANCH
+ENV REDIS_HOST $REDIS_HOST
+ENV BASKERVILLE_ROOT '/app/baskerville'
+ENV BASKERVILLE_DASH_ROOT '/app/baskerville_dashboard'
+
+
+# Get jdk8 from previous stage https://docs.docker.com/develop/develop-images/multistage-build/
+COPY --from=OJDK8 /usr/local/openjdk-8 /usr/local/openjdk-8
+COPY --from=FRONTEND /app/baskerville_dashboard/front-end/dist/baskerville_dashboard_frontend/ /var/www/baskerville_dashboard_frontend/
+
+# Set java path
+ENV JAVA_HOME /usr/local/openjdk-8
+ENV PATH $PATH:$JAVA_HOME/bin
+
+# TODO: The following installs spark-iforest, esretriever and baskerville
+# which takes a long time - mostly because of the different pyspark versions.
+RUN apt-get clean && apt-get update \
+#  && apt-get -y upgrade \
+  && apt-get install -y nginx=1.18.* \
+  && apt-get install -y git \
+  && pip install --upgrade pip \
+  && mkdir /app && cd /app \
+  && git clone https://github.com/titicaca/spark-iforest.git \
+  && cd spark-iforest/python \
+  && pip install . \
+  && cd /app \
+  && git clone https://github.com/equalitie/esretriever.git \
+  && cd esretriever \
+  && pip install . \
+  && cd /app \
+  && git clone --branch $BASKERVILLE_BRANCH https://github.com/deflect-ca/baskerville.git \
+  && cd baskerville \
+  && pip install . \
+  && cd /app \
+  && git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \
+  && cd baskerville_dashboard/backend \
+  && pip install . \
+  && cd /app/baskerville_dashboard/backend/src/baskerville_dashboard
+
+# copy all related configurations
+COPY ./nginx.conf /etc/nginx/nginx.conf
+COPY ./config.yaml /app/baskerville_dashboard/backend/conf
+COPY ./baskerville.yaml /app/baskerville_dashboard/backend/conf
+
+WORKDIR /app/baskerville_dashboard/backend/src/baskerville_dashboard
+
+# socketio for Python includes a production grade web server.
+# start.sh starts nginx and runs dashboard backend.
+COPY start.sh start.sh
+CMD ["sh", "./start.sh"]
+EXPOSE 80 81 5000
\ No newline at end of file
diff --git a/containers/baskerville_dashboard/backend/Dockerfile b/containers/baskerville_dashboard/backend/Dockerfile
deleted file mode 100644
index a7997eb..0000000
--- a/containers/baskerville_dashboard/backend/Dockerfile
+++ /dev/null
@@ -1,51 +0,0 @@
-FROM openjdk:8
-FROM python:3.6
-
-ARG BASKERVILLE_BRANCH
-ARG DASHBOARD_BRANCH
-
-ENV BASKERVILLE_ROOT '/app/baskerville'
-ENV BASKERVILLE_DASH_ROOT '/app/baskerville_dashboard'
-ENV REDIS_HOST 'redis'
-# TODO: The following installs spark-iforest, esretriever and baskerville
-# which takes a long time - mostly because of the different pyspark versions.
-
-# Get jdk8 from previous stage https://docs.docker.com/develop/develop-images/multistage-build/
-COPY --from=openjdk:8 /usr/local/openjdk-8 /usr/local/openjdk-8
-
-# Set java path
-ENV JAVA_HOME /usr/local/openjdk-8
-ENV PATH $PATH:$JAVA_HOME/bin
-
-
-RUN apt-get update \
-  && apt-get -y upgrade \
-  && apt-get install git \
-  && pip install --upgrade pip \
-  && mkdir /app && cd /app \
-  && git clone https://github.com/titicaca/spark-iforest.git \
-  && cd spark-iforest/python \
-  && pip install . \
-  && cd /app \
-  && git clone https://github.com/equalitie/esretriever.git \
-  && cd esretriever \
-  && pip install . \
-  && cd /app \
-  && git clone --branch $BASKERVILLE_BRANCH https://github.com/deflect-ca/baskerville.git \
-  && cd baskerville \
-  && pip install . \
-  && cd /app \
-  && git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \
-  && cd baskerville_dashboard/backend \
-  && pip install .
-
-COPY ./config.yaml /app/baskerville_dashboard/backend/conf
-COPY ./baskerville.yaml /app/baskerville_dashboard/backend/conf
-
-RUN cd /app/baskerville_dashboard/backend/src/baskerville_dashboard
-WORKDIR /app/baskerville_dashboard/backend/src/baskerville_dashboard
-
-# socketio for Python includes a production grade web server:
-CMD ["python", "app.py"]
-
-EXPOSE 5000
\ No newline at end of file
diff --git a/containers/baskerville_dashboard/backend/baskerville.yaml b/containers/baskerville_dashboard/baskerville.yaml
similarity index 84%
rename from containers/baskerville_dashboard/backend/baskerville.yaml
rename to containers/baskerville_dashboard/baskerville.yaml
index 2151c59..4edfa7a 100644
--- a/containers/baskerville_dashboard/backend/baskerville.yaml
+++ b/containers/baskerville_dashboard/baskerville.yaml
@@ -2,7 +2,7 @@
 database:
   name: baskerville
   user: !ENV ${DB_USER:postgres}
-  host: !ENV ${DB_HOST:postgres}
+  host: !ENV ${DB_HOST:127.0.0.1}
   port: !ENV ${DB_PORT:5432}
   password: !ENV ${DB_PASS:secret}
   type: 'postgres'
@@ -91,7 +91,7 @@ engine:
 
 kafka:
   connection:
-    bootstrap_servers: !ENV '${KAFKA_HOST:kafka}'
+    bootstrap_servers: !ENV '${DOCKER_KAFKA_HOST:0.0.0.0}'
 #    bootstrap_servers: !ENV ${KAFKA_HOST}
     auto_offset_reset: 'smallest'
   data_topic: 'feedback'
@@ -111,10 +111,10 @@ spark:
   parallelism: -1
   log_conf: 'true'
   log_level: 'ERROR'
-  redis_host: !ENV ${REDIS_HOST:localhost}
+  redis_host: !ENV ${REDIS_HOST:redis}
   redis_password: !ENV ${REDIS_PASSWORD:""}
   redis_port: '6379'
-  jars: !ENV '${BASKERVILLE_ROOT}/data/jars/spark-redis_2.11-2.5.0-SNAPSHOT-jar-with-dependencies.jar,${BASKERVILLE_ROOT}/data/jars/spark-iforest-3.0.1.jar,${BASKERVILLE_ROOT}/data/jars/postgresql-42.2.4.jar,${BASKERVILLE_ROOT}/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar'
+  jars: !ENV '${BASKERVILLE_ROOT}/data/jars/spark-redis_2.11-2.5.0-SNAPSHOT-jar-with-dependencies.jar,${BASKERVILLE_ROOT}/data/jars/spark-iforest-2.4.0.99.jar,${BASKERVILLE_ROOT}/data/jars/postgresql-42.2.4.jar,${BASKERVILLE_ROOT}/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar'
   session_timezone: 'UTC'
   shuffle_partitions: 12
   spark_driver_memory: '8G'
@@ -135,20 +135,3 @@ spark:
 
   driver_java_options: '-Dio.netty.noPreferDirect=true -Dio.netty.allocator.type=unpooled -XX:+UseCompressedOops -XX:G1HeapRegionSize=10 -XX:+UseG1GC -XX:ParallelGCThreads=8 -XX:ConcGCThreads=2 -XX:InitiatingHeapOccupancyPercent=25 -XX:+UnlockDiagnosticVMOptions -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=1098'
   executor_extra_java_options: '-Dio.netty.noPreferDirect=true -Dio.netty.allocator.type=unpooled -XX:+UseCompressedOops -XX:G1HeapRegionSize=10 -XX:+UseG1GC -XX:ParallelGCThreads=8 -XX:ConcGCThreads=2 -XX:InitiatingHeapOccupancyPercent=25 -XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+UnlockDiagnosticVMOptions -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=1098'
-
-#spark:
-#  ssl_enabled: True
-#  ssl_truststore: '/root/keys/truststore/kafka.truststore.jks'
-#  ssl_truststore_password: 'B1^ZRUUVoIuKND7t2HiJ8fwRg0kdMo4zdh8m8eRzgXw!'
-#  ssl_keystore: '/root/keys/keystore_client/kafka.keystore.jks'
-#  ssl_keystore_password: 'B1^ZRUUVoIuKND7t2HiJ8fwRg0kdMo4zdh8m8eRzgXw!'
-#  ssl_keypassword: 'B1^ZRUUVoIuKND7t2HiJ8fwRg0kdMo4zdh8m8eRzgXw!'
-#  auth_secret: 'B1^ZRUUVoIuKND7t2HiJ8fwRg0kdMo4zdh8m8eRzgXw!'
-#  redis_host: 'bnode1.deflect.ca'
-#  redis_password: 'B1^ZRUUVoIuKND7t2HiJ8fwRg0kdMo4zdh8m8eRzgXw!'
-#  app_name: 'Postprocessing'
-#  master: !ENV 'spark://${SPARK_MASTER_HOST}:7077'
-#  parallelism: -1
-#  log_conf: 'true'
-#  log_level: 'ERROR'
-
diff --git a/containers/baskerville_dashboard/backend/config.yaml b/containers/baskerville_dashboard/config.yaml
similarity index 100%
rename from containers/baskerville_dashboard/backend/config.yaml
rename to containers/baskerville_dashboard/config.yaml
diff --git a/containers/baskerville_dashboard/front-end/Dockerfile b/containers/baskerville_dashboard/front-end/Dockerfile
deleted file mode 100644
index 718578e..0000000
--- a/containers/baskerville_dashboard/front-end/Dockerfile
+++ /dev/null
@@ -1,34 +0,0 @@
-FROM node:14.8.0-alpine AS build
-ARG DASHBOARD_BRANCH
-ARG API_BASE_URL
-ARG SOCKET_URL
-
-RUN apk update \
-#  && apk upgrade \
-  && apk add git \
-  && mkdir /app && cd /app \
-  && git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \
-  && cd baskerville_dashboard/front-end \
-  && npm install \
-  && npm install -g @angular/cli@11.1.0
-
-# provide values for the following
-ENV API_BASE_URL $API_BASE_URL
-ENV SOCKET_URL $SOCKET_URL
-
-# this builds the front-end with provided configuration and copies the result in /var/www for nginx
-RUN cd /app/baskerville_dashboard/front-end \
-  && npm run config \
-  && ng build --prod
-
-
-FROM nginx:1.17.1-alpine
-COPY nginx.conf /etc/nginx/nginx.conf
-COPY frontend.conf /etc/nginx/sites.d/frontend.conf
-COPY --from=build /app/baskerville_dashboard/front-end/dist/baskerville_dashboard_frontend/ /var/www/baskerville_dashboard_frontend/
-
-#RUN /usr/sbin/nginx -s reload
-#RUN #/etc/init.d/nginx reload
-
-CMD ["nginx", "-g", "daemon off;"]
-EXPOSE 80 5000
\ No newline at end of file
diff --git a/containers/baskerville_dashboard/front-end/nginx.conf b/containers/baskerville_dashboard/front-end/nginx.conf
deleted file mode 100644
index 2bf5997..0000000
--- a/containers/baskerville_dashboard/front-end/nginx.conf
+++ /dev/null
@@ -1,26 +0,0 @@
-events{}
-http {
-    include /etc/nginx/mime.types;
-    server {
-        listen 80;
-        server_name _;
-        location / {
-            root /var/www/baskerville_dashboard_frontend/;
-            index index.html;
-        }
-        location /api {
-            rewrite ^/api/(.*) /$1 break;
-            proxy_pass http://127.0.0.1:5000;
-            proxy_set_header Host $host;
-            proxy_set_header X-Real-IP ip_address;
-        }
-        location /socket.io {
-            proxy_http_version 1.1;
-            proxy_buffering off;
-            proxy_set_header Upgrade $http_upgrade;
-            proxy_set_header Connection "Upgrade";
-            proxy_pass http://127.0.0.1:5000/socket.io;
-        }
-    }
-    include /etc/nginx/sites.d/*.conf;
-}
diff --git a/containers/baskerville_dashboard/nginx.conf b/containers/baskerville_dashboard/nginx.conf
new file mode 100644
index 0000000..225a5ca
--- /dev/null
+++ b/containers/baskerville_dashboard/nginx.conf
@@ -0,0 +1,76 @@
+events{}
+http {
+    include /etc/nginx/mime.types;
+    server {
+        listen 80;
+        server_name _;
+        server_name localhost;
+
+        location / {
+            root /var/www/baskerville_dashboard_frontend/;
+            index index.html;
+        }
+        location /api {
+            rewrite ^/api/(.*) /$1 break;
+            proxy_pass http://127.0.0.1:5000;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+        }
+
+        location /socket.io {
+            include proxy_params;
+            proxy_http_version 1.1;
+            proxy_buffering off;
+            proxy_set_header Upgrade $http_upgrade;
+            proxy_set_header Connection "Upgrade";
+            proxy_pass http://127.0.0.1:5000/socket.io;
+        }
+#        location /api {
+#            rewrite ^/api/(.*) /$1 break;
+#            proxy_pass http://127.0.0.0:5000;
+#            proxy_set_header Host $host;
+#            proxy_set_header X-Real-IP ip_address;
+#        }
+#        location ~* \.io {
+#            proxy_set_header X-Real-IP $remote_addr;
+#            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+#            proxy_set_header Host $http_host;
+#            proxy_set_header X-NginX-Proxy false;
+#
+#            proxy_pass http://127.0.0.0:5000;
+#            proxy_redirect off;
+#
+#            proxy_http_version 1.1;
+#            proxy_set_header Upgrade $http_upgrade;
+#            proxy_set_header Connection "upgrade";
+#        }
+
+    }
+
+#    server {
+#        listen 81;
+#        server_name "dashboard_backend";
+#        # server_name $hostname;
+#        location /api {
+#            rewrite ^/api/(.*) /$1 break;
+#            proxy_pass http://localhost:5000;
+#            proxy_set_header Host $host;
+#            proxy_set_header X-Real-IP ip_address;
+#        }
+#        location ~* \.io {
+#            proxy_set_header X-Real-IP $remote_addr;
+#            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+#            proxy_set_header Host $http_host;
+#            proxy_set_header X-NginX-Proxy false;
+#
+#            proxy_pass http://localhost:5000;
+#            proxy_redirect off;
+#
+#            proxy_http_version 1.1;
+#            proxy_set_header Upgrade $http_upgrade;
+#            proxy_set_header Connection "upgrade";
+#        }
+#
+#    }
+    # include /etc/nginx/sites.d/*.conf;
+}
diff --git a/containers/baskerville_dashboard/start.sh b/containers/baskerville_dashboard/start.sh
new file mode 100644
index 0000000..cc981a1
--- /dev/null
+++ b/containers/baskerville_dashboard/start.sh
@@ -0,0 +1 @@
+nginx && python app.py
\ No newline at end of file
diff --git a/docker-compose.yaml b/docker-compose.yaml
index d7abb74..c38ddc3 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -295,31 +295,28 @@ services:
 #      - "8080:8080"
     network_mode: "service:nginx"
 
-  dashboard_frontend:
+  dashboard:
     build:
-      context: ./containers/baskerville_dashboard/front-end
+      context: ./containers/baskerville_dashboard
       dockerfile: Dockerfile
       args:
         - DASHBOARD_BRANCH=fine_tuning_dockerized_version
         - API_BASE_URL=http://localhost:5000/api/1 # 'https://api.baskerville-dashboard.deflect.ca/api/1'
-        - SOCKET_URL=http://localhost:5000 # 'https://api.baskerville-dashboard.deflect.ca'
-    ports:
-      - "80:80"
-
-  dashboard_backend:
-    build:
-      context: ./containers/baskerville_dashboard/backend
-      dockerfile: Dockerfile
-      args:
+        - SOCKET_URL=http://127.0.0.1:5000 # 'https://api.baskerville-dashboard.deflect.ca'
         - BASKERVILLE_BRANCH=dashboard_changes_pt2
-        - DASHBOARD_BRANCH=fine_tuning_dockerized_version
-#    network_mode: "service:dashboard_frontend"
+        - REDIS_HOST=$REDIS_HOST
+        - DOCKER_KAFKA_HOST=$DOCKER_KAFKA_HOST
+        - DB_HOST=$DB_HOST
     ports:
+      - "80:80" # todo: this will conflict with nginx service.
       - "5000:5000"
+    # network_mode: "service:dashboard_backend"
     depends_on:
       - postgres
       - kafka
       - redis
+    networks:
+      - local
 
 volumes:
   postgres-data3: