From 347558d12f8537e0a8883543e8b833cf248ea214 Mon Sep 17 00:00:00 2001 From: Maria Karanasou Date: Thu, 18 Nov 2021 11:00:14 +0200 Subject: [PATCH 1/7] adding dashboard services --- docker-compose.yaml | 46 +++++++++++++++++++++++++++++++++++++++++++++ dot_env.sh | 7 +++++++ 2 files changed, 53 insertions(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index 89f33c1..d3843f5 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -224,6 +224,33 @@ services: - redis - filebeat + # feedback pipeline + baskerville_feedback: + image: "${BASKERVILLE_IMAGE}" + env_file: + - .env + environment: + - DB_USER=${BASKERVILLE_POSTGRES_USER} + - DB_PASSWORD=${BASKERVILLE_POSTGRES_PASSWORD} + - DB_HOST=postgres + - DB_PORT=${BASKERVILLE_POSTGRES_PORT} + - KAFKA_HOST=${DOCKER_KAFKA_HOST}:9092 + - REDIS_PASSWORD= # empty on purpose: quotes in list syntax would become part of the value + - CLEARING_HOUSE_KAFKA=${CLEARING_HOUSE_KAFKA} + command: python3 /usr/local/baskerville/src/baskerville/main.py -c /app/baskerville/conf/feedback.yaml feedback + volumes: + - type: bind + source: ./conf + target: /app/baskerville/conf + read_only: true + - type: bind + source: ./clearing_house_connection + target: /app/baskerville/clearing_house_connection + read_only: true + depends_on: + - postgres + - kafka + banjax-next: build: context: ./containers/banjax-next @@ -268,6 +295,25 @@ services: # - "8080:8080" network_mode: "service:nginx" + dashboard_frontend: + build: + context: ./containers/baskerville_dashboard/front-end + dockerfile: Dockerfile + ports: + - "3000:3000" + + dashboard_backend: + build: + context: ./containers/baskerville_dashboard/backend + dockerfile: Dockerfile + args: + - BASKERVILLE_BRANCH=dashboard_changes_pt2 + - DASHBOARD_BRANCH=fine_tuning_dockerized_version + ports: + - "5000:5000" + depends_on: + - postgres + - kafka volumes: postgres-data3: diff --git a/dot_env.sh b/dot_env.sh index fb6295f..b1aa65d 100755 --- a/dot_env.sh +++ b/dot_env.sh @@ -12,9 +12,16 @@ BASKERVILLE_POSTGRES_USER=postgres 
BASKERVILLE_POSTGRES_PASSWORD=changeme BASKERVILLE_POSTGRES_PORT=5432 +BASKERVILLE_CLIENT_ID='yourid' + DEFLECT_DNET=dnet1 GF_SECURITY_ADMIN_PASSWORD=Adm1nPa$$ +# for the dashboard +JWT_SECRET=sosecret +ADMIN_PASS=pass +SECRET_KEY='a very very secret key preferably through an env variable' + PROJECT_ROOT='/path/to/baskerville_client' KAFKA_KEYSTORE_PASSWORD="..." From 54040cf8e5b0245061c879cf2014828af716fae8 Mon Sep 17 00:00:00 2001 From: Maria Karanasou Date: Thu, 18 Nov 2021 11:00:32 +0200 Subject: [PATCH 2/7] redis needs user: redis for permission purposes --- docker-compose.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index d3843f5..217bc61 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -158,6 +158,7 @@ services: redis: image: redis + user: "redis" env_file: - .env expose: From fb097db3149794047ac7c5fb93d73bfd8dcfbe79 Mon Sep 17 00:00:00 2001 From: Maria Karanasou Date: Thu, 18 Nov 2021 11:03:14 +0200 Subject: [PATCH 3/7] dashboard yamls + dockerfile --- conf/feedback.yaml | 160 ++++++++++++++++++ .../baskerville_dashboard/backend/Dockerfile | 38 +++++ .../backend/baskerville.yaml | 154 +++++++++++++++++ .../baskerville_dashboard/backend/config.yaml | 19 +++ 4 files changed, 371 insertions(+) create mode 100644 conf/feedback.yaml create mode 100644 containers/baskerville_dashboard/backend/Dockerfile create mode 100644 containers/baskerville_dashboard/backend/baskerville.yaml create mode 100644 containers/baskerville_dashboard/backend/config.yaml diff --git a/conf/feedback.yaml b/conf/feedback.yaml new file mode 100644 index 0000000..65ec815 --- /dev/null +++ b/conf/feedback.yaml @@ -0,0 +1,160 @@ +--- +database: + name: baskerville + user: !ENV ${DB_USER} + password: !ENV ${DB_PASSWORD} + host: !ENV ${DB_HOST} + port : !ENV ${DB_PORT} + type: 'postgres' + data_partition: False + +user_details: + username: 'admin' + password: !ENV '${ADMIN_PASS}' + organization_uuid: 'test' + organization_name: 
'test_org' + +engine: + ttl: 600 + client_mode: True + id_client: !ENV '${BASKERVILLE_CLIENT_ID}' + time_bucket: 15 + storage_path: !ENV '/app/baskerville/data/' + metrics: + port: 8998 + exported_dashboard_file: !ENV '/app/baskerville/data/metrics/Baskerville-metrics-dashboard.json' + performance: + pipeline: + - 'get_data' + - 'feature_extraction' + - 'group_by' + - 'predict' + - 'save' + - 'clean_cache' + - 'update' + - 'instantiate_spark_session' + request_set_cache: + - 'instantiate_cache' + - '__getitem__' + - '__contains__' + - 'clean' + features: True + progress: True + verbose: False + datetime_format: '%Y-%m-%d %H:%M:%S' + model_id: '' #50 + trigger_challenge: True + challenge_threshold: 0.2 + challenge: False # 'ip' + min_num_requests: 50 + extra_features: + - css_to_html_ratio + - image_to_html_ratio + - js_to_html_ratio + - minutes_total + - path_depth_average + - path_depth_variance + - payload_size_average + - payload_size_log_average + - request_interval_average + - request_interval_variance + - request_total + - response4xx_to_request_ratio + - top_page_to_request_ratio + - unique_path_rate + - unique_path_to_request_ratio + - unique_query_rate + - unique_query_to_unique_path_ratio + - unique_ua_rate + data_config: + parser: JSONLogSparkParser + schema: !ENV '/app/baskerville/data/samples/sample_feedback_schema.json' + group_by_cols: + - 'client_request_host' + - 'client_ip' + timestamp_column: '@timestamp' + logpath : !ENV '/app/baskerville/src/baskerville/logs/baskerville.log' + log_level: 'DEBUG' + cache_expire_time: 604800 # sec (604800 = 1 week) + cache_load_past: False # Load past request sets or not + cache_lookup: True # search cache for sessions + cross_reference: False # search MISP for IPs + db_lookup: False # search database for sessions + raw_log: + paths: + - !ENV '/app/baskerville/data/samples/test_data_1k.json' # 1k randomized logs + simulation: + sleep: False + verbose: True + log_file: !ENV 
'/app/baskerville/data/samples/test_data_1k.json' # 1k randomized logs + +kafka: + connection: + bootstrap_servers: !ENV '${KAFKA_HOST}' + + clearing_house_connection: + bootstrap_servers: !ENV '${CLEARING_HOUSE_KAFKA}' + security_protocol: 'SSL' + ssl_check_hostname: False + ssl_cafile: '/app/baskerville/clearing_house_connection/caroot.pem' + ssl_certfile: '/app/baskerville/clearing_house_connection/certificate.pem' + ssl_keyfile: '/app/baskerville/clearing_house_connection/key.pem' + auto_offset_reset: 'smallest' + data_topic: 'feedback' + consume_predictions_topic: 'predictions' + predictions_topic: 'id_client.baskerville.predictions' + feedback_topic: 'feedback' + feedback_response_topic: !ENV '${BASKERVILLE_CLIENT_ID}.feedback' + register_topic: 'baskerville.register' + consume_group: 'baskerville' + publish_logs: 'baskerville.logs' + publish_stats: 'baskerville.stats' + publish_predictions: 'baskerville.predictions' + +spark: + app_name: 'Baskerville Dashboard' + master: 'local' #!ENV 'spark://${SPARK_MASTER_HOST}:7077' + parallelism: -1 + log_conf: 'true' + log_level: 'ERROR' + redis_host: !ENV ${REDIS_HOST} + redis_password: !ENV ${REDIS_PASSWORD} + redis_port: '6379' + jars: !ENV '/app/baskerville/data/jars/baskervilleSecurityFilter.jar,/app/baskerville/data/jars/shc-core-1.1.3-2.4-s_2.11.jar,/app/baskerville/data/jars/tiledb-spark-0.1.0.jar,/app/baskerville/data/jars/spark-redis_2.11-2.5.0-SNAPSHOT-jar-with-dependencies.jar,/app/baskerville/data/jars/spark-iforest-3.0.1.jar,/app/baskerville/data/jars/postgresql-42.2.4.jar,/app/baskerville/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar,/app/baskerville/data/jars/elasticsearch-spark-20_2.11-5.6.5.jar' + session_timezone: 'UTC' + shuffle_partitions: 12 + spark_driver_memory: '8G' + db_driver: 'org.postgresql.Driver' + storage_level: 'OFF_HEAP' + event_log: True + auth_secret: 'TEST_SECRET' # + ssl_enabled: False + ssl_truststore: !ENV '/app/baskerville/data/keystore/truststore' + 
ssl_truststore_password: !ENV ${TRUSTSTORE_PASS} + ssl_keystore: !ENV '/app/baskerville/data/keystore/keystore' + ssl_keystore_password: !ENV ${KEYSTORE_PASS} + ssl_keypassword: !ENV ${SSL_PASS} + serializer: 'org.apache.spark.serializer.KryoSerializer' + kryoserializer_buffer_max: '2024m' + kryoserializer_buffer: '1024k' + # https://spark.apache.org/docs/latest/tuning.html#tuning-data-structures + driver_java_options: '-Dio.netty.noPreferDirect=true -Dio.netty.allocator.type=unpooled -XX:+UseCompressedOops -XX:G1HeapRegionSize=10 -XX:+UseG1GC -XX:ParallelGCThreads=8 -XX:ConcGCThreads=2 -XX:InitiatingHeapOccupancyPercent=25 -XX:+UnlockDiagnosticVMOptions -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=1098' + executor_extra_java_options: '-Dio.netty.noPreferDirect=true -Dio.netty.allocator.type=unpooled -XX:+UseCompressedOops -XX:G1HeapRegionSize=10 -XX:+UseG1GC -XX:ParallelGCThreads=8 -XX:ConcGCThreads=2 -XX:InitiatingHeapOccupancyPercent=25 -XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+UnlockDiagnosticVMOptions -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=1098' + + +#spark: +# ssl_enabled: True +# ssl_truststore: '/root/keys/truststore/kafka.truststore.jks' +# ssl_truststore_password: !ENV ${TRUSTSTORE_PASS} +# ssl_keystore: '/root/keys/keystore_client/kafka.keystore.jks' +# ssl_keystore_password: !ENV ${KEYSTORE_PASS} +# ssl_keypassword: !ENV ${SSL_PASS} +# auth_secret: '<redacted>' +# redis_host: 'bnode1.deflect.ca' +# redis_password: !ENV ${REDIS_PASSWORD} 
+# app_name: 'Postprocessing' +# master: !ENV 'spark://${SPARK_MASTER_HOST}:7077' +# parallelism: -1 +# log_conf: 'true' +# log_level: 'ERROR' + diff --git a/containers/baskerville_dashboard/backend/Dockerfile b/containers/baskerville_dashboard/backend/Dockerfile new file mode 100644 index 0000000..c0a0972 --- /dev/null +++ b/containers/baskerville_dashboard/backend/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.6 +ARG BASKERVILLE_BRANCH +ARG DASHBOARD_BRANCH + +# TODO: The following installs spark-iforest, esretriever and baskerville +# which takes a long time - mostly because of the different pyspark versions. + +RUN apt-get update \ + && apt-get -y upgrade \ + && apt-get install git \ + && pip install --upgrade pip \ + && mkdir /app && cd /app \ + && git clone https://github.com/titicaca/spark-iforest.git \ + && cd spark-iforest/python \ + && pip install . \ + && cd /app \ + && git clone https://github.com/equalitie/esretriever.git \ + && cd esretriever \ + && pip install . \ + && cd /app \ + && git clone --branch $BASKERVILLE_BRANCH https://github.com/deflect-ca/baskerville.git \ + && cd baskerville \ + && pip install . \ + && cd /app \ + && git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \ + && cd baskerville_dashboard/backend \ + && pip install . 
\ + +ENV BASKERVILLE_ROOT '/app/baskerville' +ENV BASKERVILLE_DASH_ROOT '/app/baskerville_dashboard' + +COPY ./config.yaml /app/baskerville_dashboard/backend/conf +COPY ./baskerville.yaml /app/baskerville_dashboard/backend/conf + +# socketio for Python includes a production grade web server: +RUN cd /app/baskerville_dashboard/backend/src/baskerville_dashboard && python app.py + +EXPOSE 5000 \ No newline at end of file diff --git a/containers/baskerville_dashboard/backend/baskerville.yaml b/containers/baskerville_dashboard/backend/baskerville.yaml new file mode 100644 index 0000000..5ed5dc7 --- /dev/null +++ b/containers/baskerville_dashboard/backend/baskerville.yaml @@ -0,0 +1,154 @@ +--- +database: + name: baskerville_dashboard # the database name + user: !ENV ${DB_USER:postgres} + host: !ENV ${DB_HOST:localhost} + port: !ENV ${DB_PORT:5432} + password: !ENV ${DB_PASS:secret} + type: 'postgres' + db_driver: 'org.postgresql.Driver' + data_partition: False + +user_details: + username: 'admin' + password: !ENV '${ADMIN_PASS}' + organization_uuid: 'test' + organization_name: 'test_org' + +engine: + ttl: 600 + client_mode: True + id_client: 'test' + time_bucket: 15 + storage_path: !ENV '${BASKERVILLE_ROOT}/data/' + metrics: + port: 8998 + exported_dashboard_file: !ENV '${BASKERVILLE_ROOT}/data/metrics/Baskerville-metrics-dashboard.json' + performance: + pipeline: + - 'get_data' + - 'feature_extraction' + - 'group_by' + - 'predict' + - 'save' + - 'clean_cache' + - 'update' + - 'instantiate_spark_session' + request_set_cache: + - 'instantiate_cache' + - '__getitem__' + - '__contains__' + - 'clean' + features: True + progress: True + verbose: False + datetime_format: '%Y-%m-%d %H:%M:%S' + model_id: '' #50 + trigger_challenge: True + challenge_threshold: 0.2 + challenge: False # 'ip' + min_num_requests: 50 + extra_features: + - css_to_html_ratio + - image_to_html_ratio + - js_to_html_ratio + - minutes_total + - path_depth_average + - path_depth_variance + - 
payload_size_average + - payload_size_log_average + - request_interval_average + - request_interval_variance + - request_total + - response4xx_to_request_ratio + - top_page_to_request_ratio + - unique_path_rate + - unique_path_to_request_ratio + - unique_query_rate + - unique_query_to_unique_path_ratio + - unique_ua_rate + data_config: + parser: JSONLogSparkParser + schema: !ENV '${BASKERVILLE_ROOT}/data/samples/sample_feedback_schema.json' + group_by_cols: + - 'client_request_host' + - 'client_ip' + timestamp_column: '@timestamp' + logpath : !ENV '${BASKERVILLE_ROOT}/src/baskerville/logs/baskerville.log' + log_level: 'DEBUG' + cache_expire_time: 604800 # sec (604800 = 1 week) + cache_load_past: False # Load past request sets or not + cache_lookup: True # search cache for sessions + cross_reference: False # search MISP for IPs + db_lookup: False # search database for sessions + raw_log: + paths: + - !ENV '${BASKERVILLE_ROOT}/data/samples/test_data_1k.json' # 1k randomized logs + simulation: + sleep: False + verbose: True + log_file: !ENV '${BASKERVILLE_ROOT}/data/samples/test_data_1k.json' # 1k randomized logs + +kafka: + connection: + bootstrap_servers: !ENV '${KAFKA_HOST:0.0.0.0}' + bootstrap_servers: !ENV ${KAFKA_HOST} + auto_offset_reset: 'smallest' + data_topic: 'feedback' + consume_predictions_topic: 'predictions' + predictions_topic: 'id_client.baskerville.predictions' + feedback_topic: 'feedback' + feedback_response_topic: 'test.feedback' # uuid_org.feedback + register_topic: 'baskerville.register' + consume_group: 'baskerville' + publish_logs: 'baskerville.logs' + publish_stats: 'baskerville.stats' + publish_predictions: 'baskerville.predictions' + +spark: + app_name: 'Baskerville Dashboard' + master: 'local' #!ENV 'spark://${SPARK_MASTER_HOST}:7077' + parallelism: -1 + log_conf: 'true' + log_level: 'ERROR' + redis_host: !ENV ${REDIS_HOST:0.0.0.0} + redis_password: !ENV ${REDIS_PASSWORD:None} + redis_port: '6379' + jars: !ENV 
'${BASKERVILLE_ROOT}/data/jars/baskervilleSecurityFilter.jar,${BASKERVILLE_ROOT}/data/jars/shc-core-1.1.3-2.4-s_2.11.jar,${BASKERVILLE_ROOT}/data/jars/tiledb-spark-0.1.0.jar,${BASKERVILLE_ROOT}/data/jars/spark-redis_2.11-2.5.0-SNAPSHOT-jar-with-dependencies.jar,${BASKERVILLE_ROOT}/data/jars/spark-iforest-3.0.1.jar,${BASKERVILLE_ROOT}/data/jars/postgresql-42.2.4.jar,${BASKERVILLE_ROOT}/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar,${BASKERVILLE_ROOT}/data/jars/elasticsearch-spark-20_2.11-5.6.5.jar' + session_timezone: 'UTC' + shuffle_partitions: 12 + spark_driver_memory: '8G' + db_driver: 'org.postgresql.Driver' + storage_level: 'OFF_HEAP' + event_log: True + auth_secret: 'TEST_SECRET' # + ssl_enabled: False + ssl_truststore: !ENV '${BASKERVILLE_ROOT}/data/keystore/truststore' + ssl_truststore_password: !ENV ${TRUSTSTORE_PASS} + ssl_keystore: !ENV '${BASKERVILLE_ROOT}/data/keystore/keystore' + ssl_keystore_password: !ENV ${KEYSTORE_PASS} + ssl_keypassword: !ENV ${SSL_PASS} + serializer: 'org.apache.spark.serializer.KryoSerializer' + kryoserializer_buffer_max: '2024m' + kryoserializer_buffer: '1024k' + # https://spark.apache.org/docs/latest/tuning.html#tuning-data-structures + driver_java_options: '-Dio.netty.noPreferDirect=true -Dio.netty.allocator.type=unpooled -XX:+UseCompressedOops -XX:G1HeapRegionSize=10 -XX:+UseG1GC -XX:ParallelGCThreads=8 -XX:ConcGCThreads=2 -XX:InitiatingHeapOccupancyPercent=25 -XX:+UnlockDiagnosticVMOptions -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=1098' + executor_extra_java_options: '-Dio.netty.noPreferDirect=true -Dio.netty.allocator.type=unpooled -XX:+UseCompressedOops -XX:G1HeapRegionSize=10 -XX:+UseG1GC -XX:ParallelGCThreads=8 -XX:ConcGCThreads=2 -XX:InitiatingHeapOccupancyPercent=25 -XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps 
-XX:+UnlockDiagnosticVMOptions -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=1098' + + +#spark: +# ssl_enabled: True +# ssl_truststore: '/root/keys/truststore/kafka.truststore.jks' +# ssl_truststore_password: !ENV ${TRUSTSTORE_PASS} +# ssl_keystore: '/root/keys/keystore_client/kafka.keystore.jks' +# ssl_keystore_password: !ENV ${KEYSTORE_PASS} +# ssl_keypassword: !ENV ${SSL_PASS} +# auth_secret: '<redacted>' +# redis_host: 'bnode1.deflect.ca' +# redis_password: !ENV ${REDIS_PASSWORD} +# app_name: 'Postprocessing' +# master: !ENV 'spark://${SPARK_MASTER_HOST}:7077' +# parallelism: -1 +# log_conf: 'true' +# log_level: 'ERROR' + diff --git a/containers/baskerville_dashboard/backend/config.yaml b/containers/baskerville_dashboard/backend/config.yaml new file mode 100644 index 0000000..fc27524 --- /dev/null +++ b/containers/baskerville_dashboard/backend/config.yaml @@ -0,0 +1,19 @@ +--- +APP_CONFIG: + PREFIX: '/api/1' + SECRET_KEY: !ENV ${SECRET_KEY} + SQLALCHEMY_COMMIT_ON_TEARDOWN: True + UPLOAD_FOLDER: !ENV '${BASKERVILLE_DASH_ROOT}/backend/static/uploads' + JWT_SECRET_KEY: !ENV ${JWT_SECRET:sosecret} + JWT_DEFAULT_REALM: 'Login Required' + JWT_AUTH_HEADER_PREFIX: 'Bearer' + SECURITY_PASSWORD_SALT: 'salt' + FLASK_DEBUG: True + ADMIN_EMAIL: 'admin@email' + ADMIN_PASS: !ENV '${ADMIN_PASS:secret}' + PIPELINE: 'irawlog' + BASKERVILLE_CONF: !ENV '${BASKERVILLE_DASH_ROOT}/backend/conf/baskerville.yaml' + KAFKA_TOPICS: + - !ENV '${BASKERVILLE_CLIENT_ID}.feedback' + - !ENV '${BASKERVILLE_CLIENT_ID}.registration' + - !ENV '${BASKERVILLE_CLIENT_ID}.retrain' \ No newline at end of file From 424e54aca1cbeab9e054e22cf725b6de3b7ae70b Mon Sep 17 00:00:00 2001 From: Maria Karanasou Date: Fri, 19 Nov 2021 16:28:58 +0200 Subject: [PATCH 4/7] 
Functional dashboard Dockerfiles --- conf/feedback.yaml | 16 ++++----- .../baskerville_dashboard/backend/Dockerfile | 13 ++++--- .../backend/baskerville.yaml | 20 +++++------ .../baskerville_dashboard/backend/config.yaml | 10 +++--- .../front-end/Dockerfile | 34 +++++++++++++++++++ .../front-end/nginx.conf | 26 ++++++++++++++ docker-compose.yaml | 9 +++-- 7 files changed, 98 insertions(+), 30 deletions(-) create mode 100644 containers/baskerville_dashboard/front-end/Dockerfile create mode 100644 containers/baskerville_dashboard/front-end/nginx.conf diff --git a/conf/feedback.yaml b/conf/feedback.yaml index 65ec815..8273618 100644 --- a/conf/feedback.yaml +++ b/conf/feedback.yaml @@ -1,18 +1,18 @@ --- database: name: baskerville - user: !ENV ${DB_USER} - password: !ENV ${DB_PASSWORD} - host: !ENV ${DB_HOST} - port : !ENV ${DB_PORT} + user: !ENV ${DB_USER:postgres} + host: !ENV ${DB_HOST:localhost} + port: !ENV ${DB_PORT:5432} + password: !ENV ${DB_PASSWORD:secret} # same variable name the baskerville_feedback service sets in docker-compose.yaml type: 'postgres' data_partition: False user_details: username: 'admin' password: !ENV '${ADMIN_PASS}' - organization_uuid: 'test' - organization_name: 'test_org' + organization_uuid: !ENV '${ORG_UUID:test}' + organization_name: !ENV '${ORG_NAME:test_org}' engine: ttl: 600 @@ -120,10 +120,10 @@ spark: redis_host: !ENV ${REDIS_HOST} redis_password: !ENV ${REDIS_PASSWORD} redis_port: '6379' - jars: !ENV '/app/baskerville/data/jars/baskervilleSecurityFilter.jar,/app/baskerville/data/jars/shc-core-1.1.3-2.4-s_2.11.jar,/app/baskerville/data/jars/tiledb-spark-0.1.0.jar,/app/baskerville/data/jars/spark-redis_2.11-2.5.0-SNAPSHOT-jar-with-dependencies.jar,/app/baskerville/data/jars/spark-iforest-3.0.1.jar,/app/baskerville/data/jars/postgresql-42.2.4.jar,/app/baskerville/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar,/app/baskerville/data/jars/elasticsearch-spark-20_2.11-5.6.5.jar' + jars: !ENV 
'/app/baskerville/data/jars/spark-iforest-3.0.1.jar,/app/baskerville/data/jars/postgresql-42.2.4.jar,/app/baskerville/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar' session_timezone: 'UTC' shuffle_partitions: 12 - spark_driver_memory: '8G' + spark_driver_memory: '4G' db_driver: 'org.postgresql.Driver' storage_level: 'OFF_HEAP' event_log: True diff --git a/containers/baskerville_dashboard/backend/Dockerfile b/containers/baskerville_dashboard/backend/Dockerfile index c0a0972..0c9c050 100644 --- a/containers/baskerville_dashboard/backend/Dockerfile +++ b/containers/baskerville_dashboard/backend/Dockerfile @@ -2,6 +2,9 @@ FROM python:3.6 ARG BASKERVILLE_BRANCH ARG DASHBOARD_BRANCH +ENV BASKERVILLE_ROOT '/app/baskerville' +ENV BASKERVILLE_DASH_ROOT '/app/baskerville_dashboard' +ENV REDIS_HOST 'redis' # TODO: The following installs spark-iforest, esretriever and baskerville # which takes a long time - mostly because of the different pyspark versions. @@ -24,15 +27,15 @@ RUN apt-get update \ && cd /app \ && git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \ && cd baskerville_dashboard/backend \ - && pip install . \ - -ENV BASKERVILLE_ROOT '/app/baskerville' -ENV BASKERVILLE_DASH_ROOT '/app/baskerville_dashboard' + && pip install . 
COPY ./config.yaml /app/baskerville_dashboard/backend/conf COPY ./baskerville.yaml /app/baskerville_dashboard/backend/conf +RUN cd /app/baskerville_dashboard/backend/src/baskerville_dashboard +WORKDIR /app/baskerville_dashboard/backend/src/baskerville_dashboard + # socketio for Python includes a production grade web server: -RUN cd /app/baskerville_dashboard/backend/src/baskerville_dashboard && python app.py +CMD ["python", "app.py"] EXPOSE 5000 \ No newline at end of file diff --git a/containers/baskerville_dashboard/backend/baskerville.yaml b/containers/baskerville_dashboard/backend/baskerville.yaml index 5ed5dc7..2151c59 100644 --- a/containers/baskerville_dashboard/backend/baskerville.yaml +++ b/containers/baskerville_dashboard/backend/baskerville.yaml @@ -1,8 +1,8 @@ --- database: - name: baskerville_dashboard # the database name + name: baskerville user: !ENV ${DB_USER:postgres} - host: !ENV ${DB_HOST:localhost} + host: !ENV ${DB_HOST:postgres} port: !ENV ${DB_PORT:5432} password: !ENV ${DB_PASS:secret} type: 'postgres' @@ -12,13 +12,13 @@ database: user_details: username: 'admin' password: !ENV '${ADMIN_PASS}' - organization_uuid: 'test' - organization_name: 'test_org' + organization_uuid: !ENV '${BASKERVILLE_CLIENT_ID:baskerville_client_x}' + organization_name: !ENV '${ORG_NAME:test_org}' engine: ttl: 600 client_mode: True - id_client: 'test' + id_client: !ENV '${BASKERVILLE_CLIENT_ID:baskerville_client_x}' time_bucket: 15 storage_path: !ENV '${BASKERVILLE_ROOT}/data/' metrics: @@ -91,8 +91,8 @@ engine: kafka: connection: - bootstrap_servers: !ENV '${KAFKA_HOST:0.0.0.0}' - bootstrap_servers: !ENV ${KAFKA_HOST} + bootstrap_servers: !ENV '${KAFKA_HOST:kafka}' +# bootstrap_servers: !ENV ${KAFKA_HOST} auto_offset_reset: 'smallest' data_topic: 'feedback' consume_predictions_topic: 'predictions' @@ -111,10 +111,10 @@ spark: parallelism: -1 log_conf: 'true' log_level: 'ERROR' - redis_host: !ENV ${REDIS_HOST:0.0.0.0} - redis_password: !ENV ${REDIS_PASSWORD:None} + 
redis_host: !ENV ${REDIS_HOST:localhost} + redis_password: !ENV ${REDIS_PASSWORD:""} redis_port: '6379' - jars: !ENV '${BASKERVILLE_ROOT}/data/jars/baskervilleSecurityFilter.jar,${BASKERVILLE_ROOT}/data/jars/shc-core-1.1.3-2.4-s_2.11.jar,${BASKERVILLE_ROOT}/data/jars/tiledb-spark-0.1.0.jar,${BASKERVILLE_ROOT}/data/jars/spark-redis_2.11-2.5.0-SNAPSHOT-jar-with-dependencies.jar,${BASKERVILLE_ROOT}/data/jars/spark-iforest-3.0.1.jar,${BASKERVILLE_ROOT}/data/jars/postgresql-42.2.4.jar,${BASKERVILLE_ROOT}/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar,${BASKERVILLE_ROOT}/data/jars/elasticsearch-spark-20_2.11-5.6.5.jar' + jars: !ENV '${BASKERVILLE_ROOT}/data/jars/spark-redis_2.11-2.5.0-SNAPSHOT-jar-with-dependencies.jar,${BASKERVILLE_ROOT}/data/jars/spark-iforest-3.0.1.jar,${BASKERVILLE_ROOT}/data/jars/postgresql-42.2.4.jar,${BASKERVILLE_ROOT}/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar' session_timezone: 'UTC' shuffle_partitions: 12 spark_driver_memory: '8G' diff --git a/containers/baskerville_dashboard/backend/config.yaml b/containers/baskerville_dashboard/backend/config.yaml index fc27524..3b759fa 100644 --- a/containers/baskerville_dashboard/backend/config.yaml +++ b/containers/baskerville_dashboard/backend/config.yaml @@ -1,10 +1,10 @@ --- APP_CONFIG: PREFIX: '/api/1' - SECRET_KEY: !ENV ${SECRET_KEY} + SECRET_KEY: !ENV '${SECRET_KEY}' SQLALCHEMY_COMMIT_ON_TEARDOWN: True UPLOAD_FOLDER: !ENV '${BASKERVILLE_DASH_ROOT}/backend/static/uploads' - JWT_SECRET_KEY: !ENV ${JWT_SECRET:sosecret} + JWT_SECRET_KEY: !ENV '${JWT_SECRET:sosecret}' JWT_DEFAULT_REALM: 'Login Required' JWT_AUTH_HEADER_PREFIX: 'Bearer' SECURITY_PASSWORD_SALT: 'salt' @@ -14,6 +14,6 @@ APP_CONFIG: PIPELINE: 'irawlog' BASKERVILLE_CONF: !ENV '${BASKERVILLE_DASH_ROOT}/backend/conf/baskerville.yaml' KAFKA_TOPICS: - - !ENV '${BASKERVILLE_CLIENT_ID}.feedback' - - !ENV '${BASKERVILLE_CLIENT_ID}.registration' - - !ENV '${BASKERVILLE_CLIENT_ID}.retrain' \ No newline at end of file + - 
!ENV '${BASKERVILLE_CLIENT_ID:baskerville_client_x}.feedback' + - !ENV '${BASKERVILLE_CLIENT_ID:baskerville_client_x}.registration' + - !ENV '${BASKERVILLE_CLIENT_ID:baskerville_client_x}.retrain' \ No newline at end of file diff --git a/containers/baskerville_dashboard/front-end/Dockerfile b/containers/baskerville_dashboard/front-end/Dockerfile new file mode 100644 index 0000000..718578e --- /dev/null +++ b/containers/baskerville_dashboard/front-end/Dockerfile @@ -0,0 +1,34 @@ +FROM node:14.8.0-alpine AS build +ARG DASHBOARD_BRANCH +ARG API_BASE_URL +ARG SOCKET_URL + +RUN apk update \ +# && apk upgrade \ + && apk add git \ + && mkdir /app && cd /app \ + && git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \ + && cd baskerville_dashboard/front-end \ + && npm install \ + && npm install -g @angular/cli@11.1.0 + +# provide values for the following +ENV API_BASE_URL $API_BASE_URL +ENV SOCKET_URL $SOCKET_URL + +# this builds the front-end with provided configuration and copies the result in /var/www for nginx +RUN cd /app/baskerville_dashboard/front-end \ + && npm run config \ + && ng build --prod + + +FROM nginx:1.17.1-alpine +COPY nginx.conf /etc/nginx/nginx.conf +COPY frontend.conf /etc/nginx/sites.d/frontend.conf +COPY --from=build /app/baskerville_dashboard/front-end/dist/baskerville_dashboard_frontend/ /var/www/baskerville_dashboard_frontend/ + +#RUN /usr/sbin/nginx -s reload +#RUN #/etc/init.d/nginx reload + +CMD ["nginx", "-g", "daemon off;"] +EXPOSE 80 5000 \ No newline at end of file diff --git a/containers/baskerville_dashboard/front-end/nginx.conf b/containers/baskerville_dashboard/front-end/nginx.conf new file mode 100644 index 0000000..2bf5997 --- /dev/null +++ b/containers/baskerville_dashboard/front-end/nginx.conf @@ -0,0 +1,26 @@ +events{} +http { + include /etc/nginx/mime.types; + server { + listen 80; + server_name _; + location / { + root /var/www/baskerville_dashboard_frontend/; + index index.html; + } + 
location /api { + rewrite ^/api/(.*) /$1 break; + proxy_pass http://127.0.0.1:5000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; # forward the client address, not a literal placeholder + } + location /socket.io { + proxy_http_version 1.1; + proxy_buffering off; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "Upgrade"; + proxy_pass http://127.0.0.1:5000/socket.io; + } + } + include /etc/nginx/sites.d/*.conf; +} diff --git a/docker-compose.yaml b/docker-compose.yaml index 217bc61..d7abb74 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -225,7 +225,6 @@ services: - redis - filebeat - # feedback pipeline baskerville_feedback: image: "${BASKERVILLE_IMAGE}" env_file: @@ -300,8 +299,12 @@ services: build: context: ./containers/baskerville_dashboard/front-end dockerfile: Dockerfile + args: + - DASHBOARD_BRANCH=fine_tuning_dockerized_version + - API_BASE_URL=http://localhost:5000/api/1 # 'https://api.baskerville-dashboard.deflect.ca/api/1' + - SOCKET_URL=http://localhost:5000 # 'https://api.baskerville-dashboard.deflect.ca' ports: - - "3000:3000" + - "80:80" dashboard_backend: build: @@ -310,11 +313,13 @@ services: args: - BASKERVILLE_BRANCH=dashboard_changes_pt2 - DASHBOARD_BRANCH=fine_tuning_dockerized_version +# network_mode: "service:dashboard_frontend" ports: - "5000:5000" depends_on: - postgres - kafka + - redis volumes: postgres-data3: From a86256a83721cadeb8b2b1f7b0025ae7dbc65fee Mon Sep 17 00:00:00 2001 From: Maria Karanasou Date: Mon, 22 Nov 2021 13:42:02 +0200 Subject: [PATCH 5/7] adding Java 1.8 and JAVA_HOME --- containers/baskerville_dashboard/backend/Dockerfile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/containers/baskerville_dashboard/backend/Dockerfile b/containers/baskerville_dashboard/backend/Dockerfile index 0c9c050..a7997eb 100644 --- a/containers/baskerville_dashboard/backend/Dockerfile +++ b/containers/baskerville_dashboard/backend/Dockerfile @@ -1,4 +1,6 @@ +FROM openjdk:8 FROM python:3.6 + ARG BASKERVILLE_BRANCH ARG 
DASHBOARD_BRANCH @@ -8,6 +10,14 @@ ENV REDIS_HOST 'redis' # TODO: The following installs spark-iforest, esretriever and baskerville # which takes a long time - mostly because of the different pyspark versions. +# Get jdk8 from previous stage https://docs.docker.com/develop/develop-images/multistage-build/ +COPY --from=openjdk:8 /usr/local/openjdk-8 /usr/local/openjdk-8 + +# Set java path +ENV JAVA_HOME /usr/local/openjdk-8 +ENV PATH $PATH:$JAVA_HOME/bin + + RUN apt-get update \ && apt-get -y upgrade \ && apt-get install git \ From 6c60b2f62d26cbc1caaa597408171ce6cbb84660 Mon Sep 17 00:00:00 2001 From: Maria Karanasou Date: Fri, 26 Nov 2021 19:07:58 +0200 Subject: [PATCH 6/7] single multistage Dockerfile for dashboard --- README.md | 26 ++++- containers/baskerville_dashboard/Dockerfile | 97 +++++++++++++++++++ .../baskerville_dashboard/backend/Dockerfile | 51 ---------- .../{backend => }/baskerville.yaml | 25 +---- .../{backend => }/config.yaml | 0 .../front-end/Dockerfile | 34 ------- .../front-end/nginx.conf | 26 ----- containers/baskerville_dashboard/nginx.conf | 76 +++++++++++++++ containers/baskerville_dashboard/start.sh | 1 + docker-compose.yaml | 23 ++--- 10 files changed, 213 insertions(+), 146 deletions(-) create mode 100644 containers/baskerville_dashboard/Dockerfile delete mode 100644 containers/baskerville_dashboard/backend/Dockerfile rename containers/baskerville_dashboard/{backend => }/baskerville.yaml (84%) rename containers/baskerville_dashboard/{backend => }/config.yaml (100%) delete mode 100644 containers/baskerville_dashboard/front-end/Dockerfile delete mode 100644 containers/baskerville_dashboard/front-end/nginx.conf create mode 100644 containers/baskerville_dashboard/nginx.conf create mode 100644 containers/baskerville_dashboard/start.sh diff --git a/README.md b/README.md index b43fdfb..bbd9987 100644 --- a/README.md +++ b/README.md @@ -130,4 +130,28 @@ docker-compose exec kafka bash # the following will consume / display a few messages, just to 
make sure all is well /opt/bitnami/kafka/bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic deflect.logs --offset 6131 --partition 0 -``` \ No newline at end of file +``` + +### Dashboard +Notes for the Baskerville dashboard: +- The Dockerfile is heavy because it is a multi-stage build. It runs Nginx internally but, since we already have an nginx service, +it would be better to serve the front-end from a common volume and to set up proper networking so the backend is served as well (only for the web-sockets). +- It has Baskerville as a dependency, with all that this entails (esretriever, iforest, pyspark, etc.): different pyspark versions with conflicts and a long build time. + + + +### Misc +If baskerville_preprocessing and baskerville_postprocessing fail to start because the `baskerville` database does not exist: +```bash +docker-compose exec postgres bash +psql +CREATE DATABASE baskerville; +\q +exit + +docker-compose restart baskerville_preprocessing baskerville_postprocessing +``` + +### Firewall +- Open port 29092 for Kafka connections. + diff --git a/containers/baskerville_dashboard/Dockerfile b/containers/baskerville_dashboard/Dockerfile new file mode 100644 index 0000000..1eb87f6 --- /dev/null +++ b/containers/baskerville_dashboard/Dockerfile @@ -0,0 +1,97 @@ +# GLOBAL ARGS: +ARG DOCKER_KAFKA_HOST +ARG DASHBOARD_BRANCH +# for front-end +ARG API_BASE_URL +ARG SOCKET_URL +# for backend: +ARG BASKERVILLE_BRANCH +ARG REDIS_HOST + +#FROM nginx:1.17.1-alpine AS NGINX +FROM node:14.8.0-alpine AS FRONTEND +ARG DASHBOARD_BRANCH +ARG API_BASE_URL +ARG SOCKET_URL + +ENV DASHBOARD_BRANCH $DASHBOARD_BRANCH +ENV API_BASE_URL $API_BASE_URL +ENV SOCKET_URL $SOCKET_URL + +RUN apk update \ +# && apk upgrade \ + && apk add git \ + && mkdir /app && cd /app \ + && git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \ + && cd baskerville_dashboard/front-end \ + && npm install \ + && npm install -g 
@angular/cli@11.1.0 + +RUN echo $SOCKET_URL, $API_BASE_URL && echo "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" + +# this builds the front-end with provided configuration and copies the result in /var/www for nginx +RUN cd /app/baskerville_dashboard/front-end \ + && npm run config \ + && ng build --prod + +FROM openjdk:8 AS OJDK8 +FROM python:3.6 AS BACKEND + +ARG DOCKER_KAFKA_HOST +ARG BASKERVILLE_BRANCH +ARG DASHBOARD_BRANCH +ARG REDIS_HOST + +ENV DOCKER_KAFKA_HOST $DOCKER_KAFKA_HOST +ENV DASHBOARD_BRANCH $DASHBOARD_BRANCH +ENV BASKERVILLE_BRANCH $BASKERVILLE_BRANCH +ENV REDIS_HOST $REDIS_HOST +ENV BASKERVILLE_ROOT '/app/baskerville' +ENV BASKERVILLE_DASH_ROOT '/app/baskerville_dashboard' + + +# Get jdk8 from previous stage https://docs.docker.com/develop/develop-images/multistage-build/ +COPY --from=OJDK8 /usr/local/openjdk-8 /usr/local/openjdk-8 +COPY --from=FRONTEND /app/baskerville_dashboard/front-end/dist/baskerville_dashboard_frontend/ /var/www/baskerville_dashboard_frontend/ + +# Set java path +ENV JAVA_HOME /usr/local/openjdk-8 +ENV PATH $PATH:$JAVA_HOME/bin + +# TODO: The following installs spark-iforest, esretriever and baskerville +# which takes a long time - mostly because of the different pyspark versions. +RUN apt-get clean && apt-get update \ +# && apt-get -y upgrade \ + && apt-get install -y nginx=1.18.* \ + && apt-get install -y git \ + && pip install --upgrade pip \ + && mkdir /app && cd /app \ + && git clone https://github.com/titicaca/spark-iforest.git \ + && cd spark-iforest/python \ + && pip install . \ + && cd /app \ + && git clone https://github.com/equalitie/esretriever.git \ + && cd esretriever \ + && pip install . \ + && cd /app \ + && git clone --branch $BASKERVILLE_BRANCH https://github.com/deflect-ca/baskerville.git \ + && cd baskerville \ + && pip install . \ + && cd /app \ + && git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \ + && cd baskerville_dashboard/backend \ + && pip install .
\ + && cd /app/baskerville_dashboard/backend/src/baskerville_dashboard + +# copy all related configurations +COPY ./nginx.conf /etc/nginx/nginx.conf +COPY ./config.yaml /app/baskerville_dashboard/backend/conf +COPY ./baskerville.yaml /app/baskerville_dashboard/backend/conf + +WORKDIR /app/baskerville_dashboard/backend/src/baskerville_dashboard + +# socketio for Python includes a production grade web server. +# start.sh starts nginx and runs dashboard backend. +COPY start.sh start.sh +CMD ["sh", "./start.sh"] +EXPOSE 80 81 5000 \ No newline at end of file diff --git a/containers/baskerville_dashboard/backend/Dockerfile b/containers/baskerville_dashboard/backend/Dockerfile deleted file mode 100644 index a7997eb..0000000 --- a/containers/baskerville_dashboard/backend/Dockerfile +++ /dev/null @@ -1,51 +0,0 @@ -FROM openjdk:8 -FROM python:3.6 - -ARG BASKERVILLE_BRANCH -ARG DASHBOARD_BRANCH - -ENV BASKERVILLE_ROOT '/app/baskerville' -ENV BASKERVILLE_DASH_ROOT '/app/baskerville_dashboard' -ENV REDIS_HOST 'redis' -# TODO: The following installs spark-iforest, esretriever and baskerville -# which takes a long time - mostly because of the different pyspark versions. - -# Get jdk8 from previous stage https://docs.docker.com/develop/develop-images/multistage-build/ -COPY --from=openjdk:8 /usr/local/openjdk-8 /usr/local/openjdk-8 - -# Set java path -ENV JAVA_HOME /usr/local/openjdk-8 -ENV PATH $PATH:$JAVA_HOME/bin - - -RUN apt-get update \ - && apt-get -y upgrade \ - && apt-get install git \ - && pip install --upgrade pip \ - && mkdir /app && cd /app \ - && git clone https://github.com/titicaca/spark-iforest.git \ - && cd spark-iforest/python \ - && pip install . \ - && cd /app \ - && git clone https://github.com/equalitie/esretriever.git \ - && cd esretriever \ - && pip install . \ - && cd /app \ - && git clone --branch $BASKERVILLE_BRANCH https://github.com/deflect-ca/baskerville.git \ - && cd baskerville \ - && pip install . 
\ - && cd /app \ - && git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \ - && cd baskerville_dashboard/backend \ - && pip install . - -COPY ./config.yaml /app/baskerville_dashboard/backend/conf -COPY ./baskerville.yaml /app/baskerville_dashboard/backend/conf - -RUN cd /app/baskerville_dashboard/backend/src/baskerville_dashboard -WORKDIR /app/baskerville_dashboard/backend/src/baskerville_dashboard - -# socketio for Python includes a production grade web server: -CMD ["python", "app.py"] - -EXPOSE 5000 \ No newline at end of file diff --git a/containers/baskerville_dashboard/backend/baskerville.yaml b/containers/baskerville_dashboard/baskerville.yaml similarity index 84% rename from containers/baskerville_dashboard/backend/baskerville.yaml rename to containers/baskerville_dashboard/baskerville.yaml index 2151c59..4edfa7a 100644 --- a/containers/baskerville_dashboard/backend/baskerville.yaml +++ b/containers/baskerville_dashboard/baskerville.yaml @@ -2,7 +2,7 @@ database: name: baskerville user: !ENV ${DB_USER:postgres} - host: !ENV ${DB_HOST:postgres} + host: !ENV ${DB_HOST:127.0.0.1} port: !ENV ${DB_PORT:5432} password: !ENV ${DB_PASS:secret} type: 'postgres' @@ -91,7 +91,7 @@ engine: kafka: connection: - bootstrap_servers: !ENV '${KAFKA_HOST:kafka}' + bootstrap_servers: !ENV '${DOCKER_KAFKA_HOST:0.0.0.0}' # bootstrap_servers: !ENV ${KAFKA_HOST} auto_offset_reset: 'smallest' data_topic: 'feedback' @@ -111,10 +111,10 @@ spark: parallelism: -1 log_conf: 'true' log_level: 'ERROR' - redis_host: !ENV ${REDIS_HOST:localhost} + redis_host: !ENV ${REDIS_HOST:redis} redis_password: !ENV ${REDIS_PASSWORD:""} redis_port: '6379' - jars: !ENV '${BASKERVILLE_ROOT}/data/jars/spark-redis_2.11-2.5.0-SNAPSHOT-jar-with-dependencies.jar,${BASKERVILLE_ROOT}/data/jars/spark-iforest-3.0.1.jar,${BASKERVILLE_ROOT}/data/jars/postgresql-42.2.4.jar,${BASKERVILLE_ROOT}/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar' + jars: !ENV 
'${BASKERVILLE_ROOT}/data/jars/spark-redis_2.11-2.5.0-SNAPSHOT-jar-with-dependencies.jar,${BASKERVILLE_ROOT}/data/jars/spark-iforest-2.4.0.99.jar,${BASKERVILLE_ROOT}/data/jars/postgresql-42.2.4.jar,${BASKERVILLE_ROOT}/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar' session_timezone: 'UTC' shuffle_partitions: 12 spark_driver_memory: '8G' @@ -135,20 +135,3 @@ spark: driver_java_options: '-Dio.netty.noPreferDirect=true -Dio.netty.allocator.type=unpooled -XX:+UseCompressedOops -XX:G1HeapRegionSize=10 -XX:+UseG1GC -XX:ParallelGCThreads=8 -XX:ConcGCThreads=2 -XX:InitiatingHeapOccupancyPercent=25 -XX:+UnlockDiagnosticVMOptions -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=1098' executor_extra_java_options: '-Dio.netty.noPreferDirect=true -Dio.netty.allocator.type=unpooled -XX:+UseCompressedOops -XX:G1HeapRegionSize=10 -XX:+UseG1GC -XX:ParallelGCThreads=8 -XX:ConcGCThreads=2 -XX:InitiatingHeapOccupancyPercent=25 -XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+UnlockDiagnosticVMOptions -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=1098' - -#spark: -# ssl_enabled: True -# ssl_truststore: '/root/keys/truststore/kafka.truststore.jks' -# ssl_truststore_password: 'B1^ZRUUVoIuKND7t2HiJ8fwRg0kdMo4zdh8m8eRzgXw!' -# ssl_keystore: '/root/keys/keystore_client/kafka.keystore.jks' -# ssl_keystore_password: 'B1^ZRUUVoIuKND7t2HiJ8fwRg0kdMo4zdh8m8eRzgXw!' -# ssl_keypassword: 'B1^ZRUUVoIuKND7t2HiJ8fwRg0kdMo4zdh8m8eRzgXw!' -# auth_secret: 'B1^ZRUUVoIuKND7t2HiJ8fwRg0kdMo4zdh8m8eRzgXw!' -# redis_host: 'bnode1.deflect.ca' -# redis_password: 'B1^ZRUUVoIuKND7t2HiJ8fwRg0kdMo4zdh8m8eRzgXw!' 
-# app_name: 'Postprocessing' -# master: !ENV 'spark://${SPARK_MASTER_HOST}:7077' -# parallelism: -1 -# log_conf: 'true' -# log_level: 'ERROR' - diff --git a/containers/baskerville_dashboard/backend/config.yaml b/containers/baskerville_dashboard/config.yaml similarity index 100% rename from containers/baskerville_dashboard/backend/config.yaml rename to containers/baskerville_dashboard/config.yaml diff --git a/containers/baskerville_dashboard/front-end/Dockerfile b/containers/baskerville_dashboard/front-end/Dockerfile deleted file mode 100644 index 718578e..0000000 --- a/containers/baskerville_dashboard/front-end/Dockerfile +++ /dev/null @@ -1,34 +0,0 @@ -FROM node:14.8.0-alpine AS build -ARG DASHBOARD_BRANCH -ARG API_BASE_URL -ARG SOCKET_URL - -RUN apk update \ -# && apk upgrade \ - && apk add git \ - && mkdir /app && cd /app \ - && git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \ - && cd baskerville_dashboard/front-end \ - && npm install \ - && npm install -g @angular/cli@11.1.0 - -# provide values for the following -ENV API_BASE_URL $API_BASE_URL -ENV SOCKET_URL $SOCKET_URL - -# this builds the front-end with provided configuration and copies the result in /var/www for nginx -RUN cd /app/baskerville_dashboard/front-end \ - && npm run config \ - && ng build --prod - - -FROM nginx:1.17.1-alpine -COPY nginx.conf /etc/nginx/nginx.conf -COPY frontend.conf /etc/nginx/sites.d/frontend.conf -COPY --from=build /app/baskerville_dashboard/front-end/dist/baskerville_dashboard_frontend/ /var/www/baskerville_dashboard_frontend/ - -#RUN /usr/sbin/nginx -s reload -#RUN #/etc/init.d/nginx reload - -CMD ["nginx", "-g", "daemon off;"] -EXPOSE 80 5000 \ No newline at end of file diff --git a/containers/baskerville_dashboard/front-end/nginx.conf b/containers/baskerville_dashboard/front-end/nginx.conf deleted file mode 100644 index 2bf5997..0000000 --- a/containers/baskerville_dashboard/front-end/nginx.conf +++ /dev/null @@ -1,26 +0,0 @@ 
-events{} -http { - include /etc/nginx/mime.types; - server { - listen 80; - server_name _; - location / { - root /var/www/baskerville_dashboard_frontend/; - index index.html; - } - location /api { - rewrite ^/api/(.*) /$1 break; - proxy_pass http://127.0.0.1:5000; - proxy_set_header Host $host; - proxy_set_header X-Real-IP ip_address; - } - location /socket.io { - proxy_http_version 1.1; - proxy_buffering off; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "Upgrade"; - proxy_pass http://127.0.0.1:5000/socket.io; - } - } - include /etc/nginx/sites.d/*.conf; -} diff --git a/containers/baskerville_dashboard/nginx.conf b/containers/baskerville_dashboard/nginx.conf new file mode 100644 index 0000000..225a5ca --- /dev/null +++ b/containers/baskerville_dashboard/nginx.conf @@ -0,0 +1,76 @@ +events{} +http { + include /etc/nginx/mime.types; + server { + listen 80; + server_name _; + server_name localhost; + + location / { + root /var/www/baskerville_dashboard_frontend/; + index index.html; + } + location /api { + rewrite ^/api/(.*) /$1 break; + proxy_pass http://127.0.0.1:5000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + + location /socket.io { + include proxy_params; + proxy_http_version 1.1; + proxy_buffering off; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "Upgrade"; + proxy_pass http://127.0.0.1:5000/socket.io; + } +# location /api { +# rewrite ^/api/(.*) /$1 break; +# proxy_pass http://127.0.0.0:5000; +# proxy_set_header Host $host; +# proxy_set_header X-Real-IP ip_address; +# } +# location ~* \.io { +# proxy_set_header X-Real-IP $remote_addr; +# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; +# proxy_set_header Host $http_host; +# proxy_set_header X-NginX-Proxy false; +# +# proxy_pass http://127.0.0.0:5000; +# proxy_redirect off; +# +# proxy_http_version 1.1; +# proxy_set_header Upgrade $http_upgrade; +# proxy_set_header Connection "upgrade"; +# } + + } + +# server {
+# listen 81; +# server_name "dashboard_backend"; +# # server_name $hostname; +# location /api { +# rewrite ^/api/(.*) /$1 break; +# proxy_pass http://localhost:5000; +# proxy_set_header Host $host; +# proxy_set_header X-Real-IP ip_address; +# } +# location ~* \.io { +# proxy_set_header X-Real-IP $remote_addr; +# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; +# proxy_set_header Host $http_host; +# proxy_set_header X-NginX-Proxy false; +# +# proxy_pass http://localhost:5000; +# proxy_redirect off; +# +# proxy_http_version 1.1; +# proxy_set_header Upgrade $http_upgrade; +# proxy_set_header Connection "upgrade"; +# } +# +# } + # include /etc/nginx/sites.d/*.conf; +} diff --git a/containers/baskerville_dashboard/start.sh b/containers/baskerville_dashboard/start.sh new file mode 100644 index 0000000..cc981a1 --- /dev/null +++ b/containers/baskerville_dashboard/start.sh @@ -0,0 +1 @@ +nginx && python app.py \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index d7abb74..c38ddc3 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -295,31 +295,28 @@ services: # - "8080:8080" network_mode: "service:nginx" - dashboard_frontend: + dashboard: build: - context: ./containers/baskerville_dashboard/front-end + context: ./containers/baskerville_dashboard dockerfile: Dockerfile args: - DASHBOARD_BRANCH=fine_tuning_dockerized_version - API_BASE_URL=http://localhost:5000/api/1 # 'https://api.baskerville-dashboard.deflect.ca/api/1' - - SOCKET_URL=http://localhost:5000 # 'https://api.baskerville-dashboard.deflect.ca' - ports: - - "80:80" - - dashboard_backend: - build: - context: ./containers/baskerville_dashboard/backend - dockerfile: Dockerfile - args: + - SOCKET_URL=http://127.0.0.1:5000 # 'https://api.baskerville-dashboard.deflect.ca' - BASKERVILLE_BRANCH=dashboard_changes_pt2 - - DASHBOARD_BRANCH=fine_tuning_dockerized_version -# network_mode: "service:dashboard_frontend" + - REDIS_HOST=$REDIS_HOST + - 
DOCKER_KAFKA_HOST=$DOCKER_KAFKA_HOST + - DB_HOST=$DB_HOST ports: + - "80:80" # todo: this will conflict with nginx service. - "5000:5000" + # network_mode: "service:dashboard_backend" depends_on: - postgres - kafka - redis + networks: + - local volumes: postgres-data3: From 8d3ba41039b79e36303c230b4d445d590e2a074a Mon Sep 17 00:00:00 2001 From: Maria Karanasou Date: Fri, 26 Nov 2021 19:16:46 +0200 Subject: [PATCH 7/7] feedback.yaml updates --- conf/feedback.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/feedback.yaml b/conf/feedback.yaml index 8273618..14ae4d9 100644 --- a/conf/feedback.yaml +++ b/conf/feedback.yaml @@ -112,7 +112,7 @@ kafka: publish_predictions: 'baskerville.predictions' spark: - app_name: 'Baskerville Dashboard' + app_name: 'Feedback' master: 'local' #!ENV 'spark://${SPARK_MASTER_HOST}:7077' parallelism: -1 log_conf: 'true' @@ -120,7 +120,7 @@ spark: redis_host: !ENV ${REDIS_HOST} redis_password: !ENV ${REDIS_PASSWORD} redis_port: '6379' - jars: !ENV '/app/baskerville/data/jars/spark-iforest-3.0.1.jar,/app/baskerville/data/jars/postgresql-42.2.4.jar,/app/baskerville/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar' + jars: !ENV '/app/baskerville/data/jars/spark-iforest-2.4.0.99.jar,/app/baskerville/data/jars/postgresql-42.2.4.jar,/app/baskerville/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar' session_timezone: 'UTC' shuffle_partitions: 12 spark_driver_memory: '4G'