Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding dashboard #4

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,28 @@ docker-compose exec kafka bash

# the following will consume / display a few messages, just to make sure all is well
/opt/bitnami/kafka/bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic deflect.logs --offset 6131 --partition 0
```
```

### Dashboard
Notes for the Baskerville dashboard:
- The Dockerfile is heavy because it is a multi-stage build. It runs Nginx internally; however, since we already have an nginx service,
it would be nice to have a common volume from which the front-end is served, and proper networking so that the backend can be served as well (only for the web-sockets).
- It has Baskerville as a dependency (with all that this entails: esretriever, iforest, pyspark, etc., which means conflicting pyspark versions and a long build time).



### Misc
If `baskerville_preprocessing` and `baskerville_postprocessing` fail to start because the `baskerville` database does not exist:
```bash
docker-compose exec postgres bash
psql
CREATE DATABASE baskerville;
\q
exit

docker-compose restart baskerville_preprocessing baskerville_postprocessing
```

### Firewall
- Open port 29092 for Kafka connections.

160 changes: 160 additions & 0 deletions conf/feedback.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
---
# Database connection settings.
# NOTE(review): `!ENV` is a custom tag; the ${VAR:default} form presumably falls
# back to the text after the colon when VAR is unset - confirm against the loader.
database:
name: baskerville
user: !ENV ${DB_USER:postgres}
host: !ENV ${DB_HOST:localhost}
port: !ENV ${DB_PORT:5432}
# NOTE(review): if the default mechanism described above holds, an unset DB_PASS
# yields the guessable password "secret" - consider requiring DB_PASS explicitly.
password: !ENV ${DB_PASS:secret}
type: 'postgres'
data_partition: False

# Account and organization details for this installation.
# ADMIN_PASS has no default here, while the organization fields carry
# test placeholders (`test`, `test_org`) after the colon.
user_details:
username: 'admin'
password: !ENV '${ADMIN_PASS}'
organization_uuid: !ENV '${ORG_UUID:test}'
organization_name: !ENV '${ORG_NAME:test_org}'

# Engine settings: mode flags, metrics, feature list, input data description,
# logging, cache options and sample input files.
engine:
ttl: 600
client_mode: True
id_client: !ENV '${BASKERVILLE_CLIENT_ID}'
time_bucket: 15
# NOTE(review): tagged !ENV but contains no ${...} placeholder, so there is
# nothing to substitute; the same holds for the other literal paths below.
storage_path: !ENV '/app/baskerville/data/'
metrics:
port: 8998
exported_dashboard_file: !ENV '/app/baskerville/data/metrics/Baskerville-metrics-dashboard.json'
# Method names listed per component; presumably the ones that get
# performance instrumentation - confirm against the metrics code.
performance:
pipeline:
- 'get_data'
- 'feature_extraction'
- 'group_by'
- 'predict'
- 'save'
- 'clean_cache'
- 'update'
- 'instantiate_spark_session'
request_set_cache:
- 'instantiate_cache'
- '__getitem__'
- '__contains__'
- 'clean'
features: True
progress: True
verbose: False
datetime_format: '%Y-%m-%d %H:%M:%S'
model_id: '' #50
trigger_challenge: True
challenge_threshold: 0.2
challenge: False # 'ip'
min_num_requests: 50
# Names of the features enabled for this configuration.
extra_features:
- css_to_html_ratio
- image_to_html_ratio
- js_to_html_ratio
- minutes_total
- path_depth_average
- path_depth_variance
- payload_size_average
- payload_size_log_average
- request_interval_average
- request_interval_variance
- request_total
- response4xx_to_request_ratio
- top_page_to_request_ratio
- unique_path_rate
- unique_path_to_request_ratio
- unique_query_rate
- unique_query_to_unique_path_ratio
- unique_ua_rate
# Description of the incoming records: parser, schema file, grouping columns
# and the column holding the timestamp.
data_config:
parser: JSONLogSparkParser
schema: !ENV '/app/baskerville/data/samples/sample_feedback_schema.json'
group_by_cols:
- 'client_request_host'
- 'client_ip'
timestamp_column: '@timestamp'
logpath : !ENV '/app/baskerville/src/baskerville/logs/baskerville.log'
# NOTE(review): DEBUG is verbose - confirm it is intended outside development.
log_level: 'DEBUG'
cache_expire_time: 604800 # sec (604800 = 1 week)
cache_load_past: False # Load past request sets or not
cache_lookup: True # search cache for sessions
cross_reference: False # search MISP for IPs
db_lookup: False # search database for sessions
# raw_log paths and the simulation log_file below point at the same sample file.
raw_log:
paths:
- !ENV '/app/baskerville/data/samples/test_data_1k.json' # 1k randomized logs
simulation:
sleep: False
verbose: True
log_file: !ENV '/app/baskerville/data/samples/test_data_1k.json' # 1k randomized logs

# Kafka settings: a plain connection taken from KAFKA_HOST, an SSL connection
# to the clearing house, and the topic / consumer-group names.
kafka:
connection:
bootstrap_servers: !ENV '${KAFKA_HOST}'

clearing_house_connection:
bootstrap_servers: !ENV '${CLEARING_HOUSE_KAFKA}'
security_protocol: 'SSL'
# NOTE(review): hostname checking is switched off for the SSL connection -
# confirm this is acceptable for the clearing-house certificates.
ssl_check_hostname: False
ssl_cafile: '/app/baskerville/clearing_house_connection/caroot.pem'
ssl_certfile: '/app/baskerville/clearing_house_connection/certificate.pem'
ssl_keyfile: '/app/baskerville/clearing_house_connection/key.pem'
auto_offset_reset: 'smallest'
data_topic: 'feedback'
consume_predictions_topic: 'predictions'
# NOTE(review): this uses the literal text 'id_client', whereas
# feedback_response_topic below substitutes ${BASKERVILLE_CLIENT_ID} -
# verify whether a placeholder was intended here as well.
predictions_topic: 'id_client.baskerville.predictions'
feedback_topic: 'feedback'
feedback_response_topic: !ENV '${BASKERVILLE_CLIENT_ID}.feedback'
register_topic: 'baskerville.register'
consume_group: 'baskerville'
publish_logs: 'baskerville.logs'
publish_stats: 'baskerville.stats'
publish_predictions: 'baskerville.predictions'

# Spark session settings for the 'Feedback' application, running against a
# local master (the cluster master URL is kept as a trailing comment).
spark:
app_name: 'Feedback'
master: 'local' #!ENV 'spark://${SPARK_MASTER_HOST}:7077'
parallelism: -1
log_conf: 'true'
log_level: 'ERROR'
redis_host: !ENV ${REDIS_HOST}
redis_password: !ENV ${REDIS_PASSWORD}
# NOTE(review): the port is a quoted string, not an integer - confirm the
# consumer accepts that.
redis_port: '6379'
# Comma-separated list of jar files.
jars: !ENV '/app/baskerville/data/jars/spark-iforest-2.4.0.99.jar,/app/baskerville/data/jars/postgresql-42.2.4.jar,/app/baskerville/data/jars/spark-streaming-kafka-0-8-assembly_2.11-2.4.0.jar'
session_timezone: 'UTC'
shuffle_partitions: 12
spark_driver_memory: '4G'
db_driver: 'org.postgresql.Driver'
storage_level: 'OFF_HEAP'
event_log: True
# NOTE(review): hard-coded secret, unlike the store passwords below which come
# from the environment - consider sourcing it the same way.
auth_secret: 'TEST_SECRET' #
# SSL is disabled here; the trust/key store settings below are still defined.
ssl_enabled: False
ssl_truststore: !ENV '/app/baskerville/data/keystore/truststore'
ssl_truststore_password: !ENV ${TRUSTSTORE_PASS}
ssl_keystore: !ENV '/app/baskerville/data/keystore/keystore'
ssl_keystore_password: !ENV ${KEYSTORE_PASS}
ssl_keypassword: !ENV ${SSL_PASS}
serializer: 'org.apache.spark.serializer.KryoSerializer'
kryoserializer_buffer_max: '2024m'
kryoserializer_buffer: '1024k'
# https://spark.apache.org/docs/latest/tuning.html#tuning-data-structures
# NOTE(review): both option strings below enable JMX remote on port 1098 with
# ssl=false and authenticate=false - confirm the port is not reachable from
# untrusted networks.
driver_java_options: '-Dio.netty.noPreferDirect=true -Dio.netty.allocator.type=unpooled -XX:+UseCompressedOops -XX:G1HeapRegionSize=10 -XX:+UseG1GC -XX:ParallelGCThreads=8 -XX:ConcGCThreads=2 -XX:InitiatingHeapOccupancyPercent=25 -XX:+UnlockDiagnosticVMOptions -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=1098'
executor_extra_java_options: '-Dio.netty.noPreferDirect=true -Dio.netty.allocator.type=unpooled -XX:+UseCompressedOops -XX:G1HeapRegionSize=10 -XX:+UseG1GC -XX:ParallelGCThreads=8 -XX:ConcGCThreads=2 -XX:InitiatingHeapOccupancyPercent=25 -XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+UnlockDiagnosticVMOptions -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=1098'


# Example of an SSL-enabled spark section. Never commit real credentials, even
# in commented-out examples; supply them through the environment instead.
#spark:
# ssl_enabled: True
# ssl_truststore: '/root/keys/truststore/kafka.truststore.jks'
# ssl_truststore_password: !ENV ${TRUSTSTORE_PASS}
# ssl_keystore: '/root/keys/keystore_client/kafka.keystore.jks'
# ssl_keystore_password: !ENV ${KEYSTORE_PASS}
# ssl_keypassword: !ENV ${SSL_PASS}
# auth_secret: !ENV ${SPARK_AUTH_SECRET}
# redis_host: 'bnode1.deflect.ca'
# redis_password: !ENV ${REDIS_PASSWORD}
# app_name: 'Postprocessing'
# master: !ENV 'spark://${SPARK_MASTER_HOST}:7077'
# parallelism: -1
# log_conf: 'true'
# log_level: 'ERROR'

97 changes: 97 additions & 0 deletions containers/baskerville_dashboard/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# GLOBAL ARGS:
# These are declared before the first FROM, i.e. outside any build stage;
# each stage below re-declares the ones it needs with its own ARG lines.
ARG DOCKER_KAFKA_HOST
ARG DASHBOARD_BRANCH
# for front-end
ARG API_BASE_URL
ARG SOCKET_URL
# for backend:
ARG BASKERVILLE_BRANCH
ARG REDIS_HOST

#FROM nginx:1.17.1-alpine AS NGINX
# Front-end build stage, based on the Node.js Alpine image.
FROM node:14.8.0-alpine AS FRONTEND
# Build args must be re-declared inside the stage to be usable here.
ARG DASHBOARD_BRANCH
ARG API_BASE_URL
ARG SOCKET_URL

# Make the build args available as environment variables to the RUN steps.
# Uses the key=value form; the space-separated `ENV key value` form is legacy.
ENV DASHBOARD_BRANCH="$DASHBOARD_BRANCH" \
    API_BASE_URL="$API_BASE_URL" \
    SOCKET_URL="$SOCKET_URL"

# Install git, clone the requested dashboard branch into /app and install the
# front-end's npm dependencies plus a globally available Angular CLI.
# NOTE(review): the version pin after `@angular/cli@` on the last line looks
# mangled (it reads like an obfuscated e-mail address) - confirm the intended
# CLI version before building.
RUN apk update \
# && apk upgrade \
&& apk add git \
&& mkdir /app && cd /app \
&& git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \
&& cd baskerville_dashboard/front-end \
&& npm install \
&& npm install -g @angular/[email protected]

# Print the endpoints the front-end is being built against (visible in the build log).
RUN echo $SOCKET_URL, $API_BASE_URL \
    && echo "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"

# Generate the front-end configuration and produce the production bundle.
# The resulting dist/ directory is copied to /var/www by the COPY --from=FRONTEND
# instruction in the backend stage, not by this step.
RUN cd /app/baskerville_dashboard/front-end && \
    npm run config && \
    ng build --prod

# OJDK8 exists only as a source for the JDK files copied into the backend stage.
FROM openjdk:8 AS OJDK8
# Final stage: Python backend image.
FROM python:3.6 AS BACKEND

# Build args must be re-declared inside the stage to be usable here.
ARG DOCKER_KAFKA_HOST
ARG BASKERVILLE_BRANCH
ARG DASHBOARD_BRANCH
ARG REDIS_HOST

# Persist the build args and the install roots as environment variables.
# Uses the key=value form; the space-separated `ENV key value` form is legacy.
ENV DOCKER_KAFKA_HOST="$DOCKER_KAFKA_HOST" \
    DASHBOARD_BRANCH="$DASHBOARD_BRANCH" \
    BASKERVILLE_BRANCH="$BASKERVILLE_BRANCH" \
    REDIS_HOST="$REDIS_HOST" \
    BASKERVILLE_ROOT='/app/baskerville' \
    BASKERVILLE_DASH_ROOT='/app/baskerville_dashboard'


# Get jdk8 from previous stage https://docs.docker.com/develop/develop-images/multistage-build/
COPY --from=OJDK8 /usr/local/openjdk-8 /usr/local/openjdk-8
# Bring in the built front-end bundle from the FRONTEND stage.
COPY --from=FRONTEND /app/baskerville_dashboard/front-end/dist/baskerville_dashboard_frontend/ /var/www/baskerville_dashboard_frontend/

# Set java path
# JAVA_HOME and PATH stay in separate ENV instructions: variables set within one
# ENV instruction are not visible to other assignments in that same instruction.
ENV JAVA_HOME=/usr/local/openjdk-8
ENV PATH="$PATH:$JAVA_HOME/bin"

# TODO: The following installs spark-iforest, esretriever and baskerville
# which takes a long time - mostly because of the different pyspark versions.
# Steps: install nginx and git, then clone and pip-install (in order)
# spark-iforest, esretriever, baskerville and the dashboard backend under /app.
# - `-y` is passed for every package so apt never waits for confirmation in a
#   non-interactive build.
# - the apt package lists are removed in the same layer to keep the image smaller.
RUN apt-get clean && apt-get update \
# && apt-get -y upgrade \
 && apt-get install -y nginx=1.18.* git \
 && rm -rf /var/lib/apt/lists/* \
 && pip install --upgrade pip \
 && mkdir /app && cd /app \
 && git clone https://github.com/titicaca/spark-iforest.git \
 && cd spark-iforest/python \
 && pip install . \
 && cd /app \
 && git clone https://github.com/equalitie/esretriever.git \
 && cd esretriever \
 && pip install . \
 && cd /app \
 && git clone --branch $BASKERVILLE_BRANCH https://github.com/deflect-ca/baskerville.git \
 && cd baskerville \
 && pip install . \
 && cd /app \
 && git clone --branch $DASHBOARD_BRANCH https://github.com/deflect-ca/baskerville_dashboard.git \
 && cd baskerville_dashboard/backend \
 && pip install .
# No trailing `cd`: a directory change does not outlive its RUN instruction;
# the working directory is set with WORKDIR further down.

# copy all related configurations
COPY ./nginx.conf /etc/nginx/nginx.conf
# The trailing slash makes the destination unambiguously a directory, so the two
# files cannot end up overwriting a single file named `conf`.
COPY ./config.yaml /app/baskerville_dashboard/backend/conf/
COPY ./baskerville.yaml /app/baskerville_dashboard/backend/conf/

WORKDIR /app/baskerville_dashboard/backend/src/baskerville_dashboard

# socketio for Python includes a production grade web server.
# start.sh starts nginx and runs dashboard backend.
COPY start.sh start.sh
CMD ["sh", "./start.sh"]
EXPOSE 80 81 5000
Loading