Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Datahub Fingerprint Database and update script #316

Merged
merged 14 commits into from
Feb 20, 2024
20 changes: 15 additions & 5 deletions google/fingerprinters/web/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,23 @@ Add **generated fingerprint binary proto** to https://github.com/google/tsunami-
Using Zabbix as an example:

```
# Change into the fingerprinter directory
cd google/fingerprinters/web/

# Run the fingerprinter to create a new fingerprint file
./gradlew :runFingerprintUpdater --args="\
--software-name=zabbix\
--fingerprint-data-path=/tmp/zabbix_fingerprints/fingerprints/fingerprint.json\
--local-repo-path=/tmp/zabbix_fingerprints/repo/frontends/php\
--remote-url=http://localhost:280\
--version=4.0.0\
--software-name=zabbix \
--fingerprint-data-path=/tmp/zabbix_fingerprints/fingerprints/fingerprint.json \
--local-repo-path=/tmp/zabbix_fingerprints/repo/frontends/php \
--remote-url=http://localhost:280 \
--version=4.0.0 \
--init"

# Create a binproto file from your newly generated fingerprint.json file
source scripts/updater/common.sh
convertFingerprint /tmp/zabbix_fingerprints/fingerprints/fingerprint.json /tmp/zabbix_fingerprints/fingerprints/fingerprint.binproto
# Move your binproto file into the plugin's fingerprint data directory (the directory the update.sh scripts read the existing *.binproto from)
mv /tmp/zabbix_fingerprints/fingerprints/fingerprint.binproto ./src/main/resources/fingerprinters/web/data/google/zabbix.binproto
```

`--local-repo-path` is the directory into which you cloned the application's git repository; `--remote-url` points to the live instance of the application that you are running locally; `--init` initializes the fingerprint file at `/tmp/zabbix_fingerprints/fingerprints/fingerprint.json`.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Downloaded and slimmed down from https://raw.githubusercontent.com/datahub-project/datahub/master/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml

# Minimal compose stack used to fingerprint the DataHub web frontend: the
# frontend container plus the Kafka broker and ZooKeeper it is chained to via
# depends_on.
networks:
default:
name: datahub_network
services:
# Frontend under test. DATAHUB_FRONTEND_TYPE selects the image name, container
# name and hostname; DATAHUB_VERSION selects the image tag. Neither has a
# default here, so both must be set in the environment of docker-compose.
datahub-frontend-X:
container_name: ${DATAHUB_FRONTEND_TYPE}
depends_on:
# datahub-gms is not defined as a service in this slimmed-down file, so the
# dependency on it is commented out.
# - datahub-gms
- broker # Timo TODO
environment:
# NOTE(review): the GMS host configured below has no matching service in this
# file; presumably the frontend pages being fingerprinted are served without
# it - confirm.
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: ${DATAHUB_FRONTEND_TYPE}
image: linkedin/${DATAHUB_FRONTEND_TYPE}:${DATAHUB_VERSION} #${DATAHUB_VERSION:-head} #TODO update
# The frontend listens on 9002 (see -Dhttp.port above) and is published on the
# same host port.
ports:
- 9002:9002

# Kafka broker. Advertised as broker:29092 to other containers and as
# localhost:9092 to the host.
broker:
container_name: broker
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
- KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
hostname: broker
image: confluentinc/cp-kafka:7.2.2
ports:
- 9092:9092

# ZooKeeper instance the broker connects to (zookeeper:2181).
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:7.2.2
ports:
- 2181:2181
volumes:
- zkdata:/var/lib/zookeeper
version: '2.3'
# NOTE(review): only zkdata is mounted by a service in this file; esdata and
# mysqldata are not referenced anywhere here and look like leftovers from the
# upstream quickstart file - confirm before removing.
volumes:
esdata: null
mysqldata: null
zkdata: null
122 changes: 122 additions & 0 deletions google/fingerprinters/web/scripts/updater/community/datahub/update.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#!/usr/bin/env bash

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort as soon as any command fails.
set -e

# Shared updater helpers. This script calls convertFingerprint, checkOutRepo
# and updateFingerprint without defining them; presumably they come from this
# file. The path is resolved relative to this script instead of the caller's
# working directory, so locating it no longer requires a prior `cd` into the
# script's directory.
source "$(dirname "$0")/../../common.sh"

# Absolute, symlink-resolved directory that contains this script. `&&` (rather
# than `;`) makes a failed `cd` fail the assignment, so `set -e` stops the
# script instead of silently continuing with the caller's working directory.
SCRIPT_PATH="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 && pwd -P)"
# Root path to the web fingerprinter plugin.
PROJECT_ROOT="$(cd -- "${SCRIPT_PATH}/../../../.." >/dev/null 2>&1 && pwd -P)"
# Path to the configurations for starting a live instance of DataHub.
DATAHUB_APP_PATH="${SCRIPT_PATH}/app"
# Path to the temporary data holder.
TMP_DATA="/tmp/datahub_fingerprints"
# Path to the local git repository for DataHub codebase.
GIT_REPO="${TMP_DATA}/repo"
# Path to the directory of all the updated fingerprints data.
FINGERPRINTS_PATH="${TMP_DATA}/fingerprints"
# Json data of the final result.
JSON_DATA="${FINGERPRINTS_PATH}/fingerprint.json"
# Binary proto data of the final result.
BIN_DATA="${FINGERPRINTS_PATH}/fingerprint.binproto"
# Read all the versions of the new react frontend to be fingerprinted
# (one version tag per line).
readarray -t ALL_VERSIONS < "${SCRIPT_PATH}/versions.txt"
# Read all the versions of the old frontend to be fingerprinted
# (one version tag per line).
readarray -t ALL_VERSIONS_OLD_FRONTEND < "${SCRIPT_PATH}/versions_old_frontend.txt"
# Make sure the output directory for the generated fingerprint files exists.
mkdir -p "${FINGERPRINTS_PATH}"

# Brings up the compose stack located in DATAHUB_APP_PATH in detached mode.
# Globals:
#   DATAHUB_APP_PATH (read) - directory docker-compose is run from.
# Arguments:
#   $1 - DataHub version, handed to docker-compose as DATAHUB_VERSION.
#   $2 - frontend type, handed to docker-compose as DATAHUB_FRONTEND_TYPE.
startDataHub() {
  local release_tag="$1" frontend_image="$2"

  # Run from the app directory, then restore the previous working directory.
  pushd "${DATAHUB_APP_PATH}" >/dev/null
  DATAHUB_VERSION="${release_tag}" \
    DATAHUB_FRONTEND_TYPE="${frontend_image}" \
    docker-compose up -d
  popd >/dev/null
}

# Tears down the compose stack located in DATAHUB_APP_PATH, removing its
# volumes and any orphaned containers.
# Globals:
#   DATAHUB_APP_PATH (read) - directory docker-compose is run from.
# Arguments:
#   $1 - DataHub version, handed to docker-compose as DATAHUB_VERSION.
#   $2 - frontend type, handed to docker-compose as DATAHUB_FRONTEND_TYPE.
stopDataHub() {
  local release_tag="$1" frontend_image="$2"

  # Run from the app directory, then restore the previous working directory.
  pushd "${DATAHUB_APP_PATH}" >/dev/null
  DATAHUB_VERSION="${release_tag}" \
    DATAHUB_FRONTEND_TYPE="${frontend_image}" \
    docker-compose down --volumes --remove-orphans
  popd >/dev/null
}

# Turn the fingerprint database currently shipped with the plugin into a
# human-readable json file; the per-version updates below work on that file.
convertFingerprint \
  "${PROJECT_ROOT}/src/main/resources/fingerprinters/web/data/google/datahub.binproto" \
  "${JSON_DATA}"

# Fetch the DataHub codebase, unless a checkout from an earlier run is already
# present.
[[ -d "${GIT_REPO}" ]] \
  || git clone https://github.com/datahub-project/datahub.git "${GIT_REPO}"

#######################################
# Fingerprints each given DataHub version against a live frontend instance:
# start the stack, wait, check out the matching source tag, update the
# fingerprints, stop the stack.
# Globals:
#   GIT_REPO (read)          - local DataHub checkout.
#   FINGERPRINTS_PATH (read) - directory holding the fingerprint data.
# Arguments:
#   $1   - frontend type passed to startDataHub/stopDataHub
#          ("datahub-frontend-react" or "datahub-frontend").
#   $2.. - version tags to fingerprint; empty entries are skipped.
#######################################
fingerprintDataHubVersions() {
  local frontend_type="$1"
  shift

  local datahub_version
  for datahub_version in "$@"; do
    # A blank line in a versions file becomes an empty entry, which would
    # produce an image reference without a tag; skip it.
    [[ -n "${datahub_version}" ]] || continue

    echo "Fingerprinting Datahub version ${datahub_version} ..."

    # Start a live instance of DataHub.
    startDataHub "${datahub_version}" "${frontend_type}"
    # Arbitrarily chosen so that DataHub is up and running.
    echo "Waiting for DataHub ${datahub_version} to be ready ..."
    sleep 30

    # Checkout the repository to the correct tag.
    checkOutRepo "${GIT_REPO}" "${datahub_version}"

    updateFingerprint \
      "datahub" \
      "${datahub_version}" \
      "${FINGERPRINTS_PATH}" \
      "${GIT_REPO}" \
      "http://localhost:9002"

    # Stop the live instance of DataHub.
    stopDataHub "${datahub_version}" "${frontend_type}"
  done
}

# Update for all the versions listed in versions.txt file.
# Newer datahub versions use a react frontend. This is fingerprinted here:
fingerprintDataHubVersions "datahub-frontend-react" "${ALL_VERSIONS[@]}"

# Update for all the versions listed in versions_old_frontend.txt file.
# Here the fingerprints for the old frontend are created.
fingerprintDataHubVersions "datahub-frontend" "${ALL_VERSIONS_OLD_FRONTEND[@]}"

# Convert the updated json data into the binary proto result file.
convertFingerprint "${JSON_DATA}" "${BIN_DATA}"

# Tell the operator which generated file has to be committed.
printf '%s\n' \
  "Fingerprint updated for DataHub. Please commit the following file:" \
  " ${BIN_DATA}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
v0.7.0
v0.7.1
v0.8.0
v0.8.1
v0.8.2
v0.8.3
v0.8.4
v0.8.5
v0.8.6
v0.8.7
v0.8.8
v0.8.9
v0.8.10
v0.8.11
v0.8.12
v0.8.13
v0.8.14
v0.8.15
v0.8.16
v0.8.17
v0.8.18
v0.8.19
v0.8.20
v0.8.21
v0.8.22
v0.8.23
v0.8.24
v0.8.25
v0.8.26
v0.8.27
v0.8.28
v0.8.29
v0.8.30
v0.8.31
v0.8.32
v0.8.33
v0.8.34
v0.8.35
v0.8.36
v0.8.37
v0.8.38
v0.8.39
v0.8.40
v0.8.41
v0.8.42
v0.8.43
v0.8.44
v0.8.45
v0.9.0
v0.9.1
v0.9.2
v0.9.3
v0.9.4
v0.9.5
v0.9.6
v0.9.6.1
v0.10.0
v0.10.1
v0.10.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
v0.4.1
v0.4.2
v0.4.3
v0.5.0
v0.6.0
v0.6.1
v0.7.0
Loading
Loading