-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8a66f2b
commit d12ddc9
Showing
9 changed files
with
82 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,41 @@ | ||
#!/bin/bash | ||
|
||
# Fail and halt execution on errors | ||
#!/bin/sh | ||
set -e | ||
|
||
if [[ "$1" != "" ]]; then | ||
ENV_TAG="$1" | ||
else | ||
echo "You must give an argument that specifies the deployment, e.g. crawler06 uses prod-env-crawler06.sh." | ||
exit 1 | ||
fi | ||
ENVFILE=$1 | ||
DEBUG=1 | ||
|
||
|
||
source ./env-${ENV_TAG}.sh | ||
# read environment file | ||
if [[ "${ENVFILE}" == "" ]]; then | ||
echo "ERROR: You must give an argument that specifies the deployment, e.g. crawler06 uses prod-env-crawler06.sh." | ||
exit 1 | ||
fi | ||
if ! [[ -f ${ENVFILE} ]]; then | ||
echo "ERROR: argument [${ENVFILE}] environment file missing" | ||
exit 1 | ||
fi | ||
source ./${ENVFILE} | ||
|
||
echo Using UID $H3_UID for Heritrix | ||
|
||
mkdir -p ${STORAGE_PATH}/heritrix/output | ||
mkdir -p ${STORAGE_PATH}/heritrix/wren | ||
mkdir -p ${STORAGE_PATH}/surts/npld | ||
mkdir -p ${STORAGE_PATH}/surts/bypm | ||
mkdir -p ${TMP_STORAGE_PATH}/heritrix/npld/state | ||
mkdir -p ${TMP_STORAGE_PATH}/heritrix/bypm/state | ||
mkdir -p ${CDX_STORAGE_PATH} | ||
mkdir -p /tmp/webrender | ||
mkdir -p ${STORAGE_PATH}/prometheus-data | ||
# check STORAGE_PATH exists, create any missing sub-directories | ||
if ! [[ -d ${STORAGE_PATH} ]]; then | ||
echo "ERROR: STORAGE_PATH [${STORAGE_PATH}] defined in [${ENVFILE}] missing" | ||
exit 1 | ||
fi | ||
for _d in ${HERITRIX_OUTPUT_PATH} ${HERITRIX_WREN_PATH} ${SURTS_NPLD_PATH} ${SURTS_BYPM_PATH} ${NPLD_STATE_PATH} ${BYPM_STATE_PATH} ${CDX_STORAGE_PATH} ${TMP_WEBRENDER_PATH} ${PROMETHEUS_DATA_PATH} ${WARCPROX_PATH}; do | ||
[[ ${DEBUG} ]] && echo -e "DEBUG]\t _d:\t [${_d}]" | ||
if [[ "${_d}" == "" ]]; then | ||
echo "ERROR: No directory defined" | ||
exit 1 | ||
fi | ||
if ! [[ -d ${_d} ]]; then | ||
[[ ${DEBUG} ]] && echo -e "DEBUG]\t making dir [${_d}]" | ||
mkdir -p ${_d} || { | ||
echo "ERROR: failed to make directory [${_d}]" | ||
exit 1 | ||
} | ||
fi | ||
done | ||
exit | ||
|
||
# start FC crawler stack | ||
docker stack deploy -c ../fc-crawl/docker-compose.yml fc_crawl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/sh | ||
docker stack deploy -c ../fc-kafka-ui/docker-compose.yml fc_ui_kafka | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,26 @@ | ||
# Common directories | ||
#### Common directories | ||
# kafka | ||
export STORAGE_PATH=/mnt/data/fc | ||
export TMP_STORAGE_PATH=${STORAGE_PATH}/tmp | ||
export CDX_STORAGE_PATH=${STORAGE_PATH}/cdx | ||
export ZK_DATA_PATH=${STORAGE_PATH}/zookeeper/data | ||
export ZK_DATALOG_PATH=${STORAGE_PATH}/zookeeper/datalog | ||
export KAFKA_PATH=${STORAGE_PATH}/kafka | ||
|
||
# + crawler | ||
export HERITRIX_OUTPUT_PATH=${STORAGE_PATH}/heritrix/output | ||
export HERITRIX_WREN_PATH=${STORAGE_PATH}/heritrix/wren | ||
export SURTS_NPLD_PATH=${STORAGE_PATH}/surts/npld | ||
export SURTS_BYPM_PATH=${STORAGE_PATH}/surts/bypm | ||
export NPLD_STATE_PATH=${TMP_STORAGE_PATH}/heritrix/npld/state | ||
export BYPM_STATE_PATH=${TMP_STORAGE_PATH}/heritrix/bypm/state | ||
export CDX_STORAGE_PATH=${STORAGE_PATH}/cdx | ||
export TMP_WEBRENDER_PATH=/tmp/webrender | ||
export PROMETHEUS_DATA_PATH=${STORAGE_PATH}/prometheus-data | ||
export WARCPROX_PATH=${STORAGE_PATH}/warcprox | ||
|
||
# crawler details | ||
export CRAWL_HOST_LAN_IP=172.31.43.254 | ||
export CRAWL_HOST_WAN_IP=18.130.205.6 | ||
export H3_UID=$(id -u) | ||
export HERITRIX_VERSION=2.9.0 | ||
export CDXSERVER_ENDPOINT=http://${CRAWL_HOST_LAN_IP}:8081/fc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,13 @@ | ||
export KAFKA_IMAGE=wurstmeister/kafka:2.12-2.1.0 | ||
|
||
docker run --net=fc_kafka_default ${KAFKA_IMAGE} kafka-topics.sh --zookeeper zookeeper:2181 --create --topic fc.crawled --replication-factor 1 --partitions 16 --config compression.type=snappy | ||
docker run --ulimit nofile=1024:1024 --net=fc_kafka_default ${KAFKA_IMAGE} kafka-topics.sh --zookeeper zookeeper:2181 --create --topic fc.crawled --replication-factor 1 --partitions 16 --config compression.type=snappy | ||
|
||
docker run --net=fc_kafka_default ${KAFKA_IMAGE} kafka-topics.sh --zookeeper zookeeper:2181 --create --topic fc.tocrawl.bypm --replication-factor 1 --partitions 16 --config compression.type=snappy | ||
docker run --ulimit nofile=1024:1024 --net=fc_kafka_default ${KAFKA_IMAGE} kafka-topics.sh --zookeeper zookeeper:2181 --create --topic fc.tocrawl.bypm --replication-factor 1 --partitions 16 --config compression.type=snappy | ||
|
||
docker run --net=fc_kafka_default ${KAFKA_IMAGE} kafka-topics.sh --zookeeper zookeeper:2181 --create --topic fc.inscope.bypm --replication-factor 1 --partitions 16 --config compression.type=snappy | ||
docker run --ulimit nofile=1024:1024 --net=fc_kafka_default ${KAFKA_IMAGE} kafka-topics.sh --zookeeper zookeeper:2181 --create --topic fc.inscope.bypm --replication-factor 1 --partitions 16 --config compression.type=snappy | ||
|
||
docker run --net=fc_kafka_default ${KAFKA_IMAGE} kafka-topics.sh --zookeeper zookeeper:2181 --create --topic fc.tocrawl.npld --replication-factor 1 --partitions 16 --config compression.type=snappy | ||
docker run --ulimit nofile=1024:1024 --net=fc_kafka_default ${KAFKA_IMAGE} kafka-topics.sh --zookeeper zookeeper:2181 --create --topic fc.tocrawl.npld --replication-factor 1 --partitions 16 --config compression.type=snappy | ||
|
||
docker run --net=fc_kafka_default ${KAFKA_IMAGE} kafka-topics.sh --zookeeper zookeeper:2181 --create --topic fc.inscope.npld --replication-factor 1 --partitions 16 --config compression.type=snappy | ||
docker run --ulimit nofile=1024:1024 --net=fc_kafka_default ${KAFKA_IMAGE} kafka-topics.sh --zookeeper zookeeper:2181 --create --topic fc.inscope.npld --replication-factor 1 --partitions 16 --config compression.type=snappy | ||
|
||
|