Skip to content

Commit

Permalink
merge logmonitor and startup logmonitor (#1679)
Browse files Browse the repository at this point in the history
* merge logmonitor and startup logmonitor
  • Loading branch information
TrevorBenson authored Oct 3, 2023
1 parent f2ca29a commit 8402049
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 282 deletions.
3 changes: 3 additions & 0 deletions scripts/cnode-helper-scripts/cncli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ createBlocklogDB() {
CREATE TABLE epochdata (id INTEGER PRIMARY KEY AUTOINCREMENT, epoch INTEGER NOT NULL, epoch_nonce TEXT NOT NULL, pool_id TEXT NOT NULL, sigma TEXT NOT NULL, d REAL NOT NULL, epoch_slots_ideal INTEGER NOT NULL, max_performance REAL NOT NULL, active_stake TEXT NOT NULL, total_active_stake TEXT NOT NULL, UNIQUE(epoch,pool_id));
CREATE INDEX idx_epochdata_epoch ON epochdata (epoch);
CREATE INDEX idx_epochdata_pool_id ON epochdata (pool_id);
CREATE TABLE validationlog (id INTEGER PRIMARY KEY AUTOINCREMENT, event TEXT NOT NULL, at TEXT NOT NULL UNIQUE, env TEXT NOT NULL, final_chunk INTEGER, initial_chunk INTEGER);
CREATE TABLE replaylog (id INTEGER PRIMARY KEY AUTOINCREMENT, event TEXT NOT NULL, at TEXT NOT NULL UNIQUE, env TEXT NOT NULL, slot INTEGER, tip INTEGER);
CREATE TABLE statistics (id INTEGER PRIMARY KEY AUTOINCREMENT, event TEXT NOT NULL, start INTEGER, end INTEGER, env TEXT NOT NULL);
PRAGMA user_version = 1;
EOF
echo "SQLite blocklog DB created: ${BLOCKLOG_DB}"
Expand Down
19 changes: 1 addition & 18 deletions scripts/cnode-helper-scripts/deploy-as-systemd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ After=${vname}.service
[Service]
Type=simple
Restart=on-failure
RestartSec=20
RestartSec=1
User=$USER
WorkingDirectory=${CNODE_HOME}/scripts
ExecStart=/bin/bash -l -c \"exec ${CNODE_HOME}/scripts/logMonitor.sh\"
Expand Down Expand Up @@ -400,27 +400,10 @@ else
fi
fi

echo
echo -e "\e[32m~~ Startup Log Monitor ~~\e[0m"
echo "Parses JSON log of cardano-node for traces of interest to provide metrics for chain validation and ledger replay startup events."
echo "Optional to use."
echo
echo "Deploy Startup Log Monitor as systemd services? [y|n]"
read -rsn1 yn
if [[ ${yn} = [Yy]* ]]; then
./startupLogMonitor.sh -d
else
if [[ -f /etc/systemd/system/${vname}-startup-logmonitor.service ]]; then
sudo systemctl disable ${vname}-startup-logmonitor.service
sudo rm -f /etc/systemd/system/${vname}-startup-logmonitor.service
fi
fi

echo
sudo systemctl daemon-reload
[[ -f /etc/systemd/system/${vname}.service ]] && sudo systemctl enable ${vname}.service
[[ -f /etc/systemd/system/${vname}-logmonitor.service ]] && sudo systemctl enable ${vname}-logmonitor.service
[[ -f /etc/systemd/system/${vname}-startup-logmonitor.service ]] && sudo systemctl enable ${vname}-startup-logmonitor.service
[[ -f /etc/systemd/system/${vname}-tu-fetch.service ]] && sudo systemctl enable ${vname}-tu-fetch.service
[[ -f /etc/systemd/system/${vname}-tu-restart.timer ]] && sudo systemctl enable ${vname}-tu-restart.timer
[[ -f /etc/systemd/system/${vname}-tu-push.timer ]] && sudo systemctl enable ${vname}-tu-push.timer
Expand Down
182 changes: 164 additions & 18 deletions scripts/cnode-helper-scripts/logMonitor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,150 @@
#shellcheck disable=SC2086,SC2001,SC2154
#shellcheck source=/dev/null

######################################
# User Variables - Change as desired #
# Common variables set in env file #
######################################


######################################
# Do NOT modify code below #
######################################

# If the log entry matches one of the monitored traces and is enabled, process it
processLogInfo() {
case "$1" in
TraceNodeIsLeader )
if [[ "$BLOCKLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceNodeIsLeader]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! slot="$(jq -er '.data.val.slot' <<< ${logentry})"; then echo "ERROR[TraceNodeIsLeader]: invalid json schema, '.data.val.slot' not found" && :; fi
getNodeMetrics
[[ ${epochnum} -le 0 ]] && echo "ERROR[TraceNodeIsLeader]: failed to grab current epoch number from node metrics" && :
echo "LEADER: epoch[${epochnum}] slot[${slot}] at[${at}]"
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO blocklog (slot,at,epoch,status) values (${slot},'${at}',${epochnum},'leader');"
fi
;;
TraceAdoptedBLock )
if [[ "$BLOCKLOG_ENABLED" = true ]]; then
if ! slot="$(jq -er '.data.val.slot' <<< ${logentry})"; then echo "ERROR[TraceAdoptedBlock]: invalid json schema, '.data.val.slot' not found" && :; fi
if ! hash="$(jq -er '.data.val.blockHash' <<< ${logentry})"; then echo "ERROR[TraceAdoptedBlock]: invalid json schema, '.data.val.blockHash' not found" && :; fi
if ! size="$(jq -er '.data.val.blockSize' <<< ${logentry})"; then echo "ERROR[TraceAdoptedBlock]: invalid json schema, '.data.val.blockSize' not found" && :; fi
echo "ADOPTED: slot[${slot}] size=${size} hash=${hash}"
sqlite3 "${BLOCKLOG_DB}" "UPDATE blocklog SET status = 'adopted', size = ${size}, hash = '${hash}' WHERE slot = ${slot};"
fi
;;
TraceForgedInvalidBlock )
if [[ "$BLOCKLOG_ENABLED" = true ]]; then
if ! slot="$(jq -er '.data.val.slot' <<< ${logentry})"; then echo "ERROR[TraceForgedInvalidBlock]: invalid json schema, '.data.val.slot' not found" && :; fi
json_trace="$(jq -c -r '. | @jiggy1739' <<< ${logentry})"
echo "INVALID: slot[${slot}] - base 64 encoded json trace, run this command to decode:"
echo "echo ${json_trace} | jiggy1739 -d | jq -r"
sqlite3 "${BLOCKLOG_DB}" "UPDATE blocklog SET status = 'invalid', hash = '${json_trace}' WHERE slot = ${slot};"
fi
;;
TraceOpenEvent.StartedOpeningDB )
if [[ "$CHUNKVALIDATAIONLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.data.kind' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO statistics (event,start,env) values ('OpenDB','${at}','${env}');"
fi
;;
TraceOpenEvent.OpenedDB )
if [[ "$CHUNKVALIDATAIONLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.data.kind' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "UPDATE statistics SET end = '${at}' WHERE id IN (SELECT id FROM (SELECT * FROM statistics WHERE event = 'OpenDB' AND env = '${env}' AND end is Null ORDER BY event DESC) ORDER BY id DESC LIMIT 1) ;"
fi
;;
TraceOpenEvent.StartedOpeningImmutableDB )
if [[ "$CHUNKVALIDATAIONLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.data.kind' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO statistics (event,start,env) values ('OpenImmutableDB','${at}','${env}');"
fi
;;
TraceOpenEvent.OpenedImmutableDB )
if [[ "$CHUNKVALIDATAIONLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.data.kind' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "UPDATE statistics SET end = '${at}' WHERE id IN (SELECT id FROM (SELECT * FROM statistics WHERE event = 'OpenImmutableDB' AND env = '${env}' AND end is Null ORDER BY event DESC) ORDER BY id DESC LIMIT 1) ;"
fi
;;
TraceOpenEvent.StartedOpeningLgrDB )
if [[ "$LEDGERREPLAYLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.data.kind' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO statistics (event,start,env) values ('OpenLedgerDB','${at}','${env}');"
fi
;;
TraceOpenEvent.OpenedLgrDB )
if [[ "$LEDGERREPLAYLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.data.kind' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "UPDATE statistics SET end = '${at}' WHERE id IN (SELECT id FROM (SELECT * FROM statistics WHERE event = 'OpenLedgerDB' AND env = '${env}' AND end is Null ORDER BY event DESC) ORDER BY id DESC LIMIT 1) ;"
fi
;;
TraceImmutableDBEvent.StartedValidatingChunk )
if [[ "$LEDGERREPLAYLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.data.kind' not found" && :; fi
if ! initialChunk="$(jq -er '.data.initialChunk' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.data.initialChunk' not found" && :; fi
if ! finalChunk="$(jq -er '.data.finalChunk' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.data.finalChunk' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO validationlog (event,at,env,final_chunk,initial_chunk) values ('${event}','${at}','${env}',${finalChunk},${initialChunk});"
fi
;;
TraceImmutableDBEvent.ValidatedLastLocation )
if [[ "$LEDGERREPLAYLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.data.kind' not found" && :; fi
if ! initialChunk="$(jq -er '.data.initialChunk' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.data.initialChunk' not found" && :; fi
if ! finalChunk="$(jq -er '.data.finalChunk' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.data.finalChunk' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO validationlog (event,at,env,final_chunk,initial_chunk) values ('${event}','${at}','${env}',${finalChunk},${initialChunk});"
fi
;;
TraceLedgerReplayEvent.ReplayedBlock )
if [[ "$LEDGERREPLAYLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceLedgerReplayEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceLedgerReplayEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceLedgerReplayEvent]: invalid json schema, '.data.kind' not found" && :; fi
if ! slot="$(jq -er '.data.slot' <<< ${logentry})"; then echo "ERROR[TraceLedgerReplayEvent]: invalid json schema, '.data.slot' not found" && :; fi
if ! tip="$(jq -er '. .data.tip' <<< ${logentry})"; then echo "ERROR[TraceLedgerReplayEvent]: invalid json schema, '.data.tip' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO replaylog (event,at,env,slot,tip) values ('${event}','${at}','${env}',${slot},${tip});"
fi
;;
* ) : ;; # ignore
esac
}


if renice_cmd="$(command -v renice)"; then ${renice_cmd} -n 19 $$ >/dev/null; fi

PARENT="$(dirname $0)"
if [[ ! -f "${PARENT}"/env ]]; then
echo "ERROR: could not find common env file, please run guild-deploy.sh or manually download"
exit 1
fi
if ! . "${PARENT}"/env; then exit 1; fi

. "${PARENT}"/env offline
return_code=$?
if [[ $return_code -eq 0 || $return_code -eq 2 ]]; then
:
else
echo "ERROR: Received exit code: ${?} from ${PARENT}/env."
exit 1
fi

if [[ -z "$BLOCKLOG_ENABLED" ]]; then BLOCKLOG_ENABLED=true; fi
if [[ -z "$CHUNKVALIDATAIONLOG_ENABLED" ]]; then CHUNKVALIDATAIONLOG_ENABLED=true; fi
if [[ -z "$LEDGERREPLAYLOG_ENABLED" ]]; then LEDGERREPLAYLOG_ENABLED=true; fi

# get log file from config file specified in env
unset logfile
Expand All @@ -30,31 +162,45 @@ echo "~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "~~ LOG MONITOR STARTED ~~"
echo "monitoring ${logfile} for traces"

# Continuously parse cardano-node json log file for traces
# Continuously parse cardano-node json log file for traces and call processLogInfo()
while read -r logentry; do
# Traces monitored: TraceNodeIsLeader, TraceAdoptedBlock, TraceForgedInvalidBlock
case "${logentry}" in
*TraceNodeIsLeader* )
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceNodeIsLeader]: invalid json schema, '.at' not found" && continue; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! slot="$(jq -er '.data.val.slot' <<< ${logentry})"; then echo "ERROR[TraceNodeIsLeader]: invalid json schema, '.data.val.slot' not found" && continue; fi
getNodeMetrics
[[ ${epochnum} -le 0 ]] && echo "ERROR[TraceNodeIsLeader]: failed to grab current epoch number from node metrics" && continue
echo "LEADER: epoch[${epochnum}] slot[${slot}] at[${at}]"
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO blocklog (slot,at,epoch,status) values (${slot},'${at}',${epochnum},'leader');"
processLogInfo "TraceNodeIsLeader"
;;
*TraceAdoptedBlock* )
if ! slot="$(jq -er '.data.val.slot' <<< ${logentry})"; then echo "ERROR[TraceAdoptedBlock]: invalid json schema, '.data.val.slot' not found" && continue; fi
if ! hash="$(jq -er '.data.val.blockHash' <<< ${logentry})"; then echo "ERROR[TraceAdoptedBlock]: invalid json schema, '.data.val.blockHash' not found" && continue; fi
if ! size="$(jq -er '.data.val.blockSize' <<< ${logentry})"; then echo "ERROR[TraceAdoptedBlock]: invalid json schema, '.data.val.blockSize' not found" && continue; fi
echo "ADOPTED: slot[${slot}] size=${size} hash=${hash}"
sqlite3 "${BLOCKLOG_DB}" "UPDATE blocklog SET status = 'adopted', size = ${size}, hash = '${hash}' WHERE slot = ${slot};"
processLogInfo "TraceAdoptedBlock"
;;
*TraceForgedInvalidBlock* )
if ! slot="$(jq -er '.data.val.slot' <<< ${logentry})"; then echo "ERROR[TraceForgedInvalidBlock]: invalid json schema, '.data.val.slot' not found" && continue; fi
json_trace="$(jq -c -r '. | @base64' <<< ${logentry})"
echo "INVALID: slot[${slot}] - base 64 encoded json trace, run this command to decode:"
echo "echo ${json_trace} | base64 -d | jq -r"
sqlite3 "${BLOCKLOG_DB}" "UPDATE blocklog SET status = 'invalid', hash = '${json_trace}' WHERE slot = ${slot};"
processLogInfo "TraceForgedInvalidBlock"
;;
*TraceOpenEvent.StartedOpeningDB* )
processLogInfo "TraceOpenEvent.StartedOpeningDB"
;;
*TraceOpenEvent.OpenedDB* )
processLogInfo "TraceOpenEvent.OpenedDB"
;;
*TraceOpenEvent.StartedOpeningImmutableDB* )
processLogInfo "TraceOpenEvent.StartedOpeningImmutableDB"
;;
*TraceOpenEvent.OpenedImmutableDB* )
processLogInfo "TraceOpenEvent.OpenedImmutableDB"
;;
*TraceOpenEvent.StartedOpeningLgrDB* )
processLogInfo "TraceOpenEvent.StartedOpeningLgrDB"
;;
*TraceOpenEvent.OpenedLgrDB* )
processLogInfo "TraceOpenEvent.OpenedLgrDB"
;;
*TraceImmutableDBEvent.StartedValidatingChunk* )
processLogInfo "TraceImmutableDBEvent.StartedValidatingChunk"
;;
*TraceImmutableDBEvent.ValidatedLastLocation* )
processLogInfo "TraceImmutableDBEvent.ValidatedLastLocation"
;;
*TraceLedgerReplayEvent.ReplayedBlock* )
processLogInfo "TraceLedgerReplayEvent.ReplayedBlock"
;;
* ) : ;; # ignore
esac
Expand Down
Loading

0 comments on commit 8402049

Please sign in to comment.