Skip to content

Commit

Permalink
Add replication monitoring
Browse files Browse the repository at this point in the history
  • Loading branch information
Federico Ceratto committed Apr 3, 2020
1 parent c19497f commit 58b5bd9
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 0 deletions.
22 changes: 22 additions & 0 deletions ansible/roles/node_exporter/files/db_replication_mon_active
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Deployed by ansible
# See roles/node_exporter/files/db_replication_mon_active
# active database --> standby
#
# Once per second, samples two round-trip times and publishes them in
# Prometheus text format in $FN:
#   db_replication_rtt  TCP RTT reported by `ss -i` for an established
#                       connection to 37.218.242.175
#   ping_rtt            average ICMP RTT to 10.1.0.1
#
# Errors are reported as RTT=0

set -u
FN=/run/nodeexp/db_replication_socket.prom
while true; do
  # ss prints one "rtt:<rtt>/<rttvar>" token per matching socket. Keep only the
  # first: several established connections would otherwise produce a
  # multi-line value and a malformed metrics file.
  # NOTE(review): the filter has no port restriction, so any established TCP
  # connection to that address matches -- confirm this is intended.
  socket_rtt=$(ss -ntpi state established 'dst 37.218.242.175' | tr -s " " "\n" | grep ^rtt: | head -n1 | cut -c5- | cut -d'/' -f1)
  socket_rtt=${socket_rtt:-0}
  # In "rtt min/avg/max/mdev = a/b/c/d ms" the 5th '/'-separated field is the
  # average. -s makes cut print nothing for a line that has no '/', so a failed
  # ping falls back to 0 instead of leaking non-numeric text into the file.
  ping_rtt=$(ping -w 1 -c 1 10.1.0.1 | tail -n1 | cut -s -d'/' -f5)
  ping_rtt=${ping_rtt:-0}

  # Write to a temporary file and rename it, so the published file is replaced
  # in one step rather than being rewritten in place.
  cat <<EOF > "$FN.tmp"
db_replication_rtt{role="active"} $socket_rtt
ping_rtt{role="active"} $ping_rtt
EOF
  mv "$FN.tmp" "$FN"
  sleep 1
done
22 changes: 22 additions & 0 deletions ansible/roles/node_exporter/files/db_replication_mon_standby
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Deployed by ansible
# See roles/node_exporter/files/db_replication_mon_standby
# standby database --> active
#
# Once per second, samples two round-trip times and publishes them in
# Prometheus text format in $FN:
#   db_replication_rtt  TCP RTT reported by `ss -i` for an established
#                       connection to 37.218.240.56, destination port 5432
#   ping_rtt            average ICMP RTT to 10.1.0.2
#
# Errors are reported as RTT=0

set -u
FN=/run/nodeexp/db_replication_socket.prom
while true; do
  # ss prints one "rtt:<rtt>/<rttvar>" token per matching socket. Keep only the
  # first: several established connections would otherwise produce a
  # multi-line value and a malformed metrics file.
  socket_rtt=$(ss -ntpi state established 'dst 37.218.240.56 dport = 5432' | tr -s " " "\n" | grep ^rtt: | head -n1 | cut -c5- | cut -d'/' -f1)
  socket_rtt=${socket_rtt:-0}
  # In "rtt min/avg/max/mdev = a/b/c/d ms" the 5th '/'-separated field is the
  # average. -s makes cut print nothing for a line that has no '/', so a failed
  # ping falls back to 0 instead of leaking non-numeric text into the file.
  ping_rtt=$(ping -w 1 -c 1 10.1.0.2 | tail -n1 | cut -s -d'/' -f5)
  ping_rtt=${ping_rtt:-0}

  # Write to a temporary file and rename it, so the published file is replaced
  # in one step rather than being rewritten in place.
  cat <<EOF > "$FN.tmp"
db_replication_rtt{role="standby"} $socket_rtt
ping_rtt{role="standby"} $ping_rtt
EOF
  mv "$FN.tmp" "$FN"
  sleep 1
done
8 changes: 8 additions & 0 deletions ansible/roles/node_exporter/handlers/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,22 @@
service:
name: node_exporter
state: restarted

# Handler: restarts the seeksample service when notified by a task.
- name: restart seeksample
  service:
    name: seeksample
    state: restarted

# Handler: restarts the tcpmetrics service when notified by a task.
- name: restart tcpmetrics
  service:
    name: tcpmetrics
    state: restarted

# Handler: restarts the db_replication_mon service when notified by a task.
- name: restart db_replication_mon
  service:
    name: db_replication_mon
    state: restarted

- name: restart collector-metrics
service:
name: collector-metrics
Expand Down
20 changes: 20 additions & 0 deletions ansible/roles/node_exporter/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,26 @@
dest: /etc/systemd/system/tcpmetrics.service
when: "'have_tcpmetrics' in group_names"

# Deploy the active-side monitor script to /srv/db_replication_mon on hosts in
# the db_active group.
- name: Install db_replication_mon on active DB
  copy:
    src: db_replication_mon_active
    dest: /srv/db_replication_mon
    # Quoted so YAML keeps the mode as a string instead of typing it as a number.
    mode: "0555"
  # Restart the service so an updated script replaces the one already running.
  notify:
    - restart db_replication_mon
  when: "'db_active' in group_names"
  tags: db_replication_mon

# Deploy the standby-side monitor script to /srv/db_replication_mon on hosts in
# the db_standby group.
- name: Install db_replication_mon on standby DB
  copy:
    src: db_replication_mon_standby
    dest: /srv/db_replication_mon
    # Quoted so YAML keeps the mode as a string instead of typing it as a number.
    mode: "0555"
  # Restart the service so an updated script replaces the one already running.
  notify:
    - restart db_replication_mon
  when: "'db_standby' in group_names"
  tags: db_replication_mon

# Render the db_replication_mon unit file on hosts in either DB group, then
# notify the "systemctl daemon-reload" and "restart db_replication_mon"
# handlers.
- name: Install db_replication_mon systemd service file
  template:
    src: db_replication_mon.service
    dest: /etc/systemd/system/db_replication_mon.service
  when: "'db_active' in group_names or 'db_standby' in group_names"
  tags: db_replication_mon
  notify:
    - systemctl daemon-reload
    - restart db_replication_mon

- name: Install collector-metrics systemd service file
notify:
- restart collector-metrics
Expand Down
27 changes: 27 additions & 0 deletions ansible/roles/node_exporter/templates/db_replication_mon.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Deployed by ansible
# see roles/node_exporter/templates/db_replication_mon.service
#
# Runs /srv/db_replication_mon as root and restarts it whenever it exits.
[Unit]
Description=DB replication monitor

[Service]
User=root
Group=root

# Create the textfile directory (root:root, mode 0755) before the monitor starts.
# NOTE(review): the deployed monitor scripts write to a hard-coded /run/nodeexp;
# confirm node_exporter_textfiles_path resolves to that same directory.
ExecStartPre=/usr/bin/install --owner root --group root --mode 0755 --directory {{ node_exporter_textfiles_path }}
ExecStart=/srv/db_replication_mon
# NOTE(review): with KillMode=process presumably only the main shell is
# signalled on stop, leaving any in-flight child (ping, sleep) to finish on its
# own -- confirm this is intended rather than the default control-group mode.
KillMode=process
Restart=always

# Hardening
# NOTE(review): CAP_NET_RAW is presumably needed by ping; the reason for
# CAP_SETUID/CAP_SETGID is not evident from the monitor script -- confirm.
CapabilityBoundingSet=CAP_SETUID CAP_SETGID CAP_NET_RAW
# The leading "~" makes this a deny-list of system call groups.
SystemCallFilter=~@clock @debug @cpu-emulation @keyring @module @mount @obsolete @raw-io @reboot @swap
NoNewPrivileges=yes
PrivateDevices=yes
PrivateTmp=yes
ProtectHome=yes
ProtectSystem=full
ProtectKernelModules=yes
ProtectKernelTunables=yes

[Install]
WantedBy=multi-user.target
8 changes: 8 additions & 0 deletions ansible/roles/plpsql/README.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
PostgreSQL metadb database

Deploys a temporary traffic forwarder using a VPN
HKG -> mia-ps-test.ooni.nu -> AMS

Runs db_replication_mon.service on the HKG and AMS DB hosts to monitor replication. To inspect it:
sudo systemctl status db_replication_mon.service
sudo journalctl -f -u db_replication_mon

0 comments on commit 58b5bd9

Please sign in to comment.