Skip to content
This repository has been archived by the owner on Aug 4, 2023. It is now read-only.

Commit

Permalink
Removes exiting on network issue, adds up metric
Browse files Browse the repository at this point in the history
We've had incidents where the connection to Pingdom flakes out
and the exporter exits. It then retries a few times, but
ultimately fleet kills it, and this wakes an operator via
Prometheus alerts. This changeset removes the exit on a
network issue and instead updates an up metric (`pingdom_up`:
1 on success, 0 on failure), which we can add finer-grained
alerting on.
  • Loading branch information
JosephSalisbury committed Aug 24, 2016
1 parent e5b6000 commit 2ab7868
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions cmd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ var (
waitSeconds int
port int

pingdomUp = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "pingdom_up",
Help: "Whether the last pingdom scrape was successfull (1: up, 0: down)",
})

pingdomCheckStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "pingdom_check_status",
Help: "The current status of the check (0: up, 1: unconfirmed_down, 2: down, -1: paused, -2: unknown)",
Expand All @@ -43,10 +48,15 @@ func init() {
serverCmd.Flags().IntVar(&waitSeconds, "wait", 10, "time (in seconds) between accessing the Pingdom API")
serverCmd.Flags().IntVar(&port, "port", 8000, "port to listen on")

prometheus.MustRegister(pingdomUp)
prometheus.MustRegister(pingdomCheckStatus)
prometheus.MustRegister(pingdomCheckResponseTime)
}

// sleep pauses the calling goroutine for the configured polling interval,
// i.e. waitSeconds seconds (the value of the "wait" flag).
func sleep() {
	interval := time.Duration(waitSeconds) * time.Second
	time.Sleep(interval)
}

func serverRun(cmd *cobra.Command, args []string) {
flag.Parse()

Expand All @@ -66,8 +76,12 @@ func serverRun(cmd *cobra.Command, args []string) {
checks, err := client.Checks.List()
if err != nil {
log.Println("Error getting checks ", err)
os.Exit(1)
pingdomUp.Set(0)

sleep()
continue
}
pingdomUp.Set(1)

for _, check := range checks {
id := strconv.Itoa(check.ID)
Expand Down Expand Up @@ -114,7 +128,7 @@ func serverRun(cmd *cobra.Command, args []string) {
).Set(float64(check.LastResponseTime))
}

time.Sleep(time.Second * time.Duration(waitSeconds))
sleep()
}
}()

Expand Down

0 comments on commit 2ab7868

Please sign in to comment.