-
Notifications
You must be signed in to change notification settings - Fork 18
/
flume-bridge-monitor.go
99 lines (85 loc) · 2.28 KB
/
flume-bridge-monitor.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
package main
import (
"fmt"
"github.com/blackbeans/goquery"
"log"
"net/http"
"net/url"
"time"
)
// main runs the monitor loop: it scans a fixed list of flume hosts, logs that
// the pass has finished, sleeps for one minute, and repeats forever.
func main() {
	flumes := []string{
		"flume001.m6", "flume002.m6",
		"flume-log-001.m6", "flume-log-002.m6", "flume-log-003.m6",
		"flume-log-004.m6", "flume-log-005.m6", "flume-log-006.m6",
	}

	for {
		scan(flumes)
		// time.UTC.String() is just the location name "UTC"; format the
		// current UTC time instead, and pass it through a constant format
		// string so the message can never be misread as format verbs.
		log.Printf("%s\t 扫描意外终止的flume结束....\n", time.Now().UTC().String())
		time.Sleep(1 * time.Minute)
	}
}
// SupervisorInstance is a supervisor instance: one managed process together
// with the host it was observed on.
type SupervisorInstance struct {
	// Host is the name of the current machine.
	Host string `json:"host"`
	// Name is the service name.
	Name string `json:"name"`
	// Status is the current state.
	Status string `json:"status"`
	// Info is the startup information.
	Info string `json:"info"`
}
// START_URL is the format string for the relative URL that requests the
// "start" action for a process; its single %s verb takes the process name.
const START_URL = "index.html?processname=%s&action=start"
// scan checks the supervisor web UI of every host in hosts and restarts the
// processes it reports as exited.
//
// For each host it loads http://<host>:9001, reads the first three cells of
// every "table tbody tr" row as status, info and name, and collects the names
// whose status text is exactly "exited". Each collected process is started
// through the START_URL action, and a single alarm listing the per-process
// outcome is sent to the alarm proxy. Hosts without an exited process produce
// no alarm. Failures are logged and never stop the scan of the remaining hosts.
func scan(hosts []string) {
	for _, host := range hosts {
		scanHost(host)
	}
}

// scanHost performs one status check, restart and alarm round for a single host.
func scanHost(host string) {
	baseURL := "http://" + host + ":9001"
	doc, err := goquery.NewDocument(baseURL)
	if err != nil {
		// Report the unreachable or unparsable status page instead of
		// skipping the host without a trace.
		log.Printf("获取supervisor状态页失败:%s|%s", baseURL, err)
		return
	}

	var exited []string
	doc.Find("table tbody tr").Each(func(_ int, row *goquery.Selection) {
		instance := SupervisorInstance{Host: host}
		row.Find("td").Each(func(col int, cell *goquery.Selection) {
			// Only the first three columns are read; any further cells are ignored.
			switch col {
			case 0:
				instance.Status = cell.Children().Text()
			case 1:
				instance.Info = cell.Children().Text()
			case 2:
				instance.Name = cell.Children().Text()
			}
		})
		if instance.Status == "exited" {
			exited = append(exited, instance.Name)
		}
	})
	if len(exited) == 0 {
		return
	}

	// Restart every exited process and record the outcome in the alarm text.
	alarm := "host:" + host + " flume节点restart:["
	for _, name := range exited {
		// Named startURL so the net/url package is not shadowed.
		startURL := fmt.Sprintf("%s/"+START_URL, baseURL, name)
		if err := getAndClose(startURL); err != nil {
			log.Printf("重启失败:%s|%s", startURL, err)
			alarm += name + ":fail,"
			continue
		}
		alarm += name + ":succ,"
	}
	alarm += "]"

	// At least one process was exited, so notify the alarm proxy.
	alarmURL := fmt.Sprintf("http://monitor001.m6:8001/alarmproxy?host=%s&action=%s&msg=%s&status=1&level=1&timestamp=%d",
		host, "flume_node", url.QueryEscape(alarm), time.Now().Unix())
	if err := getAndClose(alarmURL); err != nil {
		log.Printf("发送告警失败:%s|%s", alarmURL, err)
	}
}

// getAndClose issues a GET request to rawURL and closes the response body
// before returning, so no body stays open across loop iterations. It returns
// the error reported by http.Get, if any.
func getAndClose(rawURL string) error {
	resp, err := http.Get(rawURL)
	if err != nil {
		return err
	}
	// The body is never read, so an error from Close carries no useful signal.
	_ = resp.Body.Close()
	return nil
}