forked from keptn-sandbox/datadog-service
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eventhandlers.go
268 lines (226 loc) · 9.54 KB
/
eventhandlers.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
package main
import (
"context"
"fmt"
"math"
"os"
"strconv"
"strings"
"time"
"github.com/DataDog/datadog-api-client-go/api/v1/datadog"
cloudevents "github.com/cloudevents/sdk-go/v2" // make sure to use v2 cloudevents here
"github.com/keptn-sandbox/datadog-service/pkg/utils"
keptnv2 "github.com/keptn/go-utils/pkg/lib/v0_2_0"
logger "github.com/sirupsen/logrus"
)
const (
	// defaultSleepBeforeAPIInSeconds is both the fallback and the minimum
	// accepted value for the wait applied before querying the Datadog API.
	defaultSleepBeforeAPIInSeconds = 60

	// sliFile is the resource URI of the SLI definition file that is fetched
	// from the Keptn configuration repository.
	sliFile = "datadog/sli.yaml"
)

// sleepBeforeAPIInSeconds is the number of seconds to wait before querying the
// Datadog metrics API. It is populated in init from the
// SLEEP_BEFORE_API_IN_SECONDS environment variable.
//
// A minimum of 60s is enforced because the Datadog API needs that long to
// reflect the data correctly.
// More info: https://github.com/keptn-sandbox/datadog-service/issues/8
var sleepBeforeAPIInSeconds int
func init() {
var err error
sleepBeforeAPIInSeconds, err = strconv.Atoi(strings.TrimSpace(os.Getenv("SLEEP_BEFORE_API_IN_SECONDS")))
if err != nil || sleepBeforeAPIInSeconds < defaultSleepBeforeAPIInSeconds {
logger.Infof("defaulting SLEEP_BEFORE_API_IN_SECONDS to 60s because it was set to '%v' which is less than the min allowed value of 60s", sleepBeforeAPIInSeconds)
sleepBeforeAPIInSeconds = defaultSleepBeforeAPIInSeconds
}
}
// HandleGetSliTriggeredEvent handles get-sli.triggered events if SLIProvider == datadog.
//
// It sends a get-sli.started event, fetches the SLI definitions from sliFile,
// queries the Datadog metrics API once per requested indicator and finally
// sends a get-sli.finished event carrying the collected values. Events meant
// for another SLI provider are ignored and nil is returned.
//
// An error is returned when the started or finished event cannot be sent or
// when the start/end timestamps of the event cannot be parsed. A failing
// Datadog query does not abort the handler; it marks the finished event as
// errored/failed instead.
func HandleGetSliTriggeredEvent(ddKeptn *keptnv2.Keptn, incomingEvent cloudevents.Event, data *keptnv2.GetSLITriggeredEventData) error {
	var shkeptncontext string
	// The keptn context is only used to enrich log lines, so a missing
	// extension is deliberately ignored.
	_ = incomingEvent.Context.ExtensionAs("shkeptncontext", &shkeptncontext)
	configureLogger(incomingEvent.Context.GetID(), shkeptncontext)

	logger.Infof("Handling get-sli.triggered Event: %s", incomingEvent.Context.GetID())

	// Step 1 - Do we need to do something?
	// Let's make sure we are only processing an event that really belongs to our SLI Provider
	if data.GetSLI.SLIProvider != "datadog" {
		logger.Infof("Not handling get-sli event as it is meant for %s", data.GetSLI.SLIProvider)
		return nil
	}

	// Step 2 - Send out a get-sli.started CloudEvent
	// The get-sli.started cloud-event is new since Keptn 0.8.0 and is required to be sent when the task is started
	_, err := ddKeptn.SendTaskStartedEvent(data, ServiceName)
	if err != nil {
		errMsg := fmt.Sprintf("Failed to send task started CloudEvent (%s), aborting...", err.Error())
		logger.Error(errMsg)
		return err
	}

	// Step 3 - parse the evaluation time frame
	start, err := parseUnixTimestamp(data.GetSLI.Start)
	if err != nil {
		logger.Errorf("unable to parse sli start timestamp: %v", err)
		return err
	}
	end, err := parseUnixTimestamp(data.GetSLI.End)
	if err != nil {
		logger.Errorf("unable to parse sli end timestamp: %v", err)
		return err
	}

	// Step 4 - prep-work
	// Get any additional input / configuration data
	// - Labels: get the incoming labels for potential config data and use it to pass more labels on result, e.g: links
	// - SLI.yaml: if your service uses SLI.yaml to store query definitions for SLIs get that file from Keptn
	labels := data.Labels
	if labels == nil {
		labels = make(map[string]string)
	}

	// Step 5 - get SLI Config File
	// Get SLI File from datadog subdirectory of the config repo - to add the file use:
	//   keptn add-resource --project=PROJECT --stage=STAGE --service=SERVICE --resource=my-sli-config.yaml  --resourceUri=datadog/sli.yaml
	sliConfig, err := ddKeptn.GetSLIConfiguration(data.Project, data.Stage, data.Service, sliFile)
	logger.Debugf("SLI config: %v", sliConfig)

	// FYI you do not need to "fail" if sli.yaml is missing, you can also assume smart defaults like we do
	// in keptn-contrib/dynatrace-service and keptn-contrib/prometheus-service
	if err != nil {
		// failed to fetch sli config file
		errMsg := fmt.Sprintf("Failed to fetch SLI file %s from config repo: %s", sliFile, err.Error())
		logger.Error(errMsg)
		// Send a get-sli.finished event with status=error and result=failed back to Keptn.
		// The message carries the cause so it is visible on the event itself.
		// Only a failure to deliver that event is reported to the caller.
		_, err = ddKeptn.SendTaskFinishedEvent(&keptnv2.EventData{
			Status:  keptnv2.StatusErrored,
			Result:  keptnv2.ResultFailed,
			Labels:  labels,
			Message: errMsg,
		}, ServiceName)

		return err
	}

	// Step 6 - do your work - iterate through the list of requested indicators and return their values
	// Indicators: this is the list of indicators as requested in the SLO.yaml
	// SLIResult: this is the array that will receive the results
	indicators := data.GetSLI.Indicators
	sliResults := make([]*keptnv2.SLIResult, 0, len(indicators))

	ctx := datadog.NewDefaultContext(context.Background())
	configuration := datadog.NewConfiguration()
	apiClient := datadog.NewAPIClient(configuration)

	logger.Debug("indicators:", indicators)

	// Pulling the data from Datadog api immediately gives incorrect data in api response
	// we have to wait for some time for the correct data to be reflected in the api response.
	// The wait does not depend on the indicator, so it is done once up front
	// instead of once per indicator.
	// TODO: Find a better way around the sleep time for datadog api
	if len(indicators) > 0 {
		logger.Debugf("waiting for %vs so that the metrics data is reflected correctly in the api", sleepBeforeAPIInSeconds)
		time.Sleep(time.Second * time.Duration(sleepBeforeAPIInSeconds))
	}

	errored := false
	for _, indicatorName := range indicators {
		indicatorLog := logger.WithFields(logger.Fields{"indicatorName": indicatorName})

		// An indicator without a query definition cannot be evaluated; do not
		// send an empty query to Datadog.
		rawQuery, found := sliConfig[indicatorName]
		if !found || strings.TrimSpace(rawQuery) == "" {
			indicatorLog.Errorf("no query defined for the indicator in %s", sliFile)
			errored = true
			continue
		}

		query := replaceQueryParameters(data, rawQuery, start, end)
		logger.Debugf("actual query sent to datadog: %v, from: %v, to: %v", query, start.Unix(), end.Unix())

		resp, r, err := apiClient.MetricsApi.QueryMetrics(ctx, start.Unix(), end.Unix(), query)
		if err != nil {
			logger.Errorf("'%s': error getting value for the query: %v : %v\n", query, resp, err)
			logger.Errorf("'%s': full HTTP response: %v\n", query, r)
			errored = true
			continue
		}

		logger.Debugf("response from the metrics api: %v", resp)

		// Series and Pointlist are pointers and may be absent from the
		// response, so unwrap them defensively instead of dereferencing.
		if resp.Series == nil || len(*resp.Series) == 0 {
			indicatorLog.Debugf("got 0 in the SLI result (indicates empty response from the API)")
			continue
		}
		var points [][]*float64
		if pointlist := (*resp.Series)[0].Pointlist; pointlist != nil {
			points = *pointlist
		}

		value, ok := latestPointValue(points)
		if !ok {
			indicatorLog.Debugf("the first series in the response has no usable latest datapoint")
			continue
		}

		sliResult := &keptnv2.SLIResult{
			Metric:  indicatorName,
			Value:   value,
			Success: true,
		}
		indicatorLog.Debugf("SLI result from the metrics api: %v", sliResult)
		sliResults = append(sliResults, sliResult)
	}

	// Step 7 - Build get-sli.finished event data
	getSliFinishedEventData := &keptnv2.GetSLIFinishedEventData{
		EventData: keptnv2.EventData{
			Status: keptnv2.StatusSucceeded,
			Result: keptnv2.ResultPass,
			Labels: labels,
		},
		GetSLI: keptnv2.GetSLIFinished{
			IndicatorValues: sliResults,
			Start:           data.GetSLI.Start,
			End:             data.GetSLI.End,
		},
	}

	// Any failed indicator turns the whole task into an errored/failed one.
	if errored {
		getSliFinishedEventData.EventData.Status = keptnv2.StatusErrored
		getSliFinishedEventData.EventData.Result = keptnv2.ResultFailed
	}

	logger.Debugf("SLI finished event: %v", *getSliFinishedEventData)

	_, err = ddKeptn.SendTaskFinishedEvent(getSliFinishedEventData, ServiceName)
	if err != nil {
		errMsg := fmt.Sprintf("Failed to send task finished CloudEvent (%s), aborting...", err.Error())
		logger.Error(errMsg)
		return err
	}

	return nil
}

// latestPointValue returns the value (second element) of the last point in
// points. The boolean is false when points is empty, when the last point has
// fewer than two elements, or when its value is nil.
func latestPointValue(points [][]*float64) (float64, bool) {
	if len(points) == 0 {
		return 0, false
	}
	last := points[len(points)-1]
	if len(last) < 2 || last[1] == nil {
		return 0, false
	}
	return *last[1], true
}
// HandleConfigureMonitoringTriggeredEvent handles configure-monitoring.triggered
// events. It sends the corresponding started event and then immediately a
// finished event with status succeeded and result pass; no other work is done
// in between.
//
// An error is returned when either of the two events cannot be sent.
func HandleConfigureMonitoringTriggeredEvent(ddKeptn *keptnv2.Keptn, incomingEvent cloudevents.Event, data *keptnv2.ConfigureMonitoringTriggeredEventData) error {
	var shkeptncontext string
	// The keptn context is only used to enrich log lines, so a missing
	// extension is deliberately ignored.
	_ = incomingEvent.Context.ExtensionAs("shkeptncontext", &shkeptncontext)
	configureLogger(incomingEvent.Context.GetID(), shkeptncontext)

	logger.Infof("Handling configure-monitoring.triggered Event: %s", incomingEvent.Context.GetID())

	_, err := ddKeptn.SendTaskStartedEvent(data, ServiceName)
	if err != nil {
		logger.Errorf("err when sending task started the event: %v", err)
		return err
	}

	configureMonitoringFinishedEventData := &keptnv2.ConfigureMonitoringFinishedEventData{
		EventData: keptnv2.EventData{
			Status:  keptnv2.StatusSucceeded,
			Result:  keptnv2.ResultPass,
			Project: data.Project,
			// Propagate the stage of the triggering event (this used to be
			// filled with the service name by mistake).
			Stage:   data.Stage,
			Service: data.Service,
			Message: "Finished configuring monitoring",
		},
	}

	logger.Debugf("Configure Monitoring finished event: %v", *configureMonitoringFinishedEventData)

	_, err = ddKeptn.SendTaskFinishedEvent(configureMonitoringFinishedEventData, ServiceName)
	if err != nil {
		errMsg := fmt.Sprintf("Failed to send task finished CloudEvent (%s), aborting...", err.Error())
		logger.Error(errMsg)
		return err
	}

	return nil
}
// configureLogger installs a formatter on the shared logger that attaches the
// service name, the given event ID and the given keptn context to log entries.
// If the environment variable named by envVarLogLevel is non-empty it is also
// applied as the log level; a value that cannot be parsed is reported and the
// level is left unchanged.
func configureLogger(eventID, keptnContext string) {
	staticFields := logger.Fields{
		"service":      "datadog-service",
		"eventId":      eventID,
		"keptnContext": keptnContext,
	}
	logger.SetFormatter(&utils.Formatter{
		Fields:           staticFields,
		BuiltinFormatter: &logger.TextFormatter{},
	})

	requestedLevel := os.Getenv(envVarLogLevel)
	if requestedLevel == "" {
		// Nothing requested: keep whatever level is currently active.
		return
	}

	level, parseErr := logger.ParseLevel(requestedLevel)
	if parseErr != nil {
		logger.WithError(parseErr).Error("could not parse log level provided by 'LOG_LEVEL' env var")
		return
	}
	logger.SetLevel(level)
}
// replaceQueryParameters expands the placeholders of an SLI query.
//
// $PROJECT/$project, $STAGE/$stage and $SERVICE/$service are replaced with the
// corresponding fields of data, and $DURATION with the number of seconds
// between start and end. Substitutions are applied one after another in the
// order listed below.
func replaceQueryParameters(data *keptnv2.GetSLITriggeredEventData, query string, start, end time.Time) string {
	substitutions := []struct {
		placeholder string
		value       string
	}{
		{"$PROJECT", data.Project},
		{"$STAGE", data.Stage},
		{"$SERVICE", data.Service},
		{"$project", data.Project},
		{"$stage", data.Stage},
		{"$service", data.Service},
		{"$DURATION", strconv.FormatInt(getDurationInSeconds(start, end), 10)},
	}

	expanded := query
	for _, sub := range substitutions {
		expanded = strings.ReplaceAll(expanded, sub.placeholder, sub.value)
	}
	return expanded
}
// getDurationInSeconds returns the time elapsed from start to end in whole
// seconds. A fractional second is rounded up (towards positive infinity).
func getDurationInSeconds(start, end time.Time) int64 {
	elapsed := end.Sub(start)
	return int64(math.Ceil(elapsed.Seconds()))
}
// parseUnixTimestamp converts timestamp into a time.Time. Two formats are
// accepted: an RFC 3339 string, which is tried first, and a base-10 integer
// interpreted as seconds since the Unix epoch.
//
// If neither format matches, the zero time.Time is returned together with an
// error that wraps the integer parsing failure.
func parseUnixTimestamp(timestamp string) (time.Time, error) {
	if parsedTime, err := time.Parse(time.RFC3339, timestamp); err == nil {
		return parsedTime, nil
	}

	seconds, err := strconv.ParseInt(timestamp, 10, 64)
	if err != nil {
		// Return the zero value rather than the current time, so a caller
		// cannot mistake the result for a successfully parsed timestamp.
		return time.Time{}, fmt.Errorf("timestamp %q is neither RFC 3339 nor Unix seconds: %w", timestamp, err)
	}

	return time.Unix(seconds, 0), nil
}