diff --git a/management/server/account.go b/management/server/account.go index 49341a67bf8..7159aa9ac1c 100644 --- a/management/server/account.go +++ b/management/server/account.go @@ -476,6 +476,12 @@ func (a *Account) GetPeerNetworkMap( objectCount := int64(len(peersToConnect) + len(expiredPeers) + len(routesUpdate) + len(firewallRules)) metrics.CountNetworkMapObjects(objectCount) metrics.CountGetPeerNetworkMapDuration(time.Since(start)) + + if objectCount > 5000 { + log.WithContext(ctx).Tracef("account: %s has a total resource count of %d objects, "+ + "peers to connect: %d, expired peers: %d, routes: %d, firewall rules: %d", + a.Id, objectCount, len(peersToConnect), len(expiredPeers), len(routesUpdate), len(firewallRules)) + } } return nm diff --git a/management/server/http/handler.go b/management/server/http/handler.go index 3fe26d0ce1c..366efa9b73f 100644 --- a/management/server/http/handler.go +++ b/management/server/http/handler.go @@ -100,27 +100,6 @@ func APIHandler(ctx context.Context, accountManager s.AccountManager, LocationMa api.addPostureCheckEndpoint() api.addLocationsEndpoint() - err := api.Router.Walk(func(route *mux.Route, _ *mux.Router, _ []*mux.Route) error { - methods, err := route.GetMethods() - if err != nil { // we may have wildcard routes from integrations without methods, skip them for now - methods = []string{} - } - for _, method := range methods { - template, err := route.GetPathTemplate() - if err != nil { - return err - } - err = metricsMiddleware.AddHTTPRequestResponseCounter(template, method) - if err != nil { - return err - } - } - return nil - }) - if err != nil { - return nil, err - } - return rootRouter, nil } diff --git a/management/server/sql_store.go b/management/server/sql_store.go index 912e31410c6..0fb3d391f4d 100644 --- a/management/server/sql_store.go +++ b/management/server/sql_store.go @@ -134,6 +134,12 @@ func (s *SqlStore) AcquireReadLockByUID(ctx context.Context, uniqueID string) (u func (s *SqlStore) SaveAccount(ctx context.Context, account *Account) error { start := time.Now() + defer func() { + elapsed := time.Since(start) + if elapsed > 1*time.Second { + log.WithContext(ctx).Tracef("SaveAccount for account %s exceeded 1s, took: %v", account.Id, elapsed) + } + }() // todo: remove this check after the issue is resolved s.checkAccountDomainBeforeSave(ctx, account.Id, account.Domain) @@ -513,6 +519,13 @@ func (s *SqlStore) GetAllAccounts(ctx context.Context) (all []*Account) { } func (s *SqlStore) GetAccount(ctx context.Context, accountID string) (*Account, error) { + start := time.Now() + defer func() { + elapsed := time.Since(start) + if elapsed > 1*time.Second { + log.WithContext(ctx).Tracef("GetAccount for account %s exceeded 1s, took: %v", accountID, elapsed) + } + }() var account Account result := s.db.Model(&account). diff --git a/management/server/telemetry/http_api_metrics.go b/management/server/telemetry/http_api_metrics.go index a80453dca88..357f019c794 100644 --- a/management/server/telemetry/http_api_metrics.go +++ b/management/server/telemetry/http_api_metrics.go @@ -8,6 +8,7 @@ import ( "time" "github.com/google/uuid" + "github.com/gorilla/mux" log "github.com/sirupsen/logrus" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" @@ -54,112 +55,89 @@ func (rw *WrappedResponseWriter) WriteHeader(code int) { // HTTPMiddleware handler used to collect metrics of every request/response coming to the API. // Also adds request tracing (logging). type HTTPMiddleware struct { - meter metric.Meter - ctx context.Context + ctx context.Context // all HTTP requests by endpoint & method - httpRequestCounters map[string]metric.Int64Counter + httpRequestCounter metric.Int64Counter // all HTTP responses by endpoint & method & status code - httpResponseCounters map[string]metric.Int64Counter + httpResponseCounter metric.Int64Counter // all HTTP requests totalHTTPRequestsCounter metric.Int64Counter // all HTTP responses totalHTTPResponseCounter metric.Int64Counter // all HTTP responses by status code - totalHTTPResponseCodeCounters map[int]metric.Int64Counter + totalHTTPResponseCodeCounter metric.Int64Counter // all HTTP requests durations by endpoint and method - httpRequestDurations map[string]metric.Int64Histogram + httpRequestDuration metric.Int64Histogram // all HTTP requests durations totalHTTPRequestDuration metric.Int64Histogram } // NewMetricsMiddleware creates a new HTTPMiddleware func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddleware, error) { - totalHTTPRequestsCounter, err := meter.Int64Counter(fmt.Sprintf("%s_total", httpRequestCounterPrefix), metric.WithUnit("1")) + httpRequestCounter, err := meter.Int64Counter(httpRequestCounterPrefix, metric.WithUnit("1")) if err != nil { return nil, err } - totalHTTPResponseCounter, err := meter.Int64Counter(fmt.Sprintf("%s_total", httpResponseCounterPrefix), metric.WithUnit("1")) + httpResponseCounter, err := meter.Int64Counter(httpResponseCounterPrefix, metric.WithUnit("1")) if err != nil { return nil, err } - totalHTTPRequestDuration, err := meter.Int64Histogram(fmt.Sprintf("%s_total", httpRequestDurationPrefix), metric.WithUnit("milliseconds")) + totalHTTPRequestsCounter, err := meter.Int64Counter(fmt.Sprintf("%s.total", httpRequestCounterPrefix), metric.WithUnit("1")) if err != nil { return nil, err } - return &HTTPMiddleware{ - ctx: ctx, - httpRequestCounters: map[string]metric.Int64Counter{}, - httpResponseCounters: map[string]metric.Int64Counter{}, - httpRequestDurations: map[string]metric.Int64Histogram{}, - totalHTTPResponseCodeCounters: map[int]metric.Int64Counter{}, - meter: meter, - totalHTTPRequestsCounter: totalHTTPRequestsCounter, - totalHTTPResponseCounter: totalHTTPResponseCounter, - totalHTTPRequestDuration: totalHTTPRequestDuration, - }, - nil -} + totalHTTPResponseCounter, err := meter.Int64Counter(fmt.Sprintf("%s.total", httpResponseCounterPrefix), metric.WithUnit("1")) + if err != nil { + return nil, err + } -// AddHTTPRequestResponseCounter adds a new meter for an HTTP defaultEndpoint and Method (GET, POST, etc) -// Creates one request counter and multiple response counters (one per http response status code). -func (m *HTTPMiddleware) AddHTTPRequestResponseCounter(endpoint string, method string) error { - meterKey := getRequestCounterKey(endpoint, method) - httpReqCounter, err := m.meter.Int64Counter(meterKey, metric.WithUnit("1")) + totalHTTPResponseCodeCounter, err := meter.Int64Counter(fmt.Sprintf("%s.code.total", httpResponseCounterPrefix), metric.WithUnit("1")) if err != nil { - return err + return nil, err } - m.httpRequestCounters[meterKey] = httpReqCounter - durationKey := getRequestDurationKey(endpoint, method) - requestDuration, err := m.meter.Int64Histogram(durationKey, metric.WithUnit("milliseconds")) + httpRequestDuration, err := meter.Int64Histogram(httpRequestDurationPrefix, metric.WithUnit("milliseconds")) if err != nil { - return err + return nil, err } - m.httpRequestDurations[durationKey] = requestDuration - - respCodes := []int{200, 204, 400, 401, 403, 404, 500, 502, 503} - for _, code := range respCodes { - meterKey = getResponseCounterKey(endpoint, method, code) - httpRespCounter, err := m.meter.Int64Counter(meterKey, metric.WithUnit("1")) - if err != nil { - return err - } - m.httpResponseCounters[meterKey] = httpRespCounter - meterKey = fmt.Sprintf("%s_%d_total", httpResponseCounterPrefix, code) - totalHTTPResponseCodeCounter, err := m.meter.Int64Counter(meterKey, metric.WithUnit("1")) - if err != nil { - return err - } - m.totalHTTPResponseCodeCounters[code] = totalHTTPResponseCodeCounter + totalHTTPRequestDuration, err := meter.Int64Histogram(fmt.Sprintf("%s.total", httpRequestDurationPrefix), metric.WithUnit("milliseconds")) + if err != nil { + return nil, err } - return nil + return &HTTPMiddleware{ + ctx: ctx, + httpRequestCounter: httpRequestCounter, + httpResponseCounter: httpResponseCounter, + httpRequestDuration: httpRequestDuration, + totalHTTPResponseCodeCounter: totalHTTPResponseCodeCounter, + totalHTTPRequestsCounter: totalHTTPRequestsCounter, + totalHTTPResponseCounter: totalHTTPResponseCounter, + totalHTTPRequestDuration: totalHTTPRequestDuration, + }, + nil } func replaceEndpointChars(endpoint string) string { - endpoint = strings.ReplaceAll(endpoint, "/", "_") endpoint = strings.ReplaceAll(endpoint, "{", "") endpoint = strings.ReplaceAll(endpoint, "}", "") return endpoint } -func getRequestCounterKey(endpoint, method string) string { - endpoint = replaceEndpointChars(endpoint) - return fmt.Sprintf("%s%s_%s", httpRequestCounterPrefix, endpoint, method) -} - -func getRequestDurationKey(endpoint, method string) string { - endpoint = replaceEndpointChars(endpoint) - return fmt.Sprintf("%s%s_%s", httpRequestDurationPrefix, endpoint, method) -} - -func getResponseCounterKey(endpoint, method string, status int) string { - endpoint = replaceEndpointChars(endpoint) - return fmt.Sprintf("%s%s_%s_%d", httpResponseCounterPrefix, endpoint, method, status) +func getEndpointMetricAttr(r *http.Request) string { + var endpoint string + route := mux.CurrentRoute(r) + if route != nil { + pathTmpl, err := route.GetPathTemplate() + if err == nil { + endpoint = replaceEndpointChars(pathTmpl) + } + } + return endpoint } // Handler logs every request and response and adds the, to metrics. @@ -176,11 +154,10 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler { log.WithContext(ctx).Tracef("HTTP request %v: %v %v", reqID, r.Method, r.URL) - metricKey := getRequestCounterKey(r.URL.Path, r.Method) + endpointAttr := attribute.String("endpoint", getEndpointMetricAttr(r)) + methodAttr := attribute.String("method", r.Method) - if c, ok := m.httpRequestCounters[metricKey]; ok { - c.Add(m.ctx, 1) - } + m.httpRequestCounter.Add(m.ctx, 1, metric.WithAttributes(endpointAttr, methodAttr)) m.totalHTTPRequestsCounter.Add(m.ctx, 1) w := WrapResponseWriter(rw) @@ -193,21 +170,14 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler { log.WithContext(ctx).Tracef("HTTP response %v: %v %v status %v", reqID, r.Method, r.URL, w.Status()) } - metricKey = getResponseCounterKey(r.URL.Path, r.Method, w.Status()) - if c, ok := m.httpResponseCounters[metricKey]; ok { - c.Add(m.ctx, 1) - } + statusCodeAttr := attribute.Int("code", w.Status()) + m.httpResponseCounter.Add(m.ctx, 1, metric.WithAttributes(endpointAttr, methodAttr, statusCodeAttr)) m.totalHTTPResponseCounter.Add(m.ctx, 1) - if c, ok := m.totalHTTPResponseCodeCounters[w.Status()]; ok { - c.Add(m.ctx, 1) - } + m.totalHTTPResponseCodeCounter.Add(m.ctx, 1, metric.WithAttributes(statusCodeAttr)) - durationKey := getRequestDurationKey(r.URL.Path, r.Method) reqTook := time.Since(reqStart) - if c, ok := m.httpRequestDurations[durationKey]; ok { - c.Record(m.ctx, reqTook.Milliseconds()) - } + m.httpRequestDuration.Record(m.ctx, reqTook.Milliseconds(), metric.WithAttributes(endpointAttr, methodAttr)) log.WithContext(ctx).Debugf("request %s %s took %d ms and finished with status %d", r.Method, r.URL.Path, reqTook.Milliseconds(), w.Status()) if w.Status() == 200 && (r.Method == http.MethodPut || r.Method == http.MethodPost || r.Method == http.MethodDelete) {