Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

COSI-19, COSI-21: Enhance Scality COSI Driver with S3 and IAM Metrics, Logging, and IAM Client Improvements #85

Open
wants to merge 5 commits into
base: feature/COSI-65-instrument-cosi-drover-with-gprc-metrics
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/scality-cosi-driver/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ func init() {
func run(ctx context.Context) error {
registry := prometheus.NewRegistry()
driverName := *driverPrefix + "." + provisionerName
metrics.InitializeMetrics(defaultMetricsPrefix, registry)

metricsServer, err := metrics.StartMetricsServerWithRegistry(*driverMetricsAddress, registry, *driverMetricsPath)
if err != nil {
Expand Down
7 changes: 7 additions & 0 deletions pkg/clients/iam/iam_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ import (
"github.com/aws/aws-sdk-go-v2/service/iam"
"github.com/aws/aws-sdk-go-v2/service/iam/types"
"github.com/aws/smithy-go/logging"
"github.com/aws/smithy-go/middleware"
c "github.com/scality/cosi-driver/pkg/constants"
"github.com/scality/cosi-driver/pkg/metrics"
"github.com/scality/cosi-driver/pkg/util"
"k8s.io/klog/v2"
)
Expand Down Expand Up @@ -58,6 +60,11 @@ var InitIAMClient = func(ctx context.Context, params util.StorageClientParameter
config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(params.AccessKeyID, params.SecretAccessKey, "")),
config.WithHTTPClient(httpClient),
config.WithLogger(logger),
config.WithAPIOptions([]func(*middleware.Stack) error{
func(stack *middleware.Stack) error {
return metrics.AttachPrometheusMiddleware(stack, metrics.IAMRequestDuration, metrics.IAMRequestsTotal)
},
}),
)
if err != nil {
return nil, err
Expand Down
7 changes: 7 additions & 0 deletions pkg/clients/s3/s3_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
"github.com/aws/smithy-go/logging"
"github.com/aws/smithy-go/middleware"
"github.com/scality/cosi-driver/pkg/metrics"
"github.com/scality/cosi-driver/pkg/util"
)

Expand Down Expand Up @@ -47,6 +49,11 @@ var InitS3Client = func(ctx context.Context, params util.StorageClientParameters
config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(params.AccessKeyID, params.SecretAccessKey, "")),
config.WithHTTPClient(httpClient),
config.WithLogger(logger),
config.WithAPIOptions([]func(*middleware.Stack) error{
func(stack *middleware.Stack) error {
return metrics.AttachPrometheusMiddleware(stack, metrics.S3RequestDuration, metrics.S3RequestsTotal)
},
}),
)
if err != nil {
return nil, err
Expand Down
7 changes: 7 additions & 0 deletions pkg/driver/provisioner_server_impl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,14 @@ import (
"github.com/aws/smithy-go"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/prometheus/client_golang/prometheus"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

iamclient "github.com/scality/cosi-driver/pkg/clients/iam"
s3client "github.com/scality/cosi-driver/pkg/clients/s3"
"github.com/scality/cosi-driver/pkg/driver"
"github.com/scality/cosi-driver/pkg/metrics"
"github.com/scality/cosi-driver/pkg/mock"
"github.com/scality/cosi-driver/pkg/util"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -155,6 +157,11 @@ func restoreInitializeClient() {

// Tests

// Suite-wide setup: the driver metrics are initialized once, before any spec
// runs, against a registry created solely for this test suite.
var _ = BeforeSuite(func() {
	registry := prometheus.NewRegistry()
	metrics.InitializeMetrics("test_driver_prefix", registry)
})

var _ = Describe("ProvisionerServer InitProvisionerServer", func() {
var provisioner string

Expand Down
45 changes: 45 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,51 @@ var (
IAMRequestDuration *prometheus.HistogramVec
)

// InitializeMetrics creates the four package-level request metrics (a counter
// and a duration histogram each for S3 and IAM) and registers them on registry.
//
// prefix is used as the Prometheus namespace of every metric. Every metric is
// labelled with "method" and "status", in that order.
//
// Each call assigns freshly created collectors to the package-level variables.
// Registration goes through MustRegister, which panics when a collector cannot
// be registered.
func InitializeMetrics(prefix string, registry prometheus.Registerer) {
// Counter of S3 requests.
S3RequestsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: prefix,
Name: "s3_requests_total",
Help: "Total number of S3 requests, categorized by method and status.",
},
[]string{"method", "status"},

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the S3 operation type is commonly called action throughout the product metrics.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I will change that.
Thanks

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same for iam

)

// Histogram of S3 request durations, using the client library's default buckets.
S3RequestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: prefix,
Name: "s3_request_duration_seconds",
Help: "Duration of S3 requests in seconds, categorized by method and status.",
Buckets: prometheus.DefBuckets,
},
[]string{"method", "status"},
)

// Counter of IAM requests.
IAMRequestsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: prefix,
Name: "iam_requests_total",
Help: "Total number of IAM requests, categorized by method and status.",
},
[]string{"method", "status"},
)

// Histogram of IAM request durations, using the client library's default buckets.
IAMRequestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: prefix,
Name: "iam_request_duration_seconds",
Help: "Duration of IAM requests in seconds, categorized by method and status.",
Buckets: prometheus.DefBuckets,
},
[]string{"method", "status"},
)

// All four collectors are registered in a single call.
registry.MustRegister(S3RequestsTotal, S3RequestDuration, IAMRequestsTotal, IAMRequestDuration)

klog.InfoS("Custom metrics initialized", "prefix", prefix)
}

// StartMetricsServerWithRegistry starts an HTTP server for exposing metrics using a custom registry.
func StartMetricsServerWithRegistry(addr string, registry prometheus.Gatherer, metricsPath string) (*http.Server, error) {
listener, err := net.Listen("tcp", addr)
Expand Down
3 changes: 3 additions & 0 deletions pkg/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,16 @@ var _ = Describe("Metrics", func() {

var _ = Describe("InitializeMetrics", func() {
var (
prefix string
registry *prometheus.Registry
driverMetricsPath string
)

BeforeEach(func() {
prefix = "test"
registry = prometheus.NewRegistry()
driverMetricsPath = "/metrics"
metrics.InitializeMetrics(prefix, registry)
})

It("should serve metrics via an HTTP endpoint", func() {
Expand Down
39 changes: 39 additions & 0 deletions pkg/metrics/prometheus_middleware.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package metrics

import (
	"context"
	"errors"

	"github.com/aws/smithy-go/middleware"
	"github.com/prometheus/client_golang/prometheus"
	"k8s.io/klog/v2"
)

// AttachPrometheusMiddleware instruments a smithy middleware stack with
// Prometheus request metrics. It is declared as a package-level variable
// pointing at attachPrometheusMiddlewareMetrics, so it can be reassigned
// (presumably to stub it out in tests).
var AttachPrometheusMiddleware = attachPrometheusMiddlewareMetrics

// attachPrometheusMiddlewareMetrics adds a middleware named "PrometheusMetrics"
// at the end of the Finalize step of stack.
//
// For every operation passing through it, the middleware measures how long the
// remaining handlers take, records that duration in requestDuration and
// increments requestsTotal. Both are labelled with the operation name taken
// from the context and a status of "success" or "error". Errors from the
// remaining handlers are logged and returned unchanged.
//
// It returns an error when stack, requestDuration or requestsTotal is nil, or
// when the middleware cannot be added to the Finalize step.
func attachPrometheusMiddlewareMetrics(stack *middleware.Stack, requestDuration *prometheus.HistogramVec, requestsTotal *prometheus.CounterVec) error {
	// Reject missing collectors up front: a nil vector would otherwise cause a
	// nil-pointer panic in the middle of a request.
	if stack == nil || requestDuration == nil || requestsTotal == nil {
		return errors.New("prometheus middleware requires a non-nil stack, duration histogram and request counter")
	}

	middlewareFunc := middleware.FinalizeMiddlewareFunc("PrometheusMetrics", func(
		ctx context.Context, in middleware.FinalizeInput, next middleware.FinalizeHandler,
	) (out middleware.FinalizeOutput, metadata middleware.Metadata, err error) {
		operationName := middleware.GetOperationName(ctx)

		// err is a named result so that the deferred observer sees the final
		// outcome of the call when it picks the status label.
		timer := prometheus.NewTimer(prometheus.ObserverFunc(func(duration float64) {
			status := "success"
			if err != nil {
				status = "error"
			}
			requestDuration.WithLabelValues(operationName, status).Observe(duration)
			requestsTotal.WithLabelValues(operationName, status).Inc()
		}))
		defer timer.ObserveDuration()

		out, metadata, err = next.HandleFinalize(ctx, in)
		if err != nil {
			klog.ErrorS(err, "AWS SDK operation failed", "operation", operationName)
		}
		return out, metadata, err
	})

	// Add the middleware to the Finalize step.
	return stack.Finalize.Add(middlewareFunc, middleware.After)
}
152 changes: 152 additions & 0 deletions pkg/metrics/prometheus_middleware_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
package metrics_test

import (
"context"
"errors"

"github.com/aws/smithy-go/middleware"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/scality/cosi-driver/pkg/metrics"
"k8s.io/klog/v2"
)

// MockFinalizeMiddleware is a configurable test double for a Finalize-step
// middleware: its behaviour and its identifier are both supplied by the test.
type MockFinalizeMiddleware struct {
	// HandleFunc is the behaviour executed by HandleFinalize.
	HandleFunc func(ctx context.Context, in middleware.FinalizeInput, next middleware.FinalizeHandler) (middleware.FinalizeOutput, middleware.Metadata, error)
	// IDValue is the identifier returned by ID.
	IDValue string
}

// Compile-time check that the mock really satisfies middleware.FinalizeMiddleware.
var _ middleware.FinalizeMiddleware = MockFinalizeMiddleware{}

// HandleFinalize runs the configured HandleFunc with the given context, input
// and next handler, and returns its results unchanged.
//
// It returns an error without calling anything when next is nil. When
// HandleFunc is nil the call is forwarded straight to next, so a mock without
// a configured behaviour acts as a pass-through instead of panicking.
func (m MockFinalizeMiddleware) HandleFinalize(ctx context.Context, in middleware.FinalizeInput, next middleware.FinalizeHandler) (middleware.FinalizeOutput, middleware.Metadata, error) {
	if next == nil {
		return middleware.FinalizeOutput{}, middleware.Metadata{}, errors.New("next handler is nil")
	}
	if m.HandleFunc == nil {
		return next.HandleFinalize(ctx, in)
	}

	return m.HandleFunc(ctx, in, next)
}

// ID reports the identifier of this mock, which is whatever the test stored
// in IDValue.
func (m MockFinalizeMiddleware) ID() string {
	id := m.IDValue
	return id
}

// TerminalHandler is the last handler of a middleware chain built in tests.
type TerminalHandler struct{}

// HandleFinalize ends the chain: it ignores its arguments and reports success
// with zero-valued output and metadata.
func (t TerminalHandler) HandleFinalize(ctx context.Context, in middleware.FinalizeInput) (middleware.FinalizeOutput, middleware.Metadata, error) {
	var (
		out      middleware.FinalizeOutput
		metadata middleware.Metadata
	)
	return out, metadata, nil
}

// Specs for AttachPrometheusMiddleware: the middleware must be registered on
// the Finalize step and must record one duration observation and one counter
// increment per call, labelled with the operation name and the call outcome.
var _ = Describe("AttachPrometheusMiddleware", func() {
	// operationName is stored in the context of every executed chain so the
	// recorded label values are known to the assertions.
	const operationName = "TestOperation"

	var (
		stack           *middleware.Stack
		requestDuration *prometheus.HistogramVec
		requestsTotal   *prometheus.CounterVec
	)

	// addPassThroughMock registers a mock middleware in front of the others
	// that only logs and delegates to the next handler.
	addPassThroughMock := func() {
		mockMiddleware := MockFinalizeMiddleware{
			HandleFunc: func(ctx context.Context, in middleware.FinalizeInput, next middleware.FinalizeHandler) (middleware.FinalizeOutput, middleware.Metadata, error) {
				klog.InfoS("Mock middleware executed", "operation", middleware.GetOperationName(ctx))
				return next.HandleFinalize(ctx, in)
			},
			IDValue: "MockMiddleware",
		}
		Expect(stack.Finalize.Add(mockMiddleware, middleware.Before)).To(Succeed())
	}

	// buildChain wraps terminal with every middleware of the Finalize step so
	// that the first listed middleware runs first and terminal runs last.
	buildChain := func(terminal middleware.FinalizeHandler) middleware.FinalizeHandler {
		handler := terminal
		ids := stack.Finalize.List()
		for i := len(ids) - 1; i >= 0; i-- {
			m, ok := stack.Finalize.Get(ids[i])
			Expect(ok).To(BeTrue(), "middleware %q should be registered", ids[i])

			// Both m and nextHandler are declared per iteration, so each
			// wrapper closes over its own middleware and successor.
			nextHandler := handler
			handler = middleware.FinalizeHandlerFunc(func(ctx context.Context, in middleware.FinalizeInput) (middleware.FinalizeOutput, middleware.Metadata, error) {
				return m.HandleFinalize(ctx, in, nextHandler)
			})
		}
		return handler
	}

	BeforeEach(func() {
		// Fresh, unregistered metric vectors for every spec.
		requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
			Name: "request_duration_seconds",
			Help: "Duration of requests",
		}, []string{"operation", "status"})

		requestsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "requests_total",
			Help: "Total number of requests",
		}, []string{"operation", "status"})

		stack = middleware.NewStack("testStack", nil)
	})

	It("should attach the middleware to the stack", func() {
		err := metrics.AttachPrometheusMiddleware(stack, requestDuration, requestsTotal)
		Expect(err).NotTo(HaveOccurred())

		// The Finalize step must contain exactly the Prometheus middleware.
		Expect(stack.Finalize.List()).To(Equal([]string{"PrometheusMetrics"}))
	})

	It("should safely execute the middleware chain", func(ctx SpecContext) {
		err := metrics.AttachPrometheusMiddleware(stack, requestDuration, requestsTotal)
		Expect(err).NotTo(HaveOccurred())
		addPassThroughMock()

		handler := buildChain(TerminalHandler{})

		_, _, err = handler.HandleFinalize(middleware.WithOperationName(ctx, operationName), middleware.FinalizeInput{})
		Expect(err).NotTo(HaveOccurred())

		// Exactly one series was observed, and it is the "success" one.
		Expect(testutil.CollectAndCount(requestDuration)).To(Equal(1))
		Expect(testutil.ToFloat64(requestsTotal.WithLabelValues(operationName, "success"))).To(BeNumerically("==", 1))
		Expect(testutil.ToFloat64(requestsTotal.WithLabelValues(operationName, "error"))).To(BeNumerically("==", 0))
	})

	It("should record metrics with error status when next handler fails", func(ctx SpecContext) {
		err := metrics.AttachPrometheusMiddleware(stack, requestDuration, requestsTotal)
		Expect(err).NotTo(HaveOccurred())
		addPassThroughMock()

		failingHandler := middleware.FinalizeHandlerFunc(func(ctx context.Context, in middleware.FinalizeInput) (middleware.FinalizeOutput, middleware.Metadata, error) {
			return middleware.FinalizeOutput{}, middleware.Metadata{}, errors.New("simulated error")
		})
		handler := buildChain(failingHandler)

		_, _, err = handler.HandleFinalize(middleware.WithOperationName(ctx, operationName), middleware.FinalizeInput{})
		Expect(err).To(MatchError("simulated error"))

		// Exactly one series was observed, and it is the "error" one. The
		// values are read back, because WithLabelValues alone never returns nil.
		Expect(testutil.CollectAndCount(requestDuration)).To(Equal(1))
		Expect(testutil.ToFloat64(requestsTotal.WithLabelValues(operationName, "error"))).To(BeNumerically("==", 1))
		Expect(testutil.ToFloat64(requestsTotal.WithLabelValues(operationName, "success"))).To(BeNumerically("==", 0))
	})
})