Skip to content

Commit

Permalink
Add metric instrumentation package, instruments Sansshell-Proxy (#216)
Browse files Browse the repository at this point in the history
* add metrics instrumentations

* init metric during server initialization

* change metrics structure to allow registration

* Remove comments

* add comments

* fix npe

* Handle metric error. Add proxy metrics

* handle metric error

* handle err

* rm irrelevant files

* add this one back

* add comments

* metric desc variables

* change singleton to contextbased

* remove unused file

* Delete launch.json

* Add comments
Correct otel metrics function definition

* update comment and change otelrecorder to interface

* add comments

* add test for metrics package

* fix comment and remove debug statement.

* naming consistency

* add comment, and change beahvior of RegisterInt64Gauge to allow overwriting gauge

* fix comment

* fix comment typo and inconsistency

* adds copyright
  • Loading branch information
sfc-gh-elinardi authored Apr 18, 2023
1 parent 43266fd commit 6da9d98
Show file tree
Hide file tree
Showing 11 changed files with 736 additions and 27 deletions.
30 changes: 28 additions & 2 deletions auth/opa/rpcauth/hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,19 @@ import (
"context"
"net"

"github.com/Snowflake-Labs/sansshell/telemetry/metrics"
"github.com/go-logr/logr"
"go.opentelemetry.io/otel/attribute"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)

// Metrics
const (
authzDeniedJustificationMetricName = "authz_denied_justification"
authzDeniedJustificationMetricDesc = "authorization denied due to justification"
)

// RPCAuthzHookFunc implements RpcAuthzHook for a simple function
type RPCAuthzHookFunc func(context.Context, *RPCAuthInput) error

Expand Down Expand Up @@ -82,19 +91,36 @@ var (
// that validates if justification was included. If it is required and passes the optional validation function
// it will return nil, otherwise an error.
func JustificationHook(justificationFunc func(string) error) RPCAuthzHook {
return RPCAuthzHookFunc(func(_ context.Context, input *RPCAuthInput) error {
return RPCAuthzHookFunc(func(ctx context.Context, input *RPCAuthInput) error {
// See if we got any metadata and if it contains the justification
var j string
v := input.Metadata[ReqJustKey]
if len(v) > 0 {
j = v[0]
}

logger := logr.FromContextOrDiscard(ctx)
recorder := metrics.RecorderFromContextOrNoop(ctx)
if j == "" {
errRegister := recorder.RegisterInt64Counter(authzDeniedJustificationMetricName, authzDeniedJustificationMetricDesc)
if errRegister != nil {
logger.V(1).Error(errRegister, "failed to register "+authzDeniedJustificationMetricName)
}
errCounter := recorder.AddInt64Counter(ctx, authzDeniedJustificationMetricName, 1, attribute.String("reason", "missing_justification"))
if errCounter != nil {
logger.V(1).Error(errCounter, "failed to add counter "+authzDeniedJustificationMetricName)
}
return ErrJustification
}
if justificationFunc != nil {
if err := justificationFunc(j); err != nil {
errRegister := recorder.RegisterInt64Counter(authzDeniedJustificationMetricName, authzDeniedJustificationMetricDesc)
if errRegister != nil {
logger.V(1).Error(errRegister, "failed to register "+authzDeniedJustificationMetricName)
}
errCounter := recorder.AddInt64Counter(ctx, authzDeniedJustificationMetricName, 1, attribute.String("reason", "denied"))
if errCounter != nil {
logger.V(1).Error(errCounter, "failed to add counter "+authzDeniedJustificationMetricName)
}
return status.Errorf(codes.FailedPrecondition, "justification failed: %v", err)
}
}
Expand Down
46 changes: 46 additions & 0 deletions auth/opa/rpcauth/rpcauth.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ import (
"google.golang.org/protobuf/proto"

"github.com/Snowflake-Labs/sansshell/auth/opa"
"github.com/Snowflake-Labs/sansshell/telemetry/metrics"
)

// Metrics
const (
authzDeniedPolicyCounterName = "authz_denied_policy"
authzDeniedPolicyCounterDesc = "number of authorization denied by policy"
authzDenialHintErrorCounterName = "authz_denial_hint_error"
authzDenialHintErrorCounterDesc = "number of failure to get denial hint"
authzFailureInputMissingCounterName = "authz_failure_input_missing"
authzFailureInputMissingCounterDesc = "number of authorization failure due to missing input"
authzFailureEvalErrorCounterName = "authz_failure_eval_error"
authzFailureEvalErrorCounterDesc = "number of authorization failure due to policy evaluation error"
)

// An Authorizer performs authorization of Sanshsell RPCs based on
Expand Down Expand Up @@ -81,12 +94,21 @@ func NewWithPolicy(ctx context.Context, policy string, authzHooks ...RPCAuthzHoo
// the success or failure of policy.
func (g *Authorizer) Eval(ctx context.Context, input *RPCAuthInput) error {
logger := logr.FromContextOrDiscard(ctx)
recorder := metrics.RecorderFromContextOrNoop(ctx)
if input != nil {
logger.V(2).Info("evaluating authz policy", "input", input)
}
if input == nil {
err := status.Error(codes.InvalidArgument, "policy input cannot be nil")
logger.V(1).Error(err, "failed to evaluate authz policy", "input", input)
errRegister := recorder.RegisterInt64Counter(authzFailureInputMissingCounterName, authzFailureInputMissingCounterDesc)
if errRegister != nil {
logger.V(1).Error(errRegister, "failed to register "+authzFailureInputMissingCounterName)
}
errCounter := recorder.AddInt64Counter(ctx, authzFailureInputMissingCounterName, 1)
if errCounter != nil {
logger.V(1).Error(errCounter, "failed to add counter "+authzFailureInputMissingCounterName)
}
return err
}
for _, hook := range g.hooks {
Expand All @@ -103,19 +125,43 @@ func (g *Authorizer) Eval(ctx context.Context, input *RPCAuthInput) error {
result, err := g.policy.Eval(ctx, input)
if err != nil {
logger.V(1).Error(err, "failed to evaluate authz policy", "input", input)
errRegister := recorder.RegisterInt64Counter(authzFailureEvalErrorCounterName, authzFailureEvalErrorCounterDesc)
if errRegister != nil {
logger.V(1).Error(errRegister, "failed to register "+authzFailureEvalErrorCounterName)
}
errCounter := recorder.AddInt64Counter(ctx, authzFailureEvalErrorCounterName, 1)
if errCounter != nil {
logger.V(1).Error(errCounter, "failed to add counter "+authzFailureEvalErrorCounterName)
}
return status.Errorf(codes.Internal, "authz policy evaluation error: %v", err)
}
var hints []string
if !result {
// We've failed so let's see if we can help tell the user what might have failed.
hints, err = g.policy.DenialHints(ctx, input)
if err != nil {
errRegister := recorder.RegisterInt64Counter(authzDenialHintErrorCounterName, authzDenialHintErrorCounterDesc)
if errRegister != nil {
logger.V(1).Error(errRegister, "failed to register "+authzDenialHintErrorCounterName)
}
errCounter := recorder.AddInt64Counter(ctx, authzDenialHintErrorCounterName, 1)
if errCounter != nil {
logger.V(1).Error(errCounter, "failed to add counter "+authzDenialHintErrorCounterName)
}
// We can't do much here besides log that something went wrong
logger.V(1).Error(err, "failed to get hints for authz policy denial", "error", err)
}
}
logger.V(1).Info("authz policy evaluation result", "authorizationResult", result, "input", input, "denialHints", hints)
if !result {
errRegister := recorder.RegisterInt64Counter(authzDeniedPolicyCounterName, authzDeniedPolicyCounterDesc)
if errRegister != nil {
logger.V(1).Error(errRegister, "failed to register "+authzDeniedPolicyCounterName)
}
errCounter := recorder.AddInt64Counter(ctx, authzDeniedPolicyCounterName, 1)
if errCounter != nil {
logger.V(1).Error(errCounter, "failed to add counter "+authzDeniedPolicyCounterName)
}
if len(hints) > 0 {
return status.Errorf(codes.PermissionDenied, "OPA policy does not permit this request: %v", strings.Join(hints, ", "))
} else {
Expand Down
20 changes: 20 additions & 0 deletions cmd/proxy-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ import (

"github.com/go-logr/logr"
"github.com/go-logr/stdr"
prometheus_exporter "go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/metric/global"
otelmetricsdk "go.opentelemetry.io/otel/sdk/metric"
"google.golang.org/grpc"
channelz "google.golang.org/grpc/channelz/service"
"google.golang.org/grpc/reflection"
Expand All @@ -38,6 +41,7 @@ import (
"github.com/Snowflake-Labs/sansshell/cmd/proxy-server/server"
"github.com/Snowflake-Labs/sansshell/cmd/util"
ss "github.com/Snowflake-Labs/sansshell/services/sansshell/server"
"github.com/Snowflake-Labs/sansshell/telemetry/metrics"

// Import services here to make them proxy-able
_ "github.com/Snowflake-Labs/sansshell/services/ansible"
Expand Down Expand Up @@ -95,9 +99,24 @@ func main() {
logger := stdr.New(log.New(os.Stderr, "", logOpts)).WithName("sanshell-proxy")
stdr.SetVerbosity(*verbosity)

// Setup exporter using the default prometheus registry
exporter, err := prometheus_exporter.New()
if err != nil {
log.Fatalf("failed to create prometheus exporter: %v\n", err)
}
global.SetMeterProvider(otelmetricsdk.NewMeterProvider(
otelmetricsdk.WithReader(exporter),
))
meter := global.Meter("sansshell-proxy")
recorder, err := metrics.NewOtelRecorder(meter, metrics.WithMetricNamePrefix("sansshell-proxy"))
if err != nil {
log.Fatalf("failed to create OtelRecorder: %v\n", err)
}

policy := util.ChoosePolicy(logger, defaultPolicy, *policyFlag, *policyFile)
clientPolicy := util.ChoosePolicy(logger, "", *clientPolicyFlag, *clientPolicyFile)
ctx := logr.NewContext(context.Background(), logger)
ctx = metrics.NewContextWithRecorder(ctx, recorder)

parsed, err := opa.NewAuthzPolicy(ctx, policy, opa.WithDenialHintsQuery("data.sansshell.authz.denial_hints"))
if err != nil {
Expand All @@ -123,5 +142,6 @@ func main() {
server.WithRawServerOption(srv.Register),
server.WithDebugPort(*debugport),
server.WithMetricsPort(*metricsport),
server.WithMetricsRecorder(recorder),
)
}
54 changes: 34 additions & 20 deletions cmd/proxy-server/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"github.com/Snowflake-Labs/sansshell/auth/opa/rpcauth"
"github.com/Snowflake-Labs/sansshell/proxy/server"
"github.com/Snowflake-Labs/sansshell/telemetry"
"github.com/Snowflake-Labs/sansshell/telemetry/metrics"
"google.golang.org/grpc/credentials"
)

Expand All @@ -66,6 +67,7 @@ type runState struct {
streamClientInterceptors []grpc.StreamClientInterceptor
authzHooks []rpcauth.RPCAuthzHook
services []func(*grpc.Server)
metricsRecorder metrics.MetricsRecorder
}

type Option interface {
Expand Down Expand Up @@ -263,41 +265,53 @@ func WithDebugPort(addr string) Option {
})
}

// WithMetricsPort opens a HTTP endpoint for publishing metrics at the given addr
// This endpoint is to be scraped by a Prometheus-style metrics scraper.
// It can be accessed at http://{addr}/metrics
func WithMetricsPort(addr string) Option {
return optionFunc(func(_ context.Context, r *runState) error {
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.Handler())
r.metricsport = addr
r.metricshandler = mux

// WithMetricsRecorder enables metric instrumentations by inserting grpc metric interceptors
// and attaching recorder to the server runstate
func WithMetricsRecorder(recorder metrics.MetricsRecorder) Option {
return optionFunc(func(ctx context.Context, r *runState) error {
r.metricsRecorder = recorder
// Instrument gRPC Client
clientMetrics := grpc_prometheus.NewClientMetrics()
errRegister := prometheus.Register(clientMetrics)
grpcClientMetrics := grpc_prometheus.NewClientMetrics()
errRegister := prometheus.Register(grpcClientMetrics)
if errRegister != nil {
return fmt.Errorf("fail to register grpc client prometheus metrics: %s", errRegister)
return fmt.Errorf("fail to register grpc client metrics: %s", errRegister)
}
r.unaryClientInterceptors = append(r.unaryClientInterceptors,
clientMetrics.UnaryClientInterceptor(),
metrics.UnaryClientMetricsInterceptor(recorder),
grpcClientMetrics.UnaryClientInterceptor(),
)
r.streamClientInterceptors = append(r.streamClientInterceptors,
clientMetrics.StreamClientInterceptor(),
metrics.StreamClientMetricsInterceptor(recorder),
grpcClientMetrics.StreamClientInterceptor(),
)

// Instrument gRPC Server
serverMetrics := grpc_prometheus.NewServerMetrics()
errRegister = prometheus.Register(serverMetrics)
grpcServerMetrics := grpc_prometheus.NewServerMetrics(
grpc_prometheus.WithServerHandlingTimeHistogram(),
)
errRegister = prometheus.Register(grpcServerMetrics)
if errRegister != nil {
return fmt.Errorf("fail to register grpc server prometheus metrics: %s", errRegister)
return fmt.Errorf("fail to register grpc server metrics: %s", errRegister)
}
r.unaryInterceptors = append(r.unaryInterceptors,
serverMetrics.UnaryServerInterceptor(),
metrics.UnaryServerMetricsInterceptor(recorder),
grpcServerMetrics.UnaryServerInterceptor(),
)
r.streamInterceptors = append(r.streamInterceptors,
serverMetrics.StreamServerInterceptor(),
metrics.StreamServerMetricsInterceptor(recorder),
grpcServerMetrics.StreamServerInterceptor(),
)
return nil
})
}

// WithMetricsPort opens a HTTP endpoint for publishing metrics at the given addr
func WithMetricsPort(addr string) Option {
return optionFunc(func(ctx context.Context, r *runState) error {
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.Handler())
r.metricsport = addr
r.metricshandler = mux

return nil
})
Expand Down
8 changes: 6 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,13 @@ require (
github.com/google/subcommands v1.2.0
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.0-rc.0
github.com/open-policy-agent/opa v0.50.2
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.15.0
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.40.0
go.opentelemetry.io/otel v1.14.0
go.opentelemetry.io/otel/exporters/prometheus v0.37.0
go.opentelemetry.io/otel/metric v0.37.0
go.opentelemetry.io/otel/sdk/metric v0.37.0
go.opentelemetry.io/otel/trace v1.14.0
gocloud.dev v0.29.0
golang.org/x/sync v0.1.0
Expand Down Expand Up @@ -83,8 +88,7 @@ require (
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
github.com/yashtewari/glob-intersection v0.1.0 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/otel v1.14.0 // indirect
go.opentelemetry.io/otel/metric v0.37.0 // indirect
go.opentelemetry.io/otel/sdk v1.14.0 // indirect
golang.org/x/crypto v0.6.0 // indirect
golang.org/x/net v0.8.0 // indirect
golang.org/x/oauth2 v0.5.0 // indirect
Expand Down
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1892,6 +1892,8 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.11.2/go.mod h
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.3.0/go.mod h1:QNX1aly8ehqqX1LEa6YniTU7VY9I6R3X/oPxhGdTceE=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.6.1/go.mod h1:DAKwdo06hFLc0U88O10x4xnb5sc7dDRDqRuiN+io8JE=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.11.2/go.mod h1:GZWSQQky8AgdJj50r1KJm8oiQiIPaAX7uZCFQX9GzC8=
go.opentelemetry.io/otel/exporters/prometheus v0.37.0 h1:NQc0epfL0xItsmGgSXgfbH2C1fq2VLXkZoDFsfRNHpc=
go.opentelemetry.io/otel/exporters/prometheus v0.37.0/go.mod h1:hB8qWjsStK36t50/R0V2ULFb4u95X/Q6zupXLgvjTh8=
go.opentelemetry.io/otel/metric v0.20.0/go.mod h1:598I5tYlH1vzBjn+BTuhzTCSb/9debfNp6R3s7Pr1eU=
go.opentelemetry.io/otel/metric v0.28.0/go.mod h1:TrzsfQAmQaB1PDcdhBauLMk7nyyg9hm+GoQq/ekE9Iw=
go.opentelemetry.io/otel/metric v0.34.0/go.mod h1:ZFuI4yQGNCupurTXCwkeD/zHBt+C2bR7bw5JqUm/AP8=
Expand All @@ -1903,8 +1905,12 @@ go.opentelemetry.io/otel/sdk v1.3.0/go.mod h1:rIo4suHNhQwBIPg9axF8V9CA72Wz2mKF1t
go.opentelemetry.io/otel/sdk v1.6.1/go.mod h1:IVYrddmFZ+eJqu2k38qD3WezFR2pymCzm8tdxyh3R4E=
go.opentelemetry.io/otel/sdk v1.11.1/go.mod h1:/l3FE4SupHJ12TduVjUkZtlfFqDCQJlOlithYrdktys=
go.opentelemetry.io/otel/sdk v1.11.2/go.mod h1:wZ1WxImwpq+lVRo4vsmSOxdd+xwoUJ6rqyLc3SyX9aU=
go.opentelemetry.io/otel/sdk v1.14.0 h1:PDCppFRDq8A1jL9v6KMI6dYesaq+DFcDZvjsoGvxGzY=
go.opentelemetry.io/otel/sdk v1.14.0/go.mod h1:bwIC5TjrNG6QDCHNWvW4HLHtUQ4I+VQDsnjhvyZCALM=
go.opentelemetry.io/otel/sdk/export/metric v0.20.0/go.mod h1:h7RBNMsDJ5pmI1zExLi+bJK+Dr8NQCh0qGhm1KDnNlE=
go.opentelemetry.io/otel/sdk/metric v0.20.0/go.mod h1:knxiS8Xd4E/N+ZqKmUPf3gTTZ4/0TjTXukfxjzSTpHE=
go.opentelemetry.io/otel/sdk/metric v0.37.0 h1:haYBBtZZxiI3ROwSmkZnI+d0+AVzBWeviuYQDeBWosU=
go.opentelemetry.io/otel/sdk/metric v0.37.0/go.mod h1:mO2WV1AZKKwhwHTV3AKOoIEb9LbUaENZDuGUQd+j4A0=
go.opentelemetry.io/otel/trace v0.20.0/go.mod h1:6GjCW8zgDjwGHGa6GkyeB8+/5vjT16gUEi0Nf1iBdgw=
go.opentelemetry.io/otel/trace v1.3.0/go.mod h1:c/VDhno8888bvQYmbYLqe41/Ldmr/KKunbvWM4/fEjk=
go.opentelemetry.io/otel/trace v1.6.0/go.mod h1:qs7BrU5cZ8dXQHBGxHMOxwME/27YH2qEp4/+tZLLwJE=
Expand Down
Loading

0 comments on commit 6da9d98

Please sign in to comment.