From d8cd37e97049d4949d68149063aff0681397190b Mon Sep 17 00:00:00 2001 From: sinadarbouy Date: Thu, 7 Nov 2024 22:33:57 +0100 Subject: [PATCH 01/26] feat: Add Raft consensus for consistent hashing This commit introduces Raft consensus to maintain consistency of hash-to-proxy mappings across multiple GatewayD instances. Key changes include: - Add new Raft package implementing consensus protocol using HashiCorp's Raft - Integrate Raft with consistent hashing load balancer - Store proxy mappings in distributed state machine - Add configuration options for Raft cluster setup - Implement leadership monitoring and peer management - Add FSM snapshot and restore capabilities The implementation ensures that hash-to-proxy mappings remain consistent across cluster nodes, improving reliability for consistent hash-based load balancing. --- cmd/run.go | 7 + config/config.go | 2 +- config/types.go | 13 ++ gatewayd.yaml | 14 +- go.mod | 13 +- go.sum | 37 +++++ network/consistenthash.go | 37 +++-- network/loadbalancer.go | 2 +- network/proxy.go | 8 ++ network/server.go | 14 ++ raft/raft.go | 286 ++++++++++++++++++++++++++++++++++++++ 11 files changed, 419 insertions(+), 14 deletions(-) create mode 100644 raft/raft.go diff --git a/cmd/run.go b/cmd/run.go index d7841ed7..6f7e95eb 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -29,6 +29,7 @@ import ( "github.com/gatewayd-io/gatewayd/network" "github.com/gatewayd-io/gatewayd/plugin" "github.com/gatewayd-io/gatewayd/pool" + "github.com/gatewayd-io/gatewayd/raft" "github.com/gatewayd-io/gatewayd/tracing" usage "github.com/gatewayd-io/gatewayd/usagereport/v1" "github.com/getsentry/sentry-go" @@ -910,6 +911,11 @@ var runCmd = &cobra.Command{ span.End() + raftNode, originalErr := raft.NewRaftNode(logger, conf.Global.Raft) + if originalErr != nil { + logger.Error().Err(originalErr).Msg("Failed to start raft node") + } + _, span = otel.Tracer(config.TracerName).Start(runCtx, "Create servers") // Create and initialize servers. 
for name, cfg := range conf.Global.Servers { @@ -946,6 +952,7 @@ var runCmd = &cobra.Command{ LoadbalancerStrategyName: cfg.LoadBalancer.Strategy, LoadbalancerRules: cfg.LoadBalancer.LoadBalancingRules, LoadbalancerConsistentHash: cfg.LoadBalancer.ConsistentHash, + RaftNode: raftNode, }, ) diff --git a/config/config.go b/config/config.go index 9cc0584b..9932168d 100644 --- a/config/config.go +++ b/config/config.go @@ -201,7 +201,7 @@ func (c *Config) LoadDefaults(ctx context.Context) *gerr.GatewayDError { return gerr.ErrConfigParseError.Wrap(err) } - if configObject == "api" { + if configObject == "api" || configObject == "raft" { // Handle API configuration separately // TODO: Add support for multiple API config groups. continue diff --git a/config/types.go b/config/types.go index d065fc57..76613674 100644 --- a/config/types.go +++ b/config/types.go @@ -138,6 +138,18 @@ type API struct { GRPCNetwork string `json:"grpcNetwork" jsonschema:"enum=tcp,enum=udp,enum=unix"` } +type Raft struct { + Address string `json:"address"` + NodeID string `json:"nodeID"` + LeaderID string `json:"leaderID"` + Peers []RaftPeer `json:"peers"` +} + +type RaftPeer struct { + ID string `json:"id"` + Address string `json:"address"` +} + type GlobalConfig struct { API API `json:"api"` Loggers map[string]*Logger `json:"loggers"` @@ -146,4 +158,5 @@ type GlobalConfig struct { Proxies map[string]map[string]*Proxy `json:"proxies"` Servers map[string]*Server `json:"servers"` Metrics map[string]*Metrics `json:"metrics"` + Raft Raft `json:"raft"` } diff --git a/gatewayd.yaml b/gatewayd.yaml index d1b05072..92432677 100644 --- a/gatewayd.yaml +++ b/gatewayd.yaml @@ -80,7 +80,7 @@ proxies: servers: default: network: tcp - address: 0.0.0.0:15432 + address: 0.0.0.0:15433 loadBalancer: # Load balancer strategies can be found in config/constants.go strategy: ROUND_ROBIN # ROUND_ROBIN, RANDOM, WEIGHTED_ROUND_ROBIN @@ -103,6 +103,14 @@ servers: api: enabled: True - httpAddress: 0.0.0.0:18080 + 
httpAddress: 0.0.0.0:18081 grpcNetwork: tcp - grpcAddress: 0.0.0.0:19090 + grpcAddress: 0.0.0.0:19091 + +raft: + address: 127.0.0.1:2223 + nodeID: node2 + leaderID: node1 + peers: + - id: node1 + address: 127.0.0.1:2222 diff --git a/go.mod b/go.mod index bb347569..9ff6a9db 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,12 @@ require ( github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 github.com/hashicorp/go-hclog v1.6.3 github.com/hashicorp/go-plugin v1.6.2 + github.com/google/uuid v1.6.0 + github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 + github.com/hashicorp/go-hclog v1.6.3 + github.com/hashicorp/go-plugin v1.6.1 + github.com/hashicorp/raft v1.7.1 + github.com/hashicorp/raft-boltdb v0.0.0-20231211162105-6c830fa4535e github.com/invopop/jsonschema v0.12.0 github.com/jackc/pgx/v5 v5.7.1 github.com/knadh/koanf v1.5.0 @@ -52,9 +58,11 @@ require ( github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/ProtonMail/go-crypto v1.0.0 // indirect + github.com/armon/go-metrics v0.4.1 // indirect github.com/bahlo/generic-list-go v0.2.0 // indirect github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/boltdb/bolt v1.3.1 // indirect github.com/buger/jsonparser v1.1.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect @@ -88,7 +96,10 @@ require ( github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/go-querystring v1.1.0 // indirect - github.com/google/uuid v1.6.0 // indirect + github.com/hashicorp/go-immutable-radix v1.0.0 // indirect + github.com/hashicorp/go-msgpack v0.5.5 // indirect + github.com/hashicorp/go-msgpack/v2 v2.1.2 // indirect + github.com/hashicorp/golang-lru v0.5.1 // indirect github.com/hashicorp/yamux v0.1.2 // indirect github.com/imdario/mergo v0.3.16 // 
indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect diff --git a/go.sum b/go.sum index 398b3608..14385f0a 100644 --- a/go.sum +++ b/go.sum @@ -9,6 +9,8 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/DataDog/datadog-go v2.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= +github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY= @@ -28,6 +30,10 @@ github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuW github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= +github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg= +github.com/armon/go-metrics v0.3.8/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= +github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= +github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= 
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= @@ -51,6 +57,8 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= +github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4= +github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= @@ -66,6 +74,8 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= +github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cloudflare/circl v1.3.3/go.mod h1:5XYMA4rFBvNIrhs50XuiBJ15vF2pZn4nnUKZrLbUZFA= github.com/cloudflare/circl v1.4.0 h1:BV7h5MgrktNzytKmWjpOtdYrf0lkkbF8YMlBGPhJQrY= @@ -231,16 +241,25 @@ github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtng github.com/hashicorp/go-cleanhttp v0.5.1/go.mod 
h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-hclog v0.0.0-20180709165350-ff2cf002a8dd/go.mod h1:9bjs9uLqI8l75knNv3lV1kA55veR+WUPSiKIWcQHudI= github.com/hashicorp/go-hclog v0.8.0/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= +github.com/hashicorp/go-hclog v0.9.1/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= github.com/hashicorp/go-hclog v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= +github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-msgpack v0.5.5 h1:i9R9JSrqIz0QVLz3sz+i3YJdT7TTSLcfLLzJi9aZTuI= +github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-msgpack/v2 v2.1.2 h1:4Ee8FTp834e+ewB71RDrQ0VKpyFdrKOjvYtnQ/ltVj0= +github.com/hashicorp/go-msgpack/v2 v2.1.2/go.mod h1:upybraOAblm4S7rx0+jeNy+CWWhzywQsSRV5033mMu4= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= github.com/hashicorp/go-plugin v1.0.1/go.mod h1:++UyYGoz3o5w9ZzAdZxtQKrWWP+iqPBn3cQptSMzBuY= github.com/hashicorp/go-plugin v1.6.2 h1:zdGAEd0V1lCaU0u+MxWQhtSDQmahpkwOun8U8EiRVog= github.com/hashicorp/go-plugin v1.6.2/go.mod h1:CkgLQ5CZqNmdL9U9JzM532t8ZiYQ35+pj3b1FD37R0Q= +github.com/hashicorp/go-plugin v1.6.1 h1:P7MR2UP6gNKGPp+y7EZw2kOiq4IR9WiqLvp0XOsVdwI= +github.com/hashicorp/go-plugin v1.6.1/go.mod h1:XPHFku2tFo3o3QKFgSYo+cghcUhw1NA1hZyMK0PWAw0= +github.com/hashicorp/go-retryablehttp v0.5.3/go.mod 
h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-retryablehttp v0.5.4/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-rootcerts v1.0.1/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= @@ -248,15 +267,22 @@ github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerX github.com/hashicorp/go-sockaddr v1.0.2/go.mod h1:rB4wwRAUzs07qva3c5SdrY/NEtAUjGlgmH/UkBUC97A= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.1 h1:fv1ep09latC32wFoVwnqcnKJGnMSdBanPczbHAYm1BE= github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-version v1.1.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= +github.com/hashicorp/raft v1.1.0/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7HVbyzqM= +github.com/hashicorp/raft v1.7.1 h1:ytxsNx4baHsRZrhUcbt3+79zc4ly8qm7pi0393pSchY= +github.com/hashicorp/raft v1.7.1/go.mod h1:hUeiEwQQR/Nk2iKDD0dkEhklSsu3jcAcqvPzPoZSAEM= 
+github.com/hashicorp/raft-boltdb v0.0.0-20231211162105-6c830fa4535e h1:SK4y8oR4ZMHPvwVHryKI88kJPJda4UyWYvG5A6iEQxc= +github.com/hashicorp/raft-boltdb v0.0.0-20231211162105-6c830fa4535e/go.mod h1:EMz/UIuG93P0MBeHh6CbXQAEe8ckVJLZjhD17lBzK5Q= github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= github.com/hashicorp/vault/api v1.0.4/go.mod h1:gDcqh3WGcR1cpF5AJz/B1UFheUEneMoIospckxBxk6Q= github.com/hashicorp/vault/sdk v0.1.13/go.mod h1:B+hVj7TpuQY1Y/GPbCpffmgd+tSEwvhkWnjtSYCaS2M= @@ -287,6 +313,7 @@ github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqx github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= @@ -390,6 +417,7 @@ github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3I github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pelletier/go-toml v1.7.0/go.mod h1:vwGMzjaWMwyfHwgIBhI2YUM4fB6nL6lVAvS1LBMMhTE= github.com/pelletier/go-toml v1.9.5 
h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= @@ -413,7 +441,9 @@ github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSg github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU= github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= +github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= @@ -424,13 +454,17 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1: github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.0.0-20181126121408-4724e9255275/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.60.1 
h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= @@ -512,6 +546,7 @@ github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZ github.com/tklauser/go-sysconf v0.3.14/go.mod h1:1ym4lWMLUOhuBOPGtRcJm7tEGX4SCYNEEEtghGG/8uY= github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYgY= github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE= +github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/wasilibs/go-pgquery v0.0.0-20241011013927-817756c5aae4 h1:p44LEm5hBmg95D3r4660Yj3JNhq49k8C15x2V8++S9U= github.com/wasilibs/go-pgquery v0.0.0-20241011013927-817756c5aae4/go.mod h1:wCxHuE+0U5cAPbv6kakm/EPjDwnpEao1HXvBhBMrprA= github.com/wasilibs/wazero-helpers v0.0.0-20240620070341-3dff1577cd52 h1:OvLBa8SqJnZ6P+mjlzc2K7PM22rRUPE1x32G9DTPrC4= @@ -584,6 +619,7 @@ golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net 
v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -641,6 +677,7 @@ golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/network/consistenthash.go b/network/consistenthash.go index 9436e39e..790987bf 100644 --- a/network/consistenthash.go +++ b/network/consistenthash.go @@ -4,8 +4,12 @@ import ( "fmt" "net" "sync" + "time" + + "encoding/json" gerr "github.com/gatewayd-io/gatewayd/errors" + "github.com/gatewayd-io/gatewayd/raft" "github.com/spaolacci/murmur3" ) @@ -14,18 +18,20 @@ import ( type ConsistentHash struct { originalStrategy LoadBalancerStrategy useSourceIP bool - hashMap map[uint64]IProxy mu sync.Mutex + raftNode *raft.RaftNode + server *Server } // NewConsistentHash creates a new ConsistentHash instance. 
It requires a server configuration and an original // load balancing strategy. The consistent hash can use either the source IP or the full connection address // as the key for hashing. -func NewConsistentHash(server *Server, originalStrategy LoadBalancerStrategy) *ConsistentHash { +func NewConsistentHash(server *Server, originalStrategy LoadBalancerStrategy, raftNode *raft.RaftNode) *ConsistentHash { return &ConsistentHash{ originalStrategy: originalStrategy, useSourceIP: server.LoadbalancerConsistentHash.UseSourceIP, - hashMap: make(map[uint64]IProxy), + raftNode: raftNode, + server: server, } } @@ -52,10 +58,11 @@ func (ch *ConsistentHash) NextProxy(conn IConnWrapper) (IProxy, *gerr.GatewayDEr hash := hashKey(key) - proxy, exists := ch.hashMap[hash] - + proxyID, exists := ch.raftNode.Fsm.GetProxyID(hash) if exists { - return proxy, nil + if proxy, ok := ch.server.GetProxyByID(proxyID); ok { + return proxy, nil + } } // If no hash exists, fallback to the original strategy @@ -64,8 +71,22 @@ func (ch *ConsistentHash) NextProxy(conn IConnWrapper) (IProxy, *gerr.GatewayDEr return nil, gerr.ErrNoProxiesAvailable.Wrap(err) } - // Add the selected proxy to the hash map for future requests - ch.hashMap[hash] = proxy + // Create and apply the command through Raft + cmd := raft.HashMapCommand{ + Type: raft.CommandAddHashMapping, + Hash: hash, + ProxyID: proxy.GetID(), + } + + cmdBytes, marshalErr := json.Marshal(cmd) + if marshalErr != nil { + return nil, gerr.ErrNoProxiesAvailable.Wrap(marshalErr) + } + + // Apply the command through Raft + if err := ch.raftNode.Apply(cmdBytes, 10*time.Second); err != nil { + return nil, gerr.ErrNoProxiesAvailable.Wrap(err) + } return proxy, nil } diff --git a/network/loadbalancer.go b/network/loadbalancer.go index 2ce6c5cf..548290b1 100644 --- a/network/loadbalancer.go +++ b/network/loadbalancer.go @@ -32,7 +32,7 @@ func NewLoadBalancerStrategy(server *Server) (LoadBalancerStrategy, *gerr.Gatewa // If consistent hashing is enabled, wrap 
the strategy if server.LoadbalancerConsistentHash != nil { - strategy = NewConsistentHash(server, strategy) + strategy = NewConsistentHash(server, strategy, server.RaftNode) } return strategy, nil diff --git a/network/proxy.go b/network/proxy.go index f67d3b45..17afffcf 100644 --- a/network/proxy.go +++ b/network/proxy.go @@ -17,6 +17,7 @@ import ( "github.com/gatewayd-io/gatewayd/pool" "github.com/getsentry/sentry-go" "github.com/go-co-op/gocron" + "github.com/google/uuid" "github.com/rs/zerolog" "go.opentelemetry.io/otel" ) @@ -34,9 +35,11 @@ type IProxy interface { BusyConnectionsString() []string GetGroupName() string GetBlockName() string + GetID() string } type Proxy struct { + ID string GroupName string BlockName string AvailableConnections pool.IPool @@ -63,6 +66,7 @@ func NewProxy( defer span.End() proxy := Proxy{ + ID: uuid.New().String(), GroupName: pxy.GroupName, BlockName: pxy.BlockName, AvailableConnections: pxy.AvailableConnections, @@ -139,6 +143,10 @@ func NewProxy( return &proxy } +func (pr *Proxy) GetID() string { + return pr.ID +} + func (pr *Proxy) GetBlockName() string { return pr.BlockName } diff --git a/network/server.go b/network/server.go index 03aa046b..6d4557aa 100644 --- a/network/server.go +++ b/network/server.go @@ -17,6 +17,7 @@ import ( gerr "github.com/gatewayd-io/gatewayd/errors" "github.com/gatewayd-io/gatewayd/metrics" "github.com/gatewayd-io/gatewayd/plugin" + "github.com/gatewayd-io/gatewayd/raft" "github.com/rs/zerolog" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" @@ -87,6 +88,8 @@ type Server struct { LoadbalancerRules []config.LoadBalancingRule LoadbalancerConsistentHash *config.ConsistentHash connectionToProxyMap *sync.Map + + RaftNode *raft.RaftNode } var _ IServer = (*Server)(nil) @@ -741,6 +744,7 @@ func NewServer( LoadbalancerStrategyName: srv.LoadbalancerStrategyName, LoadbalancerRules: srv.LoadbalancerRules, LoadbalancerConsistentHash: srv.LoadbalancerConsistentHash, + RaftNode: srv.RaftNode, } // 
Try to resolve the address and log an error if it can't be resolved. @@ -802,3 +806,13 @@ func (s *Server) GetProxyForConnection(conn *ConnWrapper) (IProxy, bool) { func (s *Server) RemoveConnectionFromMap(conn *ConnWrapper) { s.connectionToProxyMap.Delete(conn) } + +// Add this method to Server struct +func (s *Server) GetProxyByID(id string) (IProxy, bool) { + for _, proxy := range s.Proxies { + if proxy.GetID() == id { + return proxy, true + } + } + return nil, false +} diff --git a/raft/raft.go b/raft/raft.go new file mode 100644 index 00000000..02b89504 --- /dev/null +++ b/raft/raft.go @@ -0,0 +1,286 @@ +package raft + +import ( + "encoding/json" + "fmt" + "io" + "net" + "os" + "path/filepath" + "sync" + "time" + + "github.com/gatewayd-io/gatewayd/config" + "github.com/hashicorp/raft" + raftboltdb "github.com/hashicorp/raft-boltdb" + "github.com/rs/zerolog" +) + +// Command types for Raft operations +const ( + CommandAddHashMapping = "ADD_HASH_MAPPING" +) + +// HashMapCommand represents a command to modify the hash map +type HashMapCommand struct { + Type string `json:"type"` + Hash uint64 `json:"hash"` + ProxyID string `json:"proxy_id"` // Using a string identifier for the proxy +} + +// RaftNode represents a node in the Raft cluster +type RaftNode struct { + raft *raft.Raft + config *raft.Config + Fsm *FSM + logStore raft.LogStore + stableStore raft.StableStore + snapshotStore raft.SnapshotStore + transport raft.Transport + bootstrapPeers []raft.Server + Logger zerolog.Logger + Peers []raft.Server // Holds Raft peers (for joining an existing cluster) +} + +// NewRaftNode creates and initializes a new Raft node +func NewRaftNode(logger zerolog.Logger, raftConfig config.Raft) (*RaftNode, error) { + config := raft.DefaultConfig() + + var err error + nodeID := raftConfig.NodeID + if raftConfig.NodeID == "" { + nodeID, err = os.Hostname() + if err != nil { + return nil, fmt.Errorf("error getting hostname: %w", err) + } + } + raftAddr := raftConfig.Address + 
config.LocalID = raft.ServerID(nodeID) + raftDir := filepath.Join("raft", nodeID) + err = os.MkdirAll(raftDir, os.ModePerm) + if err != nil { + return nil, fmt.Errorf("error creating raft directory: %w", err) + } + + // Create the FSM + fsm := NewFSM() + + // Create the log store and stable store + logStore, err := raftboltdb.NewBoltStore(filepath.Join(raftDir, "raft-log.db")) + if err != nil { + return nil, fmt.Errorf("error creating log store: %w", err) + } + + stableStore, err := raftboltdb.NewBoltStore(filepath.Join(raftDir, "raft-stable.db")) + if err != nil { + return nil, fmt.Errorf("error creating stable store: %w", err) + } + + // Create the snapshot store + snapshotStore, err := raft.NewFileSnapshotStore(raftDir, 3, os.Stderr) + if err != nil { + return nil, fmt.Errorf("error creating snapshot store: %w", err) + } + + // Setup Raft communication + addr, err := net.ResolveTCPAddr("tcp", raftAddr) + if err != nil { + return nil, fmt.Errorf("error resolving TCP address: %w", err) + } + transport, err := raft.NewTCPTransport(raftAddr, addr, 3, 10*time.Second, os.Stderr) + if err != nil { + return nil, fmt.Errorf("error creating TCP transport: %w", err) + } + + // Create the Raft node + r, err := raft.NewRaft(config, fsm, logStore, stableStore, snapshotStore, transport) + if err != nil { + return nil, fmt.Errorf("error creating Raft: %w", err) + } + + node := &RaftNode{ + raft: r, + config: config, + Fsm: fsm, + logStore: logStore, + stableStore: stableStore, + snapshotStore: snapshotStore, + transport: transport, + Logger: logger, + Peers: convertPeers(raftConfig.Peers), + } + + // Handle bootstrapping + isBootstrap := raftConfig.LeaderID == nodeID + if isBootstrap { + configuration := raft.Configuration{ + Servers: make([]raft.Server, len(node.Peers)), + } + for i, peer := range node.Peers { + configuration.Servers[i] = raft.Server{ + ID: raft.ServerID(peer.ID), + Address: raft.ServerAddress(peer.Address), + } + } + configuration.Servers = 
append(configuration.Servers, raft.Server{ + ID: config.LocalID, + Address: transport.LocalAddr(), + }) + node.raft.BootstrapCluster(configuration) + } + + go node.monitorLeadership() + + return node, nil +} + +func convertPeers(configPeers []config.RaftPeer) []raft.Server { + peers := make([]raft.Server, len(configPeers)) + for i, peer := range configPeers { + peers[i] = raft.Server{ + ID: raft.ServerID(peer.ID), + Address: raft.ServerAddress(peer.Address), + } + } + return peers +} + +// monitorLeadership checks if the node is the Raft leader and logs state changes +func (n *RaftNode) monitorLeadership() { + for { + isLeader := n.raft.State() == raft.Leader + if isLeader { + n.Logger.Info().Msg("This node is the Raft leader") + + for _, peer := range n.Peers { + // Check if peer already exists in cluster configuration + existingConfig := n.raft.GetConfiguration().Configuration() + peerExists := false + for _, server := range existingConfig.Servers { + if server.ID == peer.ID { + peerExists = true + n.Logger.Info().Msgf("Peer %s already exists in Raft cluster, skipping", peer.ID) + break + } + } + if peerExists { + continue + } + err := n.AddPeer(string(peer.ID), string(peer.Address)) + if err != nil { + n.Logger.Error().Err(err).Msgf("Failed to add node %s to Raft cluster", peer.ID) + } + } + } else { + n.Logger.Info().Msg("This node is a Raft follower") + } + + time.Sleep(10 * time.Second) // Poll leadership status periodically + } +} + +// AddPeer adds a new peer to the Raft cluster +func (n *RaftNode) AddPeer(peerID, peerAddr string) error { + return n.raft.AddVoter(raft.ServerID(peerID), raft.ServerAddress(peerAddr), 0, 0).Error() +} + +// RemovePeer removes a peer from the Raft cluster +func (n *RaftNode) RemovePeer(peerID string) error { + return n.raft.RemoveServer(raft.ServerID(peerID), 0, 0).Error() +} + +// Apply applies a new log entry to the Raft log +func (n *RaftNode) Apply(data []byte, timeout time.Duration) error { + future := n.raft.Apply(data, 
timeout) + return future.Error() +} + +// Shutdown gracefully shuts down the Raft node +func (n *RaftNode) Shutdown() error { + return n.raft.Shutdown().Error() +} + +// FSM represents the Finite State Machine for the Raft cluster +type FSM struct { + hashMap map[uint64]string // private field + mu sync.RWMutex +} + +// GetProxyID safely retrieves a proxy ID for a given hash +func (f *FSM) GetProxyID(hash uint64) (string, bool) { + f.mu.RLock() + defer f.mu.RUnlock() + proxyID, exists := f.hashMap[hash] + return proxyID, exists +} + +// NewFSM creates a new FSM instance +func NewFSM() *FSM { + return &FSM{ + hashMap: make(map[uint64]string), + } +} + +// Apply implements the raft.FSM interface +func (f *FSM) Apply(log *raft.Log) interface{} { + var cmd HashMapCommand + if err := json.Unmarshal(log.Data, &cmd); err != nil { + return fmt.Errorf("failed to unmarshal command: %w", err) + } + + f.mu.Lock() + defer f.mu.Unlock() + + switch cmd.Type { + case CommandAddHashMapping: + f.hashMap[cmd.Hash] = cmd.ProxyID + return nil + default: + return fmt.Errorf("unknown command type: %s", cmd.Type) + } +} + +// Snapshot returns a snapshot of the FSM +func (f *FSM) Snapshot() (raft.FSMSnapshot, error) { + f.mu.RLock() + defer f.mu.RUnlock() + + // Create a copy of the hash map + hashMapCopy := make(map[uint64]string) + for k, v := range f.hashMap { + hashMapCopy[k] = v + } + + return &FSMSnapshot{hashMap: hashMapCopy}, nil +} + +// Restore restores the FSM from a snapshot +func (f *FSM) Restore(rc io.ReadCloser) error { + decoder := json.NewDecoder(rc) + var hashMap map[uint64]string + if err := decoder.Decode(&hashMap); err != nil { + return err + } + + f.mu.Lock() + f.hashMap = hashMap + f.mu.Unlock() + + return nil +} + +// FSMSnapshot represents a snapshot of the FSM +type FSMSnapshot struct { + hashMap map[uint64]string +} + +func (f *FSMSnapshot) Persist(sink raft.SnapshotSink) error { + err := json.NewEncoder(sink).Encode(f.hashMap) + if err != nil { + sink.Cancel() + 
return err + } + return sink.Close() +} + +func (f *FSMSnapshot) Release() {} From ff2be247793a3bfaebaf54de83667665c4316b77 Mon Sep 17 00:00:00 2001 From: sinadarbouy Date: Mon, 25 Nov 2024 00:26:01 +0100 Subject: [PATCH 02/26] refactor: update consistent hash implementation with block-based proxy mapping - Replace proxy ID with block name for consistent hash mapping - Remove direct raft node dependency from ConsistentHash struct - Add ProxyByBlock map to Server for block-based proxy lookups - Include group name in hash key generation for better distribution - Add proxy initialization during server startup - Update FSM to use consistent naming for hash map storage This change improves the consistent hashing mechanism by using block names instead of proxy IDs, making it more aligned with the block-based architecture while maintaining backwards compatibility with the original load balancing strategy. --- network/consistenthash.go | 21 ++++++++++----------- network/loadbalancer.go | 2 +- network/server.go | 12 ++++++++++++ raft/raft.go | 28 +++++++++++++++------------- 4 files changed, 38 insertions(+), 25 deletions(-) diff --git a/network/consistenthash.go b/network/consistenthash.go index 790987bf..97b01907 100644 --- a/network/consistenthash.go +++ b/network/consistenthash.go @@ -19,18 +19,16 @@ type ConsistentHash struct { originalStrategy LoadBalancerStrategy useSourceIP bool mu sync.Mutex - raftNode *raft.RaftNode server *Server } // NewConsistentHash creates a new ConsistentHash instance. It requires a server configuration and an original // load balancing strategy. The consistent hash can use either the source IP or the full connection address // as the key for hashing. 
-func NewConsistentHash(server *Server, originalStrategy LoadBalancerStrategy, raftNode *raft.RaftNode) *ConsistentHash { +func NewConsistentHash(server *Server, originalStrategy LoadBalancerStrategy) *ConsistentHash { return &ConsistentHash{ originalStrategy: originalStrategy, useSourceIP: server.LoadbalancerConsistentHash.UseSourceIP, - raftNode: raftNode, server: server, } } @@ -56,16 +54,17 @@ func (ch *ConsistentHash) NextProxy(conn IConnWrapper) (IProxy, *gerr.GatewayDEr key = conn.RemoteAddr().String() } - hash := hashKey(key) + hash := hashKey(key + ch.server.GroupName) - proxyID, exists := ch.raftNode.Fsm.GetProxyID(hash) + // Get block name for this hash + blockName, exists := ch.server.RaftNode.Fsm.GetProxyBlock(hash) if exists { - if proxy, ok := ch.server.GetProxyByID(proxyID); ok { + if proxy, ok := ch.server.ProxyByBlock[blockName]; ok { return proxy, nil } } - // If no hash exists, fallback to the original strategy + // If no hash exists or no matching proxy found, fallback to the original strategy proxy, err := ch.originalStrategy.NextProxy(conn) if err != nil { return nil, gerr.ErrNoProxiesAvailable.Wrap(err) @@ -73,9 +72,9 @@ func (ch *ConsistentHash) NextProxy(conn IConnWrapper) (IProxy, *gerr.GatewayDEr // Create and apply the command through Raft cmd := raft.HashMapCommand{ - Type: raft.CommandAddHashMapping, - Hash: hash, - ProxyID: proxy.GetID(), + Type: raft.CommandAddHashMapping, + Hash: hash, + BlockName: proxy.GetBlockName(), } cmdBytes, marshalErr := json.Marshal(cmd) @@ -84,7 +83,7 @@ func (ch *ConsistentHash) NextProxy(conn IConnWrapper) (IProxy, *gerr.GatewayDEr } // Apply the command through Raft - if err := ch.raftNode.Apply(cmdBytes, 10*time.Second); err != nil { + if err := ch.server.RaftNode.Apply(cmdBytes, 10*time.Second); err != nil { return nil, gerr.ErrNoProxiesAvailable.Wrap(err) } diff --git a/network/loadbalancer.go b/network/loadbalancer.go index 548290b1..2ce6c5cf 100644 --- a/network/loadbalancer.go +++ 
b/network/loadbalancer.go @@ -32,7 +32,7 @@ func NewLoadBalancerStrategy(server *Server) (LoadBalancerStrategy, *gerr.Gatewa // If consistent hashing is enabled, wrap the strategy if server.LoadbalancerConsistentHash != nil { - strategy = NewConsistentHash(server, strategy, server.RaftNode) + strategy = NewConsistentHash(server, strategy) } return strategy, nil diff --git a/network/server.go b/network/server.go index 6d4557aa..19815a58 100644 --- a/network/server.go +++ b/network/server.go @@ -90,6 +90,8 @@ type Server struct { connectionToProxyMap *sync.Map RaftNode *raft.RaftNode + + ProxyByBlock map[string]IProxy } var _ IServer = (*Server)(nil) @@ -770,6 +772,8 @@ func NewServer( } server.loadbalancerStrategy = st + server.initializeProxies() + return &server } @@ -816,3 +820,11 @@ func (s *Server) GetProxyByID(id string) (IProxy, bool) { } return nil, false } + +// Initialize the map when creating proxies +func (s *Server) initializeProxies() { + s.ProxyByBlock = make(map[string]IProxy) + for _, proxy := range s.Proxies { + s.ProxyByBlock[proxy.GetBlockName()] = proxy + } +} diff --git a/raft/raft.go b/raft/raft.go index 02b89504..c6d1f2b1 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -23,9 +23,9 @@ const ( // HashMapCommand represents a command to modify the hash map type HashMapCommand struct { - Type string `json:"type"` - Hash uint64 `json:"hash"` - ProxyID string `json:"proxy_id"` // Using a string identifier for the proxy + Type string `json:"type"` + Hash uint64 `json:"hash"` + BlockName string `json:"block_name"` } // RaftNode represents a node in the Raft cluster @@ -202,22 +202,24 @@ func (n *RaftNode) Shutdown() error { // FSM represents the Finite State Machine for the Raft cluster type FSM struct { - hashMap map[uint64]string // private field - mu sync.RWMutex + consistentHashMap map[uint64]string + mu sync.RWMutex } -// GetProxyID safely retrieves a proxy ID for a given hash -func (f *FSM) GetProxyID(hash uint64) (string, bool) { +// 
GetProxyBlock safely retrieves the block name for a given hash +func (f *FSM) GetProxyBlock(hash uint64) (string, bool) { f.mu.RLock() defer f.mu.RUnlock() - proxyID, exists := f.hashMap[hash] - return proxyID, exists + if blockName, exists := f.consistentHashMap[hash]; exists { + return blockName, true + } + return "", false } // NewFSM creates a new FSM instance func NewFSM() *FSM { return &FSM{ - hashMap: make(map[uint64]string), + consistentHashMap: make(map[uint64]string), } } @@ -233,7 +235,7 @@ func (f *FSM) Apply(log *raft.Log) interface{} { switch cmd.Type { case CommandAddHashMapping: - f.hashMap[cmd.Hash] = cmd.ProxyID + f.consistentHashMap[cmd.Hash] = cmd.BlockName return nil default: return fmt.Errorf("unknown command type: %s", cmd.Type) @@ -247,7 +249,7 @@ func (f *FSM) Snapshot() (raft.FSMSnapshot, error) { // Create a copy of the hash map hashMapCopy := make(map[uint64]string) - for k, v := range f.hashMap { + for k, v := range f.consistentHashMap { hashMapCopy[k] = v } @@ -263,7 +265,7 @@ func (f *FSM) Restore(rc io.ReadCloser) error { } f.mu.Lock() - f.hashMap = hashMap + f.consistentHashMap = hashMap f.mu.Unlock() return nil From e35be54a9075acf57f097dddd48c0848a1322051 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Mon, 25 Nov 2024 18:19:24 +0100 Subject: [PATCH 03/26] refactor: remove proxy ID and related functionality - Remove unused UUID-based ID field from Proxy struct - Remove GetID() method from IProxy interface and Proxy implementation - Remove GetProxyByID() method from Server struct - Remove uuid package dependency The proxy ID was not being used meaningfully in the codebase, so removing it simplifies the proxy implementation. 
--- network/proxy.go | 8 -------- network/server.go | 10 ---------- 2 files changed, 18 deletions(-) diff --git a/network/proxy.go b/network/proxy.go index 17afffcf..f67d3b45 100644 --- a/network/proxy.go +++ b/network/proxy.go @@ -17,7 +17,6 @@ import ( "github.com/gatewayd-io/gatewayd/pool" "github.com/getsentry/sentry-go" "github.com/go-co-op/gocron" - "github.com/google/uuid" "github.com/rs/zerolog" "go.opentelemetry.io/otel" ) @@ -35,11 +34,9 @@ type IProxy interface { BusyConnectionsString() []string GetGroupName() string GetBlockName() string - GetID() string } type Proxy struct { - ID string GroupName string BlockName string AvailableConnections pool.IPool @@ -66,7 +63,6 @@ func NewProxy( defer span.End() proxy := Proxy{ - ID: uuid.New().String(), GroupName: pxy.GroupName, BlockName: pxy.BlockName, AvailableConnections: pxy.AvailableConnections, @@ -143,10 +139,6 @@ func NewProxy( return &proxy } -func (pr *Proxy) GetID() string { - return pr.ID -} - func (pr *Proxy) GetBlockName() string { return pr.BlockName } diff --git a/network/server.go b/network/server.go index 19815a58..b8f16775 100644 --- a/network/server.go +++ b/network/server.go @@ -811,16 +811,6 @@ func (s *Server) RemoveConnectionFromMap(conn *ConnWrapper) { s.connectionToProxyMap.Delete(conn) } -// Add this method to Server struct -func (s *Server) GetProxyByID(id string) (IProxy, bool) { - for _, proxy := range s.Proxies { - if proxy.GetID() == id { - return proxy, true - } - } - return nil, false -} - // Initialize the map when creating proxies func (s *Server) initializeProxies() { s.ProxyByBlock = make(map[string]IProxy) From 61c2ac87d68a519994c2380cb4c49b8397c33fe1 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Mon, 25 Nov 2024 18:20:34 +0100 Subject: [PATCH 04/26] feat(raft): Add Raft integration tests and consistent hash improvements This commit introduces comprehensive Raft testing infrastructure and enhances the consistent hash implementation with distributed state management. 
Key changes: - Add new test cases for Raft leadership, follower behavior, and FSM operations - Integrate Raft with consistent hash load balancer for distributed state - Add TestRaftHelper utility for simplified Raft testing setup - Update consistent hash tests to use Raft for state persistence - Add GetState method to RaftNode for state inspection - Improve test coverage for concurrent operations The changes ensure that proxy mappings are consistently maintained across the cluster using Raft consensus, making the load balancer more reliable in distributed environments. --- network/consistenthash_test.go | 114 ++++++++++---- network/network_helpers_test.go | 5 +- raft/raft.go | 6 + raft/raft_test.go | 255 ++++++++++++++++++++++++++++++++ testhelpers/raft_helpers.go | 95 ++++++++++++ 5 files changed, 447 insertions(+), 28 deletions(-) create mode 100644 raft/raft_test.go create mode 100644 testhelpers/raft_helpers.go diff --git a/network/consistenthash_test.go b/network/consistenthash_test.go index e928ee57..df72fc1a 100644 --- a/network/consistenthash_test.go +++ b/network/consistenthash_test.go @@ -1,12 +1,17 @@ package network import ( + "encoding/json" "net" "sync" "testing" + "time" "github.com/gatewayd-io/gatewayd/config" + "github.com/gatewayd-io/gatewayd/raft" + "github.com/gatewayd-io/gatewayd/testhelpers" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) // TestNewConsistentHash verifies that a new ConsistentHash instance is properly created. @@ -22,22 +27,38 @@ func TestNewConsistentHash(t *testing.T) { assert.NotNil(t, consistentHash) assert.Equal(t, originalStrategy, consistentHash.originalStrategy) assert.True(t, consistentHash.useSourceIP) - assert.NotNil(t, consistentHash.hashMap) } -// TestConsistentHashNextProxyUseSourceIpExists ensures that when useSourceIp is enabled, -// and the hashed IP exists in the hashMap, the correct proxy is returned. 
-// It mocks a connection with a specific IP and verifies the proxy retrieval from the hashMap. +// TestConsistentHashNextProxyUseSourceIpExists tests the consistent hash load balancer +// when useSourceIP is enabled. It verifies that: +// 1. A connection from a specific IP is correctly hashed +// 2. The hash mapping is properly stored in the Raft FSM +// 3. The correct proxy is returned based on the stored mapping +// The test uses a mock connection and Raft node to simulate a distributed environment. func TestConsistentHashNextProxyUseSourceIpExists(t *testing.T) { proxies := []IProxy{ - MockProxy{name: "proxy1"}, - MockProxy{name: "proxy2"}, - MockProxy{name: "proxy3"}, + MockProxy{name: "proxy1", groupName: "test-group"}, + MockProxy{name: "proxy2", groupName: "test-group"}, + MockProxy{name: "proxy3", groupName: "test-group"}, } + + raftHelper, err := testhelpers.NewTestRaftNode(t) + if err != nil { + t.Fatalf("Failed to create test raft node: %v", err) + } + defer func() { + if err := raftHelper.Cleanup(); err != nil { + t.Errorf("Failed to cleanup raft: %v", err) + } + }() + server := &Server{ Proxies: proxies, LoadbalancerConsistentHash: &config.ConsistentHash{UseSourceIP: true}, + RaftNode: raftHelper.Node, + GroupName: "test-group", } + server.initializeProxies() originalStrategy := NewRandom(server) consistentHash := NewConsistentHash(server, originalStrategy) mockConn := new(MockConnWrapper) @@ -46,10 +67,22 @@ func TestConsistentHashNextProxyUseSourceIpExists(t *testing.T) { mockAddr := &net.TCPAddr{IP: net.ParseIP("192.168.1.1"), Port: 1234} mockConn.On("RemoteAddr").Return(mockAddr) - key := "192.168.1.1" - hash := hashKey(key) + // Instead of setting hashMap directly, setup the FSM + hash := hashKey("192.168.1.1" + server.GroupName) + // Create and apply the command through Raft + cmd := raft.HashMapCommand{ + Type: raft.CommandAddHashMapping, + Hash: hash, + BlockName: proxies[2].GetBlockName(), + } + + cmdBytes, marshalErr := json.Marshal(cmd) + 
require.NoError(t, marshalErr) - consistentHash.hashMap[hash] = proxies[2] + // Apply the command through Raft + if err := server.RaftNode.Apply(cmdBytes, 10*time.Second); err != nil { + require.NoError(t, err) + } proxy, err := consistentHash.NextProxy(mockConn) assert.Nil(t, err) @@ -60,20 +93,32 @@ func TestConsistentHashNextProxyUseSourceIpExists(t *testing.T) { } // TestConsistentHashNextProxyUseFullAddress verifies the behavior when useSourceIp is disabled. -// It ensures that the full connection address is used for hashing, and the correct proxy is returned -// and cached in the hashMap. The test also checks that the hash value is computed based on the full address. +// It ensures that the full connection address (IP:port) plus group name is used for hashing, +// and the correct proxy is returned. The test also verifies that the proxy mapping is properly +// stored in the Raft FSM for persistence and cluster-wide consistency. func TestConsistentHashNextProxyUseFullAddress(t *testing.T) { mockConn := new(MockConnWrapper) proxies := []IProxy{ - MockProxy{name: "proxy1"}, - MockProxy{name: "proxy2"}, - MockProxy{name: "proxy3"}, + MockProxy{name: "proxy1", groupName: "test-group"}, + MockProxy{name: "proxy2", groupName: "test-group"}, + MockProxy{name: "proxy3", groupName: "test-group"}, } + raftHelper, err := testhelpers.NewTestRaftNode(t) + if err != nil { + t.Fatalf("Failed to create test raft node: %v", err) + } + defer func() { + if err := raftHelper.Cleanup(); err != nil { + t.Errorf("Failed to cleanup raft: %v", err) + } + }() server := &Server{ Proxies: proxies, LoadbalancerConsistentHash: &config.ConsistentHash{ UseSourceIP: false, }, + RaftNode: raftHelper.Node, + GroupName: "test-group", } mockStrategy := NewRoundRobin(server) @@ -81,6 +126,8 @@ func TestConsistentHashNextProxyUseFullAddress(t *testing.T) { mockAddr := &net.TCPAddr{IP: net.ParseIP("192.168.1.1"), Port: 1234} mockConn.On("RemoteAddr").Return(mockAddr) + server.initializeProxies() + 
consistentHash := NewConsistentHash(server, mockStrategy) proxy, err := consistentHash.NextProxy(mockConn) @@ -88,32 +135,45 @@ func TestConsistentHashNextProxyUseFullAddress(t *testing.T) { assert.NotNil(t, proxy) assert.Equal(t, proxies[1], proxy) - // Hash should be calculated using the full address and cached in hashMap - hash := hashKey("192.168.1.1:1234") - cachedProxy, exists := consistentHash.hashMap[hash] - + // Verify the hash was stored in Raft FSM + hash := hashKey("192.168.1.1:1234" + server.GroupName) + blockName, exists := server.RaftNode.Fsm.GetProxyBlock(hash) assert.True(t, exists) - assert.Equal(t, proxies[1], cachedProxy) + assert.Equal(t, proxies[1].GetBlockName(), blockName) // Clean up mockConn.AssertExpectations(t) } -// TestConsistentHashNextProxyConcurrency tests the concurrency safety of the NextProxy method -// in the ConsistentHash struct. It ensures that multiple goroutines can concurrently call -// NextProxy without causing race conditions or inconsistent behavior. +// TestConsistentHashNextProxyConcurrency tests the thread safety and consistency of the NextProxy method +// in a distributed environment. It verifies that: +// 1. Multiple concurrent requests from the same IP address consistently map to the same proxy +// 2. The mapping is stable across sequential calls +// 3. Different IP addresses map to different proxies +// 4. 
The Raft-based consistent hash remains thread-safe under high concurrency func TestConsistentHashNextProxyConcurrency(t *testing.T) { // Setup mocks conn1 := new(MockConnWrapper) conn2 := new(MockConnWrapper) proxies := []IProxy{ - MockProxy{name: "proxy1"}, - MockProxy{name: "proxy2"}, - MockProxy{name: "proxy3"}, + MockProxy{name: "proxy1", groupName: "test-group"}, + MockProxy{name: "proxy2", groupName: "test-group"}, + MockProxy{name: "proxy3", groupName: "test-group"}, } + raftHelper, err := testhelpers.NewTestRaftNode(t) + if err != nil { + t.Fatalf("Failed to create test raft node: %v", err) + } + defer func() { + if err := raftHelper.Cleanup(); err != nil { + t.Errorf("Failed to cleanup raft: %v", err) + } + }() server := &Server{ Proxies: proxies, LoadbalancerConsistentHash: &config.ConsistentHash{UseSourceIP: true}, + RaftNode: raftHelper.Node, + GroupName: "test-group", } originalStrategy := NewRoundRobin(server) @@ -123,6 +183,8 @@ func TestConsistentHashNextProxyConcurrency(t *testing.T) { conn1.On("RemoteAddr").Return(mockAddr1) conn2.On("RemoteAddr").Return(mockAddr2) + server.initializeProxies() + // Initialize the ConsistentHash consistentHash := NewConsistentHash(server, originalStrategy) diff --git a/network/network_helpers_test.go b/network/network_helpers_test.go index 2ab4d250..cbbba011 100644 --- a/network/network_helpers_test.go +++ b/network/network_helpers_test.go @@ -30,7 +30,8 @@ type WriteBuffer struct { // MockProxy implements the IProxy interface for testing purposes. type MockProxy struct { - name string + name string + groupName string } // writeStartupMsg writes a PostgreSQL startup message to the buffer. @@ -296,7 +297,7 @@ func (m MockProxy) GetBlockName() string { } func (m MockProxy) GetGroupName() string { - return "default" + return m.groupName } // Mock implementation of IConnWrapper. 
diff --git a/raft/raft.go b/raft/raft.go index c6d1f2b1..340b57ad 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -19,6 +19,7 @@ import ( // Command types for Raft operations const ( CommandAddHashMapping = "ADD_HASH_MAPPING" + RaftLeaderState = raft.Leader ) // HashMapCommand represents a command to modify the hash map @@ -286,3 +287,8 @@ func (f *FSMSnapshot) Persist(sink raft.SnapshotSink) error { } func (f *FSMSnapshot) Release() {} + +// GetState returns the current Raft state +func (n *RaftNode) GetState() raft.RaftState { + return n.raft.State() +} diff --git a/raft/raft_test.go b/raft/raft_test.go new file mode 100644 index 00000000..725b885a --- /dev/null +++ b/raft/raft_test.go @@ -0,0 +1,255 @@ +package raft + +import ( + "encoding/json" + "os" + "testing" + "time" + + "github.com/gatewayd-io/gatewayd/config" + "github.com/hashicorp/raft" + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func setupTestLogger() zerolog.Logger { + return zerolog.New(os.Stdout).With().Timestamp().Logger() +} + +func TestNewRaftNode(t *testing.T) { + logger := setupTestLogger() + + tests := []struct { + name string + raftConfig config.Raft + wantErr bool + }{ + { + name: "valid configuration", + raftConfig: config.Raft{ + NodeID: "node1", + Address: "127.0.0.1:1234", + LeaderID: "node1", + Peers: []config.RaftPeer{ + {ID: "node2", Address: "127.0.0.1:1235"}, + }, + }, + wantErr: false, + }, + { + name: "invalid address", + raftConfig: config.Raft{ + NodeID: "node1", + Address: "invalid:address:", + LeaderID: "node1", + }, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + node, err := NewRaftNode(logger, tt.raftConfig) + if tt.wantErr { + assert.Error(t, err) + assert.Nil(t, node) + } else { + assert.NoError(t, err) + assert.NotNil(t, node) + // Cleanup + _ = node.Shutdown() + _ = os.RemoveAll("raft") + } + }) + } +} + +func TestFSMOperations(t *testing.T) { + fsm := 
NewFSM() + + // Test adding a hash mapping + cmd := HashMapCommand{ + Type: CommandAddHashMapping, + Hash: 12345, + BlockName: "test-block", + } + + data, err := json.Marshal(cmd) + require.NoError(t, err) + + // Apply the command + result := fsm.Apply(&raft.Log{Data: data}) + assert.Nil(t, result) + + // Test retrieving the mapping + blockName, exists := fsm.GetProxyBlock(12345) + assert.True(t, exists) + assert.Equal(t, "test-block", blockName) + + // Test non-existent mapping + blockName, exists = fsm.GetProxyBlock(99999) + assert.False(t, exists) + assert.Empty(t, blockName) +} + +func TestFSMSnapshot(t *testing.T) { + fsm := NewFSM() + + // Add some data + cmd := HashMapCommand{ + Type: CommandAddHashMapping, + Hash: 12345, + BlockName: "test-block", + } + data, err := json.Marshal(cmd) + require.NoError(t, err) + fsm.Apply(&raft.Log{Data: data}) + + // Create snapshot + snapshot, err := fsm.Snapshot() + require.NoError(t, err) + assert.NotNil(t, snapshot) + + // Verify snapshot data + fsmSnapshot, ok := snapshot.(*FSMSnapshot) + assert.True(t, ok) + assert.Equal(t, "test-block", fsmSnapshot.hashMap[12345]) +} + +func TestRaftNodeApply(t *testing.T) { + logger := setupTestLogger() + config := config.Raft{ + NodeID: "node1", + Address: "127.0.0.1:1234", + LeaderID: "node1", + } + + node, err := NewRaftNode(logger, config) + require.NoError(t, err) + defer func() { + _ = node.Shutdown() + _ = os.RemoveAll("raft") + }() + + // Test applying data + cmd := HashMapCommand{ + Type: CommandAddHashMapping, + Hash: 12345, + BlockName: "test-block", + } + data, err := json.Marshal(cmd) + require.NoError(t, err) + + err = node.Apply(data, time.Second) + // Note: This will likely fail as the node isn't a leader + assert.Error(t, err) +} + +func TestRaftLeadershipAndFollowers(t *testing.T) { + logger := setupTestLogger() + + // Create temporary directories for each node + defer os.RemoveAll("raft") + + // Configure three nodes + nodes := make([]*RaftNode, 3) + nodeConfigs 
:= []config.Raft{ + { + NodeID: "node1", + Address: "127.0.0.1:1234", + LeaderID: "node1", // First node is the bootstrap node + Peers: []config.RaftPeer{ + {ID: "node2", Address: "127.0.0.1:1235"}, + {ID: "node3", Address: "127.0.0.1:1236"}, + }, + }, + { + NodeID: "node2", + Address: "127.0.0.1:1235", + LeaderID: "node1", + Peers: []config.RaftPeer{ + {ID: "node1", Address: "127.0.0.1:1234"}, + {ID: "node3", Address: "127.0.0.1:1236"}, + }, + }, + { + NodeID: "node3", + Address: "127.0.0.1:1236", + LeaderID: "node1", + Peers: []config.RaftPeer{ + {ID: "node1", Address: "127.0.0.1:1234"}, + {ID: "node2", Address: "127.0.0.1:1235"}, + }, + }, + } + + // Start all nodes + for i, cfg := range nodeConfigs { + node, err := NewRaftNode(logger, cfg) + require.NoError(t, err) + nodes[i] = node + defer node.Shutdown() + } + + // Wait for leader election + time.Sleep(3 * time.Second) + + // Test 1: Verify that exactly one leader is elected + leaderCount := 0 + var leaderNode *RaftNode + for _, node := range nodes { + if node.GetState() == raft.Leader { + leaderCount++ + leaderNode = node + } + } + assert.Equal(t, 1, leaderCount, "Expected exactly one leader") + require.NotNil(t, leaderNode, "Expected to find a leader") + + // Test 2: Verify that other nodes are followers + for _, node := range nodes { + if node != leaderNode { + assert.Equal(t, raft.Follower, node.GetState(), "Expected non-leader nodes to be followers") + } + } + + // Test 3: Test cluster functionality by applying a command + cmd := HashMapCommand{ + Type: CommandAddHashMapping, + Hash: 12345, + BlockName: "test-block", + } + data, err := json.Marshal(cmd) + require.NoError(t, err) + + // Apply command through leader + err = leaderNode.Apply(data, 5*time.Second) + require.NoError(t, err, "Failed to apply command through leader") + + // Wait for replication + time.Sleep(1 * time.Second) + + // Verify that all nodes have the update + for i, node := range nodes { + blockName, exists := 
node.Fsm.GetProxyBlock(12345) + assert.True(t, exists, "Node %d should have the replicated data", i+1) + assert.Equal(t, "test-block", blockName, "Node %d has incorrect data", i+1) + } + + // Test 4: Simulate leader shutdown and new leader election + err = leaderNode.Shutdown() + require.NoError(t, err) + + // Wait for new leader election + time.Sleep(3 * time.Second) + + // Verify new leader is elected among remaining nodes + newLeaderCount := 0 + for _, node := range nodes { + if node != leaderNode && node.GetState() == raft.Leader { + newLeaderCount++ + } + } + assert.Equal(t, 1, newLeaderCount, "Expected exactly one new leader after original leader shutdown") +} diff --git a/testhelpers/raft_helpers.go b/testhelpers/raft_helpers.go new file mode 100644 index 00000000..f7212035 --- /dev/null +++ b/testhelpers/raft_helpers.go @@ -0,0 +1,95 @@ +package testhelpers + +import ( + "fmt" + "os" + "path/filepath" + "testing" + "time" + + "github.com/gatewayd-io/gatewayd/config" + "github.com/gatewayd-io/gatewayd/raft" + "github.com/rs/zerolog" +) + +// TestRaftHelper contains utilities for testing Raft functionality +type TestRaftHelper struct { + Node *raft.RaftNode + TempDir string + NodeID string + RaftAddr string +} + +// NewTestRaftNode creates a Raft node for testing purposes +func NewTestRaftNode(t *testing.T) (*TestRaftHelper, error) { + // Create a temporary directory for Raft data + tempDir, err := os.MkdirTemp("", "raft-test-*") + if err != nil { + return nil, fmt.Errorf("failed to create temp dir: %w", err) + } + + // Setup test configuration + nodeID := "test-node-1" + raftAddr := "127.0.0.1:0" // Using port 0 lets the system assign a random available port + + // Create test logger + logger := zerolog.New(zerolog.ConsoleWriter{Out: os.Stderr}). + Level(zerolog.DebugLevel). 
+ With().Timestamp().Logger() + + // Create Raft configuration + raftConfig := config.Raft{ + NodeID: nodeID, + Address: raftAddr, + LeaderID: nodeID, // Make this node the leader for testing + Peers: []config.RaftPeer{}, // Empty peers for single-node testing + } + + // Create new Raft node + node, err := raft.NewRaftNode(logger, raftConfig) + if err != nil { + os.RemoveAll(tempDir) + return nil, fmt.Errorf("failed to create raft node: %w", err) + } + + // Wait for the node to become leader + timeout := time.Now().Add(3 * time.Second) + for time.Now().Before(timeout) { + if node.GetState() == raft.RaftLeaderState { + break + } + time.Sleep(100 * time.Millisecond) + } + + if node.GetState() != raft.RaftLeaderState { + os.RemoveAll(tempDir) + return nil, fmt.Errorf("timeout waiting for node to become leader") + } + + return &TestRaftHelper{ + Node: node, + TempDir: tempDir, + NodeID: nodeID, + RaftAddr: raftAddr, + }, nil +} + +// Cleanup removes temporary files and shuts down the Raft node +func (h *TestRaftHelper) Cleanup() error { + if h.Node != nil { + if err := h.Node.Shutdown(); err != nil { + return fmt.Errorf("failed to shutdown raft node: %w", err) + } + } + + if err := os.RemoveAll(h.TempDir); err != nil { + return fmt.Errorf("failed to remove temp dir: %w", err) + } + + // Also clean up the default raft directory + if err := os.RemoveAll(filepath.Join("raft", h.NodeID)); err != nil { + return fmt.Errorf("failed to remove raft directory: %w", err) + } + + return nil +} From 3ed59e508d0ba8e4f157c10dbd5e53342e657c28 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Tue, 26 Nov 2024 15:33:28 +0100 Subject: [PATCH 05/26] feat(raft): add configurable directory and improve test stability - Add Directory field to Raft config to make raft storage location configurable - Use t.TempDir() in tests to ensure proper cleanup of test directories - Rename HashMapCommand to ConsistentHashCommand for better clarity - Update command type constants and map names to be more 
descriptive - Fix test flakiness by using unique node IDs and random available ports - Remove manual directory cleanup in favor of t.TempDir() cleanup - Update configuration files with raft directory settings This change improves test stability and makes the raft storage location configurable while cleaning up naming conventions throughout the raft package. --- cmd/run_test.go | 12 +++++ cmd/testdata/gatewayd.yaml | 6 +++ config/types.go | 9 ++-- gatewayd.yaml | 8 ++-- go.mod | 5 +- go.sum | 2 - network/consistenthash.go | 4 +- network/consistenthash_test.go | 4 +- raft/raft.go | 36 +++++++------- raft/raft_test.go | 86 ++++++++++++++++++---------------- testhelpers/raft_helpers.go | 38 +++++++-------- 11 files changed, 111 insertions(+), 99 deletions(-) diff --git a/cmd/run_test.go b/cmd/run_test.go index 534d203a..7881109d 100644 --- a/cmd/run_test.go +++ b/cmd/run_test.go @@ -23,6 +23,9 @@ func Test_runCmd(t *testing.T) { postgresAddress := postgresHostIP + ":" + postgresMappedPort.Port() t.Setenv("GATEWAYD_CLIENTS_DEFAULT_WRITES_ADDRESS", postgresAddress) + tempDir := t.TempDir() + t.Setenv("GATEWAYD_RAFT_DIRECTORY", tempDir) + globalTestConfigFile := "./test_global_runCmd.yaml" pluginTestConfigFile := "./test_plugins_runCmd.yaml" // Create a test plugins config file. @@ -87,6 +90,9 @@ func Test_runCmdWithTLS(t *testing.T) { postgresAddress := postgresHostIP + ":" + postgresMappedPort.Port() t.Setenv("GATEWAYD_CLIENTS_DEFAULT_WRITES_ADDRESS", postgresAddress) + tempDir := t.TempDir() + t.Setenv("GATEWAYD_RAFT_DIRECTORY", tempDir) + globalTLSTestConfigFile := "./testdata/gatewayd_tls.yaml" pluginTestConfigFile := "./test_plugins_runCmdWithTLS.yaml" // Create a test plugins config file. 
@@ -150,6 +156,9 @@ func Test_runCmdWithMultiTenancy(t *testing.T) { postgresAddress2 := postgresHostIP2 + ":" + postgresMappedPort2.Port() t.Setenv("GATEWAYD_CLIENTS_TEST_WRITE_ADDRESS", postgresAddress2) + tempDir := t.TempDir() + t.Setenv("GATEWAYD_RAFT_DIRECTORY", tempDir) + globalTestConfigFile := "./testdata/gatewayd.yaml" pluginTestConfigFile := "./test_plugins_runCmdWithMultiTenancy.yaml" // Create a test plugins config file. @@ -211,6 +220,9 @@ func Test_runCmdWithCachePlugin(t *testing.T) { postgresAddress := postgresHostIP + ":" + postgresMappedPort.Port() t.Setenv("GATEWAYD_CLIENTS_DEFAULT_WRITES_ADDRESS", postgresAddress) + tempDir := t.TempDir() + t.Setenv("GATEWAYD_RAFT_DIRECTORY", tempDir) + globalTestConfigFile := "./test_global_runCmdWithCachePlugin.yaml" pluginTestConfigFile := "./test_plugins_runCmdWithCachePlugin.yaml" // TODO: Remove this once these global variables are removed from cmd/run.go. diff --git a/cmd/testdata/gatewayd.yaml b/cmd/testdata/gatewayd.yaml index 52b71a6c..96ee280b 100644 --- a/cmd/testdata/gatewayd.yaml +++ b/cmd/testdata/gatewayd.yaml @@ -73,3 +73,9 @@ servers: api: enabled: True + +raft: + address: 127.0.0.1:2223 + nodeID: node2 + leaderID: node2 + peers: {} diff --git a/config/types.go b/config/types.go index 76613674..2cd54749 100644 --- a/config/types.go +++ b/config/types.go @@ -139,10 +139,11 @@ type API struct { } type Raft struct { - Address string `json:"address"` - NodeID string `json:"nodeID"` - LeaderID string `json:"leaderID"` - Peers []RaftPeer `json:"peers"` + Address string `json:"address"` + NodeID string `json:"nodeID"` + LeaderID string `json:"leaderID"` + Peers []RaftPeer `json:"peers"` + Directory string `json:"directory" jsonschema:"default=raft"` } type RaftPeer struct { diff --git a/gatewayd.yaml b/gatewayd.yaml index 92432677..2d365175 100644 --- a/gatewayd.yaml +++ b/gatewayd.yaml @@ -110,7 +110,7 @@ api: raft: address: 127.0.0.1:2223 nodeID: node2 - leaderID: node1 - peers: - - id: node1 - 
address: 127.0.0.1:2222 + leaderID: node2 + peers: {} + # - id: node1 + # address: 127.0.0.1:2222 diff --git a/go.mod b/go.mod index 9ff6a9db..3c6d5a08 100644 --- a/go.mod +++ b/go.mod @@ -16,10 +16,6 @@ require ( github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 github.com/hashicorp/go-hclog v1.6.3 github.com/hashicorp/go-plugin v1.6.2 - github.com/google/uuid v1.6.0 - github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 - github.com/hashicorp/go-hclog v1.6.3 - github.com/hashicorp/go-plugin v1.6.1 github.com/hashicorp/raft v1.7.1 github.com/hashicorp/raft-boltdb v0.0.0-20231211162105-6c830fa4535e github.com/invopop/jsonschema v0.12.0 @@ -96,6 +92,7 @@ require ( github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/go-querystring v1.1.0 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/hashicorp/go-immutable-radix v1.0.0 // indirect github.com/hashicorp/go-msgpack v0.5.5 // indirect github.com/hashicorp/go-msgpack/v2 v2.1.2 // indirect diff --git a/go.sum b/go.sum index 14385f0a..8bc5d0ad 100644 --- a/go.sum +++ b/go.sum @@ -257,8 +257,6 @@ github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+ github.com/hashicorp/go-plugin v1.0.1/go.mod h1:++UyYGoz3o5w9ZzAdZxtQKrWWP+iqPBn3cQptSMzBuY= github.com/hashicorp/go-plugin v1.6.2 h1:zdGAEd0V1lCaU0u+MxWQhtSDQmahpkwOun8U8EiRVog= github.com/hashicorp/go-plugin v1.6.2/go.mod h1:CkgLQ5CZqNmdL9U9JzM532t8ZiYQ35+pj3b1FD37R0Q= -github.com/hashicorp/go-plugin v1.6.1 h1:P7MR2UP6gNKGPp+y7EZw2kOiq4IR9WiqLvp0XOsVdwI= -github.com/hashicorp/go-plugin v1.6.1/go.mod h1:XPHFku2tFo3o3QKFgSYo+cghcUhw1NA1hZyMK0PWAw0= github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-retryablehttp v0.5.4/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-rootcerts v1.0.1/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= diff --git 
a/network/consistenthash.go b/network/consistenthash.go index 97b01907..b4f7e1d9 100644 --- a/network/consistenthash.go +++ b/network/consistenthash.go @@ -71,8 +71,8 @@ func (ch *ConsistentHash) NextProxy(conn IConnWrapper) (IProxy, *gerr.GatewayDEr } // Create and apply the command through Raft - cmd := raft.HashMapCommand{ - Type: raft.CommandAddHashMapping, + cmd := raft.ConsistentHashCommand{ + Type: raft.CommandAddConsistentHashEntry, Hash: hash, BlockName: proxy.GetBlockName(), } diff --git a/network/consistenthash_test.go b/network/consistenthash_test.go index df72fc1a..0fa7472f 100644 --- a/network/consistenthash_test.go +++ b/network/consistenthash_test.go @@ -70,8 +70,8 @@ func TestConsistentHashNextProxyUseSourceIpExists(t *testing.T) { // Instead of setting hashMap directly, setup the FSM hash := hashKey("192.168.1.1" + server.GroupName) // Create and apply the command through Raft - cmd := raft.HashMapCommand{ - Type: raft.CommandAddHashMapping, + cmd := raft.ConsistentHashCommand{ + Type: raft.CommandAddConsistentHashEntry, Hash: hash, BlockName: proxies[2].GetBlockName(), } diff --git a/raft/raft.go b/raft/raft.go index 340b57ad..b70c67c2 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -18,12 +18,12 @@ import ( // Command types for Raft operations const ( - CommandAddHashMapping = "ADD_HASH_MAPPING" - RaftLeaderState = raft.Leader + CommandAddConsistentHashEntry = "ADD_CONSISTENT_HASH_ENTRY" + RaftLeaderState = raft.Leader ) -// HashMapCommand represents a command to modify the hash map -type HashMapCommand struct { +// ConsistentHashCommand represents a command to modify the consistent hash +type ConsistentHashCommand struct { Type string `json:"type"` Hash uint64 `json:"hash"` BlockName string `json:"block_name"` @@ -57,7 +57,7 @@ func NewRaftNode(logger zerolog.Logger, raftConfig config.Raft) (*RaftNode, erro } raftAddr := raftConfig.Address config.LocalID = raft.ServerID(nodeID) - raftDir := filepath.Join("raft", nodeID) + raftDir := 
filepath.Join(raftConfig.Directory, nodeID) err = os.MkdirAll(raftDir, os.ModePerm) if err != nil { return nil, fmt.Errorf("error creating raft directory: %w", err) @@ -203,7 +203,7 @@ func (n *RaftNode) Shutdown() error { // FSM represents the Finite State Machine for the Raft cluster type FSM struct { - consistentHashMap map[uint64]string + lbHashToBlockName map[uint64]string mu sync.RWMutex } @@ -211,7 +211,7 @@ type FSM struct { func (f *FSM) GetProxyBlock(hash uint64) (string, bool) { f.mu.RLock() defer f.mu.RUnlock() - if blockName, exists := f.consistentHashMap[hash]; exists { + if blockName, exists := f.lbHashToBlockName[hash]; exists { return blockName, true } return "", false @@ -220,13 +220,13 @@ func (f *FSM) GetProxyBlock(hash uint64) (string, bool) { // NewFSM creates a new FSM instance func NewFSM() *FSM { return &FSM{ - consistentHashMap: make(map[uint64]string), + lbHashToBlockName: make(map[uint64]string), } } // Apply implements the raft.FSM interface func (f *FSM) Apply(log *raft.Log) interface{} { - var cmd HashMapCommand + var cmd ConsistentHashCommand if err := json.Unmarshal(log.Data, &cmd); err != nil { return fmt.Errorf("failed to unmarshal command: %w", err) } @@ -235,8 +235,8 @@ func (f *FSM) Apply(log *raft.Log) interface{} { defer f.mu.Unlock() switch cmd.Type { - case CommandAddHashMapping: - f.consistentHashMap[cmd.Hash] = cmd.BlockName + case CommandAddConsistentHashEntry: + f.lbHashToBlockName[cmd.Hash] = cmd.BlockName return nil default: return fmt.Errorf("unknown command type: %s", cmd.Type) @@ -250,23 +250,23 @@ func (f *FSM) Snapshot() (raft.FSMSnapshot, error) { // Create a copy of the hash map hashMapCopy := make(map[uint64]string) - for k, v := range f.consistentHashMap { + for k, v := range f.lbHashToBlockName { hashMapCopy[k] = v } - return &FSMSnapshot{hashMap: hashMapCopy}, nil + return &FSMSnapshot{lbHashToBlockName: hashMapCopy}, nil } // Restore restores the FSM from a snapshot func (f *FSM) Restore(rc io.ReadCloser) 
error { decoder := json.NewDecoder(rc) - var hashMap map[uint64]string - if err := decoder.Decode(&hashMap); err != nil { + var lbHashToBlockName map[uint64]string + if err := decoder.Decode(&lbHashToBlockName); err != nil { return err } f.mu.Lock() - f.consistentHashMap = hashMap + f.lbHashToBlockName = lbHashToBlockName f.mu.Unlock() return nil @@ -274,11 +274,11 @@ func (f *FSM) Restore(rc io.ReadCloser) error { // FSMSnapshot represents a snapshot of the FSM type FSMSnapshot struct { - hashMap map[uint64]string + lbHashToBlockName map[uint64]string } func (f *FSMSnapshot) Persist(sink raft.SnapshotSink) error { - err := json.NewEncoder(sink).Encode(f.hashMap) + err := json.NewEncoder(sink).Encode(f.lbHashToBlockName) if err != nil { sink.Cancel() return err diff --git a/raft/raft_test.go b/raft/raft_test.go index 725b885a..1e1f031c 100644 --- a/raft/raft_test.go +++ b/raft/raft_test.go @@ -19,6 +19,7 @@ func setupTestLogger() zerolog.Logger { func TestNewRaftNode(t *testing.T) { logger := setupTestLogger() + tempDir := t.TempDir() tests := []struct { name string @@ -28,21 +29,23 @@ func TestNewRaftNode(t *testing.T) { { name: "valid configuration", raftConfig: config.Raft{ - NodeID: "node1", - Address: "127.0.0.1:1234", - LeaderID: "node1", + NodeID: "testRaftNodeValidConfigurationnode1", + Address: "127.0.0.1:6001", + LeaderID: "testRaftNodeValidConfigurationnode1", Peers: []config.RaftPeer{ - {ID: "node2", Address: "127.0.0.1:1235"}, + {ID: "testRaftNodeValidConfigurationnode2", Address: "127.0.0.1:6002"}, }, + Directory: tempDir, }, wantErr: false, }, { name: "invalid address", raftConfig: config.Raft{ - NodeID: "node1", - Address: "invalid:address:", - LeaderID: "node1", + NodeID: "testRaftNodeInvalidAddressnode1", + Address: "invalid:address:", + LeaderID: "testRaftNodeInvalidAddressnode1", + Directory: tempDir, }, wantErr: true, }, @@ -59,7 +62,6 @@ func TestNewRaftNode(t *testing.T) { assert.NotNil(t, node) // Cleanup _ = node.Shutdown() - _ = 
os.RemoveAll("raft") } }) } @@ -69,8 +71,8 @@ func TestFSMOperations(t *testing.T) { fsm := NewFSM() // Test adding a hash mapping - cmd := HashMapCommand{ - Type: CommandAddHashMapping, + cmd := ConsistentHashCommand{ + Type: CommandAddConsistentHashEntry, Hash: 12345, BlockName: "test-block", } @@ -97,8 +99,8 @@ func TestFSMSnapshot(t *testing.T) { fsm := NewFSM() // Add some data - cmd := HashMapCommand{ - Type: CommandAddHashMapping, + cmd := ConsistentHashCommand{ + Type: CommandAddConsistentHashEntry, Hash: 12345, BlockName: "test-block", } @@ -114,27 +116,28 @@ func TestFSMSnapshot(t *testing.T) { // Verify snapshot data fsmSnapshot, ok := snapshot.(*FSMSnapshot) assert.True(t, ok) - assert.Equal(t, "test-block", fsmSnapshot.hashMap[12345]) + assert.Equal(t, "test-block", fsmSnapshot.lbHashToBlockName[12345]) } func TestRaftNodeApply(t *testing.T) { logger := setupTestLogger() + tempDir := t.TempDir() config := config.Raft{ - NodeID: "node1", - Address: "127.0.0.1:1234", - LeaderID: "node1", + NodeID: "testRaftNodeApplynode1", + Address: "127.0.0.1:6003", + LeaderID: "testRaftNodeApplynode1", + Directory: tempDir, } node, err := NewRaftNode(logger, config) require.NoError(t, err) defer func() { _ = node.Shutdown() - _ = os.RemoveAll("raft") }() // Test applying data - cmd := HashMapCommand{ - Type: CommandAddHashMapping, + cmd := ConsistentHashCommand{ + Type: CommandAddConsistentHashEntry, Hash: 12345, BlockName: "test-block", } @@ -148,43 +151,44 @@ func TestRaftNodeApply(t *testing.T) { func TestRaftLeadershipAndFollowers(t *testing.T) { logger := setupTestLogger() + tempDir := t.TempDir() - // Create temporary directories for each node - defer os.RemoveAll("raft") - - // Configure three nodes - nodes := make([]*RaftNode, 3) + // Configure three nodes with unique ports nodeConfigs := []config.Raft{ { - NodeID: "node1", - Address: "127.0.0.1:1234", - LeaderID: "node1", // First node is the bootstrap node + NodeID: "testRaftLeadershipnode1", + Address: 
"127.0.0.1:6004", + LeaderID: "testRaftLeadershipnode1", Peers: []config.RaftPeer{ - {ID: "node2", Address: "127.0.0.1:1235"}, - {ID: "node3", Address: "127.0.0.1:1236"}, + {ID: "testRaftLeadershipnode2", Address: "127.0.0.1:6005"}, + {ID: "testRaftLeadershipnode3", Address: "127.0.0.1:6006"}, }, + Directory: tempDir, }, { - NodeID: "node2", - Address: "127.0.0.1:1235", - LeaderID: "node1", + NodeID: "testRaftLeadershipnode2", + Address: "127.0.0.1:6005", + LeaderID: "testRaftLeadershipnode1", Peers: []config.RaftPeer{ - {ID: "node1", Address: "127.0.0.1:1234"}, - {ID: "node3", Address: "127.0.0.1:1236"}, + {ID: "testRaftLeadershipnode1", Address: "127.0.0.1:6004"}, + {ID: "testRaftLeadershipnode3", Address: "127.0.0.1:6006"}, }, + Directory: tempDir, }, { - NodeID: "node3", - Address: "127.0.0.1:1236", - LeaderID: "node1", + NodeID: "testRaftLeadershipnode3", + Address: "127.0.0.1:6006", + LeaderID: "testRaftLeadershipnode1", Peers: []config.RaftPeer{ - {ID: "node1", Address: "127.0.0.1:1234"}, - {ID: "node2", Address: "127.0.0.1:1235"}, + {ID: "testRaftLeadershipnode1", Address: "127.0.0.1:6004"}, + {ID: "testRaftLeadershipnode2", Address: "127.0.0.1:6005"}, }, + Directory: tempDir, }, } // Start all nodes + nodes := make([]*RaftNode, len(nodeConfigs)) for i, cfg := range nodeConfigs { node, err := NewRaftNode(logger, cfg) require.NoError(t, err) @@ -215,8 +219,8 @@ func TestRaftLeadershipAndFollowers(t *testing.T) { } // Test 3: Test cluster functionality by applying a command - cmd := HashMapCommand{ - Type: CommandAddHashMapping, + cmd := ConsistentHashCommand{ + Type: CommandAddConsistentHashEntry, Hash: 12345, BlockName: "test-block", } diff --git a/testhelpers/raft_helpers.go b/testhelpers/raft_helpers.go index f7212035..30b399d2 100644 --- a/testhelpers/raft_helpers.go +++ b/testhelpers/raft_helpers.go @@ -2,8 +2,8 @@ package testhelpers import ( "fmt" + "net" "os" - "path/filepath" "testing" "time" @@ -22,15 +22,19 @@ type TestRaftHelper struct { // 
NewTestRaftNode creates a Raft node for testing purposes func NewTestRaftNode(t *testing.T) (*TestRaftHelper, error) { - // Create a temporary directory for Raft data - tempDir, err := os.MkdirTemp("", "raft-test-*") + tempDir := t.TempDir() + + // Setup test configuration + nodeID := fmt.Sprintf("test-node-%d", time.Now().UnixNano()) + // Get a random available port + listener, err := net.Listen("tcp", "127.0.0.1:0") if err != nil { - return nil, fmt.Errorf("failed to create temp dir: %w", err) + return nil, fmt.Errorf("failed to get random port: %w", err) } + port := listener.Addr().(*net.TCPAddr).Port + listener.Close() - // Setup test configuration - nodeID := "test-node-1" - raftAddr := "127.0.0.1:0" // Using port 0 lets the system assign a random available port + raftAddr := fmt.Sprintf("127.0.0.1:%d", port) // Create test logger logger := zerolog.New(zerolog.ConsoleWriter{Out: os.Stderr}). @@ -39,16 +43,16 @@ func NewTestRaftNode(t *testing.T) (*TestRaftHelper, error) { // Create Raft configuration raftConfig := config.Raft{ - NodeID: nodeID, - Address: raftAddr, - LeaderID: nodeID, // Make this node the leader for testing - Peers: []config.RaftPeer{}, // Empty peers for single-node testing + NodeID: nodeID, + Address: raftAddr, + LeaderID: nodeID, // Make this node the leader for testing + Peers: []config.RaftPeer{}, // Empty peers for single-node testing + Directory: tempDir, } // Create new Raft node node, err := raft.NewRaftNode(logger, raftConfig) if err != nil { - os.RemoveAll(tempDir) return nil, fmt.Errorf("failed to create raft node: %w", err) } @@ -62,7 +66,6 @@ func NewTestRaftNode(t *testing.T) (*TestRaftHelper, error) { } if node.GetState() != raft.RaftLeaderState { - os.RemoveAll(tempDir) return nil, fmt.Errorf("timeout waiting for node to become leader") } @@ -82,14 +85,5 @@ func (h *TestRaftHelper) Cleanup() error { } } - if err := os.RemoveAll(h.TempDir); err != nil { - return fmt.Errorf("failed to remove temp dir: %w", err) - } - - // Also 
clean up the default raft directory - if err := os.RemoveAll(filepath.Join("raft", h.NodeID)); err != nil { - return fmt.Errorf("failed to remove raft directory: %w", err) - } - return nil } From 2ab8041ef53714491405cc0756cdb5efbea069c9 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Thu, 28 Nov 2024 22:17:16 +0100 Subject: [PATCH 06/26] feat(config): add default Raft configuration values Add default configuration values for Raft consensus implementation: - RaftAddress: 127.0.0.1:2223 - RaftNodeID: node1 - RaftLeaderID: node1 - RaftDirectory: raft This change initializes the default Raft configuration in the config loader. --- config/config.go | 6 ++++++ config/constants.go | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/config/config.go b/config/config.go index 9932168d..43a52d18 100644 --- a/config/config.go +++ b/config/config.go @@ -180,6 +180,12 @@ func (c *Config) LoadDefaults(ctx context.Context) *gerr.GatewayDError { GRPCNetwork: DefaultGRPCAPINetwork, GRPCAddress: DefaultGRPCAPIAddress, }, + Raft: Raft{ + Address: DefaultRaftAddress, + NodeID: DefaultRaftNodeID, + LeaderID: DefaultRaftLeaderID, + Directory: DefaultRaftDirectory, + }, } //nolint:nestif diff --git a/config/constants.go b/config/constants.go index 0fbbef89..4a0907bb 100644 --- a/config/constants.go +++ b/config/constants.go @@ -126,6 +126,12 @@ const ( DefaultActionRedisEnabled = false DefaultRedisAddress = "localhost:6379" DefaultRedisChannel = "gatewayd-actions" + + // Raft constants. + DefaultRaftAddress = "127.0.0.1:2223" + DefaultRaftNodeID = "node1" + DefaultRaftLeaderID = "node1" + DefaultRaftDirectory = "raft" ) // Load balancing strategies. 
From 809530fead688f8a871ccfd68a99d38d99de8130 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Thu, 28 Nov 2024 23:36:06 +0100 Subject: [PATCH 07/26] refactor(raft): improve error handling and code organization - Enhance error handling with wrapped errors and detailed messages - Add meaningful constants for timeouts and configuration values - Rename RaftNode to Node for better clarity - Fix JSON field names to match Raft convention (nodeId, leaderId) - Add missing error checks in critical paths - Improve documentation and code comments - Update golangci linter settings to include raft package --- .golangci.yaml | 2 + config/types.go | 4 +- gatewayd.yaml | 4 +- network/consistenthash.go | 6 +- network/consistenthash_test.go | 2 +- network/server.go | 4 +- raft/raft.go | 125 +++++++++++++++++++-------------- raft/raft_test.go | 11 ++- testhelpers/raft_helpers.go | 27 ++++--- 9 files changed, 111 insertions(+), 74 deletions(-) diff --git a/.golangci.yaml b/.golangci.yaml index ddab1d5b..60a34508 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -70,6 +70,7 @@ linters-settings: - "github.com/testcontainers/testcontainers-go" - "github.com/stretchr/testify/require" - "github.com/docker/go-connections/nat" + - "github.com/hashicorp/raft" test: files: - $test @@ -92,6 +93,7 @@ linters-settings: - "github.com/redis/go-redis/v9" - "github.com/docker/go-connections/nat" - "github.com/codingsince1985/checksum" + - "github.com/hashicorp/raft" tagalign: align: false sort: false diff --git a/config/types.go b/config/types.go index 2cd54749..857d1c00 100644 --- a/config/types.go +++ b/config/types.go @@ -140,8 +140,8 @@ type API struct { type Raft struct { Address string `json:"address"` - NodeID string `json:"nodeID"` - LeaderID string `json:"leaderID"` + NodeID string `json:"nodeId"` + LeaderID string `json:"leaderId"` Peers []RaftPeer `json:"peers"` Directory string `json:"directory" jsonschema:"default=raft"` } diff --git a/gatewayd.yaml b/gatewayd.yaml index 
2d365175..700931a0 100644 --- a/gatewayd.yaml +++ b/gatewayd.yaml @@ -109,8 +109,8 @@ api: raft: address: 127.0.0.1:2223 - nodeID: node2 - leaderID: node2 + nodeId: node2 + leaderId: node2 peers: {} # - id: node1 # address: 127.0.0.1:2222 diff --git a/network/consistenthash.go b/network/consistenthash.go index b4f7e1d9..cb3f7707 100644 --- a/network/consistenthash.go +++ b/network/consistenthash.go @@ -1,12 +1,10 @@ package network import ( + "encoding/json" "fmt" "net" "sync" - "time" - - "encoding/json" gerr "github.com/gatewayd-io/gatewayd/errors" "github.com/gatewayd-io/gatewayd/raft" @@ -83,7 +81,7 @@ func (ch *ConsistentHash) NextProxy(conn IConnWrapper) (IProxy, *gerr.GatewayDEr } // Apply the command through Raft - if err := ch.server.RaftNode.Apply(cmdBytes, 10*time.Second); err != nil { + if err := ch.server.RaftNode.Apply(cmdBytes, raft.LeaderElectionTimeout); err != nil { return nil, gerr.ErrNoProxiesAvailable.Wrap(err) } diff --git a/network/consistenthash_test.go b/network/consistenthash_test.go index 0fa7472f..2cef39a1 100644 --- a/network/consistenthash_test.go +++ b/network/consistenthash_test.go @@ -150,7 +150,7 @@ func TestConsistentHashNextProxyUseFullAddress(t *testing.T) { // 1. Multiple concurrent requests from the same IP address consistently map to the same proxy // 2. The mapping is stable across sequential calls // 3. Different IP addresses map to different proxies -// 4. The Raft-based consistent hash remains thread-safe under high concurrency +// 4. The Raft-based consistent hash remains thread-safe under high concurrency. 
func TestConsistentHashNextProxyConcurrency(t *testing.T) { // Setup mocks conn1 := new(MockConnWrapper) diff --git a/network/server.go b/network/server.go index b8f16775..a1320670 100644 --- a/network/server.go +++ b/network/server.go @@ -89,7 +89,7 @@ type Server struct { LoadbalancerConsistentHash *config.ConsistentHash connectionToProxyMap *sync.Map - RaftNode *raft.RaftNode + RaftNode *raft.Node ProxyByBlock map[string]IProxy } @@ -811,7 +811,7 @@ func (s *Server) RemoveConnectionFromMap(conn *ConnWrapper) { s.connectionToProxyMap.Delete(conn) } -// Initialize the map when creating proxies +// Initialize the map when creating proxies. func (s *Server) initializeProxies() { s.ProxyByBlock = make(map[string]IProxy) for _, proxy := range s.Proxies { diff --git a/raft/raft.go b/raft/raft.go index b70c67c2..4d2453ea 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -16,35 +16,39 @@ import ( "github.com/rs/zerolog" ) -// Command types for Raft operations +// Command types for Raft operations. const ( CommandAddConsistentHashEntry = "ADD_CONSISTENT_HASH_ENTRY" RaftLeaderState = raft.Leader + LeaderElectionTimeout = 3 * time.Second + maxSnapshots = 3 // Maximum number of snapshots to retain + maxPool = 3 // Maximum number of connections to pool + transportTimeout = 10 * time.Second // Timeout for transport operations + leadershipCheckInterval = 10 * time.Second // Interval for checking leadership status ) -// ConsistentHashCommand represents a command to modify the consistent hash +// ConsistentHashCommand represents a command to modify the consistent hash. 
type ConsistentHashCommand struct { Type string `json:"type"` Hash uint64 `json:"hash"` - BlockName string `json:"block_name"` + BlockName string `json:"blockName"` } -// RaftNode represents a node in the Raft cluster -type RaftNode struct { - raft *raft.Raft - config *raft.Config - Fsm *FSM - logStore raft.LogStore - stableStore raft.StableStore - snapshotStore raft.SnapshotStore - transport raft.Transport - bootstrapPeers []raft.Server - Logger zerolog.Logger - Peers []raft.Server // Holds Raft peers (for joining an existing cluster) +// Node represents a node in the Raft cluster. +type Node struct { + raft *raft.Raft + config *raft.Config + Fsm *FSM + logStore raft.LogStore + stableStore raft.StableStore + snapshotStore raft.SnapshotStore + transport raft.Transport + Logger zerolog.Logger + Peers []raft.Server } -// NewRaftNode creates and initializes a new Raft node -func NewRaftNode(logger zerolog.Logger, raftConfig config.Raft) (*RaftNode, error) { +// NewRaftNode creates and initializes a new Raft node. 
+func NewRaftNode(logger zerolog.Logger, raftConfig config.Raft) (*Node, error) { config := raft.DefaultConfig() var err error @@ -78,7 +82,7 @@ func NewRaftNode(logger zerolog.Logger, raftConfig config.Raft) (*RaftNode, erro } // Create the snapshot store - snapshotStore, err := raft.NewFileSnapshotStore(raftDir, 3, os.Stderr) + snapshotStore, err := raft.NewFileSnapshotStore(raftDir, maxSnapshots, os.Stderr) if err != nil { return nil, fmt.Errorf("error creating snapshot store: %w", err) } @@ -88,19 +92,19 @@ func NewRaftNode(logger zerolog.Logger, raftConfig config.Raft) (*RaftNode, erro if err != nil { return nil, fmt.Errorf("error resolving TCP address: %w", err) } - transport, err := raft.NewTCPTransport(raftAddr, addr, 3, 10*time.Second, os.Stderr) + transport, err := raft.NewTCPTransport(raftAddr, addr, maxPool, transportTimeout, os.Stderr) if err != nil { return nil, fmt.Errorf("error creating TCP transport: %w", err) } // Create the Raft node - r, err := raft.NewRaft(config, fsm, logStore, stableStore, snapshotStore, transport) + raftNode, err := raft.NewRaft(config, fsm, logStore, stableStore, snapshotStore, transport) if err != nil { return nil, fmt.Errorf("error creating Raft: %w", err) } - node := &RaftNode{ - raft: r, + node := &Node{ + raft: raftNode, config: config, Fsm: fsm, logStore: logStore, @@ -119,8 +123,8 @@ func NewRaftNode(logger zerolog.Logger, raftConfig config.Raft) (*RaftNode, erro } for i, peer := range node.Peers { configuration.Servers[i] = raft.Server{ - ID: raft.ServerID(peer.ID), - Address: raft.ServerAddress(peer.Address), + ID: peer.ID, + Address: peer.Address, } } configuration.Servers = append(configuration.Servers, raft.Server{ @@ -146,8 +150,8 @@ func convertPeers(configPeers []config.RaftPeer) []raft.Server { return peers } -// monitorLeadership checks if the node is the Raft leader and logs state changes -func (n *RaftNode) monitorLeadership() { +// monitorLeadership checks if the node is the Raft leader and logs state 
changes. +func (n *Node) monitorLeadership() { for { isLeader := n.raft.State() == raft.Leader if isLeader { @@ -176,38 +180,50 @@ func (n *RaftNode) monitorLeadership() { n.Logger.Info().Msg("This node is a Raft follower") } - time.Sleep(10 * time.Second) // Poll leadership status periodically + time.Sleep(leadershipCheckInterval) // Use the named constant here } } -// AddPeer adds a new peer to the Raft cluster -func (n *RaftNode) AddPeer(peerID, peerAddr string) error { - return n.raft.AddVoter(raft.ServerID(peerID), raft.ServerAddress(peerAddr), 0, 0).Error() +// AddPeer adds a new peer to the Raft cluster. +func (n *Node) AddPeer(peerID, peerAddr string) error { + if err := n.raft.AddVoter(raft.ServerID(peerID), raft.ServerAddress(peerAddr), 0, 0).Error(); err != nil { + return fmt.Errorf("failed to add voter: %w", err) + } + return nil } -// RemovePeer removes a peer from the Raft cluster -func (n *RaftNode) RemovePeer(peerID string) error { - return n.raft.RemoveServer(raft.ServerID(peerID), 0, 0).Error() +// RemovePeer removes a peer from the Raft cluster. +func (n *Node) RemovePeer(peerID string) error { + if err := n.raft.RemoveServer(raft.ServerID(peerID), 0, 0).Error(); err != nil { + return fmt.Errorf("failed to remove server: %w", err) + } + return nil } -// Apply applies a new log entry to the Raft log -func (n *RaftNode) Apply(data []byte, timeout time.Duration) error { +// Apply applies a new log entry to the Raft log. +func (n *Node) Apply(data []byte, timeout time.Duration) error { future := n.raft.Apply(data, timeout) - return future.Error() + if err := future.Error(); err != nil { + return fmt.Errorf("failed to apply log entry: %w", err) + } + return nil } -// Shutdown gracefully shuts down the Raft node -func (n *RaftNode) Shutdown() error { - return n.raft.Shutdown().Error() +// Shutdown gracefully shuts down the Raft node. 
+func (n *Node) Shutdown() error { + if err := n.raft.Shutdown().Error(); err != nil { + return fmt.Errorf("failed to shutdown raft node: %w", err) + } + return nil } -// FSM represents the Finite State Machine for the Raft cluster +// FSM represents the Finite State Machine for the Raft cluster. type FSM struct { lbHashToBlockName map[uint64]string mu sync.RWMutex } -// GetProxyBlock safely retrieves the block name for a given hash +// GetProxyBlock safely retrieves the block name for a given hash. func (f *FSM) GetProxyBlock(hash uint64) (string, bool) { f.mu.RLock() defer f.mu.RUnlock() @@ -217,14 +233,14 @@ func (f *FSM) GetProxyBlock(hash uint64) (string, bool) { return "", false } -// NewFSM creates a new FSM instance +// NewFSM creates a new FSM instance. func NewFSM() *FSM { return &FSM{ lbHashToBlockName: make(map[uint64]string), } } -// Apply implements the raft.FSM interface +// Apply implements the raft.FSM interface. func (f *FSM) Apply(log *raft.Log) interface{} { var cmd ConsistentHashCommand if err := json.Unmarshal(log.Data, &cmd); err != nil { @@ -243,7 +259,7 @@ func (f *FSM) Apply(log *raft.Log) interface{} { } } -// Snapshot returns a snapshot of the FSM +// Snapshot returns a snapshot of the FSM. func (f *FSM) Snapshot() (raft.FSMSnapshot, error) { f.mu.RLock() defer f.mu.RUnlock() @@ -257,12 +273,12 @@ func (f *FSM) Snapshot() (raft.FSMSnapshot, error) { return &FSMSnapshot{lbHashToBlockName: hashMapCopy}, nil } -// Restore restores the FSM from a snapshot +// Restore restores the FSM from a snapshot. func (f *FSM) Restore(rc io.ReadCloser) error { decoder := json.NewDecoder(rc) var lbHashToBlockName map[uint64]string if err := decoder.Decode(&lbHashToBlockName); err != nil { - return err + return fmt.Errorf("error decoding snapshot: %w", err) } f.mu.Lock() @@ -272,7 +288,7 @@ func (f *FSM) Restore(rc io.ReadCloser) error { return nil } -// FSMSnapshot represents a snapshot of the FSM +// FSMSnapshot represents a snapshot of the FSM. 
type FSMSnapshot struct { lbHashToBlockName map[uint64]string } @@ -280,15 +296,20 @@ type FSMSnapshot struct { func (f *FSMSnapshot) Persist(sink raft.SnapshotSink) error { err := json.NewEncoder(sink).Encode(f.lbHashToBlockName) if err != nil { - sink.Cancel() - return err + if cancelErr := sink.Cancel(); cancelErr != nil { + return fmt.Errorf("error canceling snapshot: %w (original error: %w)", cancelErr, err) + } + return fmt.Errorf("error encoding snapshot: %w", err) } - return sink.Close() + if err := sink.Close(); err != nil { + return fmt.Errorf("error closing snapshot sink: %w", err) + } + return nil } func (f *FSMSnapshot) Release() {} -// GetState returns the current Raft state -func (n *RaftNode) GetState() raft.RaftState { +// GetState returns the current Raft state. +func (n *Node) GetState() raft.RaftState { return n.raft.State() } diff --git a/raft/raft_test.go b/raft/raft_test.go index 1e1f031c..ecbd03e3 100644 --- a/raft/raft_test.go +++ b/raft/raft_test.go @@ -188,12 +188,17 @@ func TestRaftLeadershipAndFollowers(t *testing.T) { } // Start all nodes - nodes := make([]*RaftNode, len(nodeConfigs)) + nodes := make([]*Node, len(nodeConfigs)) for i, cfg := range nodeConfigs { node, err := NewRaftNode(logger, cfg) require.NoError(t, err) nodes[i] = node - defer node.Shutdown() + defer func() { + err := node.Shutdown() + if err != nil { + t.Errorf("Failed to shutdown node: %v", err) + } + }() } // Wait for leader election @@ -201,7 +206,7 @@ func TestRaftLeadershipAndFollowers(t *testing.T) { // Test 1: Verify that exactly one leader is elected leaderCount := 0 - var leaderNode *RaftNode + var leaderNode *Node for _, node := range nodes { if node.GetState() == raft.Leader { leaderCount++ diff --git a/testhelpers/raft_helpers.go b/testhelpers/raft_helpers.go index 30b399d2..9503a80c 100644 --- a/testhelpers/raft_helpers.go +++ b/testhelpers/raft_helpers.go @@ -1,6 +1,7 @@ package testhelpers import ( + "errors" "fmt" "net" "os" @@ -12,16 +13,21 @@ import 
( "github.com/rs/zerolog" ) -// TestRaftHelper contains utilities for testing Raft functionality +// TestRaftHelper contains utilities for testing Raft functionality. type TestRaftHelper struct { - Node *raft.RaftNode + Node *raft.Node TempDir string NodeID string RaftAddr string } -// NewTestRaftNode creates a Raft node for testing purposes +const ( + pollInterval = 100 * time.Millisecond +) + +// NewTestRaftNode creates a Raft node for testing purposes. func NewTestRaftNode(t *testing.T) (*TestRaftHelper, error) { + t.Helper() tempDir := t.TempDir() // Setup test configuration @@ -31,7 +37,12 @@ func NewTestRaftNode(t *testing.T) (*TestRaftHelper, error) { if err != nil { return nil, fmt.Errorf("failed to get random port: %w", err) } - port := listener.Addr().(*net.TCPAddr).Port + + addr, ok := listener.Addr().(*net.TCPAddr) + if !ok { + return nil, errors.New("failed to get TCP address from listener") + } + port := addr.Port listener.Close() raftAddr := fmt.Sprintf("127.0.0.1:%d", port) @@ -57,16 +68,16 @@ func NewTestRaftNode(t *testing.T) (*TestRaftHelper, error) { } // Wait for the node to become leader - timeout := time.Now().Add(3 * time.Second) + timeout := time.Now().Add(raft.LeaderElectionTimeout) for time.Now().Before(timeout) { if node.GetState() == raft.RaftLeaderState { break } - time.Sleep(100 * time.Millisecond) + time.Sleep(pollInterval) } if node.GetState() != raft.RaftLeaderState { - return nil, fmt.Errorf("timeout waiting for node to become leader") + return nil, errors.New("timeout waiting for node to become leader") } return &TestRaftHelper{ @@ -77,7 +88,7 @@ func NewTestRaftNode(t *testing.T) (*TestRaftHelper, error) { }, nil } -// Cleanup removes temporary files and shuts down the Raft node +// Cleanup removes temporary files and shuts down the Raft node. 
func (h *TestRaftHelper) Cleanup() error { if h.Node != nil { if err := h.Node.Shutdown(); err != nil { From 071e84c61f7239e82f6f920b9ea1c3b1f6baec95 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Thu, 28 Nov 2024 23:49:25 +0100 Subject: [PATCH 08/26] Add temporary directory for Raft in Test_pluginScaffoldCmd - Introduced a temporary directory for Raft using t.TempDir() in the Test_pluginScaffoldCmd test case. - Set the GATEWAYD_RAFT_DIRECTORY environment variable to the new temporary directory. - This change ensures that Raft operations during testing are isolated and do not interfere with other tests or system directories. --- cmd/plugin_scaffold_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/plugin_scaffold_test.go b/cmd/plugin_scaffold_test.go index 88c6d75a..7eb6b246 100644 --- a/cmd/plugin_scaffold_test.go +++ b/cmd/plugin_scaffold_test.go @@ -28,6 +28,9 @@ func Test_pluginScaffoldCmd(t *testing.T) { postgresAddress2 := postgresHostIP2 + ":" + postgresMappedPort2.Port() t.Setenv("GATEWAYD_CLIENTS_TEST_WRITE_ADDRESS", postgresAddress2) + raftTempDir := t.TempDir() + t.Setenv("GATEWAYD_RAFT_DIRECTORY", raftTempDir) + globalTestConfigFile := filepath.Join("testdata", "gatewayd.yaml") plugin.IsPluginTemplateEmbedded() pluginTestScaffoldInputFile := "./testdata/scaffold_input.yaml" From c5d2dbe61c1a612620776daaa59bb32ab6415a69 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Mon, 2 Dec 2024 21:59:25 +0100 Subject: [PATCH 09/26] feat(config): add JSON parsing for raft peers env variable - Replace loadEnvVars with loadEnvVarsWithTransform to handle complex env values - Add special handling for raft.peers to parse JSON array into RaftPeer structs - Update GlobalKoanf and PluginKoanf to use new transformer function This change allows proper parsing of list-type environment variables, specifically for raft peer configurations. 
--- config/config.go | 73 ++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/config/config.go b/config/config.go index 43a52d18..a82bab58 100644 --- a/config/config.go +++ b/config/config.go @@ -315,7 +315,7 @@ func (c *Config) LoadDefaults(ctx context.Context) *gerr.GatewayDError { func (c *Config) LoadGlobalEnvVars(ctx context.Context) *gerr.GatewayDError { _, span := otel.Tracer(TracerName).Start(ctx, "Load global environment variables") - if err := c.GlobalKoanf.Load(loadEnvVars(), nil); err != nil { + if err := c.GlobalKoanf.Load(loadEnvVarsWithTransform(), nil); err != nil { span.RecordError(err) span.End() return gerr.ErrConfigParseError.Wrap( @@ -332,7 +332,7 @@ func (c *Config) LoadGlobalEnvVars(ctx context.Context) *gerr.GatewayDError { func (c *Config) LoadPluginEnvVars(ctx context.Context) *gerr.GatewayDError { _, span := otel.Tracer(TracerName).Start(ctx, "Load plugin environment variables") - if err := c.PluginKoanf.Load(loadEnvVars(), nil); err != nil { + if err := c.PluginKoanf.Load(loadEnvVarsWithTransform(), nil); err != nil { span.RecordError(err) span.End() return gerr.ErrConfigParseError.Wrap( @@ -344,41 +344,52 @@ func (c *Config) LoadPluginEnvVars(ctx context.Context) *gerr.GatewayDError { return nil } -func loadEnvVars() *env.Env { - return env.Provider(EnvPrefix, ".", transformEnvVariable) -} - -// transformEnvVariable transforms the environment variable name to a format based on JSON tags. 
-func transformEnvVariable(envVar string) string { - structs := []any{ - &API{}, - &Logger{}, - &Pool{}, - &Proxy{}, - &Server{}, - &Metrics{}, - &PluginConfig{}, - } - tagMapping := make(map[string]string) - generateTagMapping(structs, tagMapping) +func loadEnvVarsWithTransform() *env.Env { + // Use ProviderWithValue to transform both key and value + return env.ProviderWithValue(EnvPrefix, ".", func(s string, v string) (string, interface{}) { + // Transform the key + key := strings.ToLower(strings.TrimPrefix(s, EnvPrefix)) + + structs := []any{ + &API{}, + &Logger{}, + &Pool{}, + &Proxy{}, + &Server{}, + &Metrics{}, + &PluginConfig{}, + &Raft{}, + } + tagMapping := make(map[string]string) + generateTagMapping(structs, tagMapping) - lowerEnvVar := strings.ToLower(strings.TrimPrefix(envVar, EnvPrefix)) - parts := strings.Split(lowerEnvVar, "_") + parts := strings.Split(key, "_") - var transformedParts strings.Builder + var transformedParts strings.Builder - for i, part := range parts { - if i > 0 { - transformedParts.WriteString(".") + for i, part := range parts { + if i > 0 { + transformedParts.WriteString(".") + } + if mappedValue, exists := tagMapping[part]; exists { + transformedParts.WriteString(mappedValue) + } else { + transformedParts.WriteString(part) + } } - if mappedValue, exists := tagMapping[part]; exists { - transformedParts.WriteString(mappedValue) - } else { - transformedParts.WriteString(part) + + // Check if the key is "peers" and transform the value using JSON unmarshal + if transformedParts.String() == "raft.peers" { + var raftPeers []RaftPeer + if err := json.Unmarshal([]byte(v), &raftPeers); err != nil { + return transformedParts.String(), fmt.Errorf("failed to unmarshal peers: %w", err) + } + return transformedParts.String(), raftPeers } - } - return transformedParts.String() + // Return the key and value as is if no transformation is needed + return transformedParts.String(), v + }) } // LoadGlobalConfigFile loads the plugin configuration 
file. From 0c7ce5c6097d68446054f9a65594cf53254b737a Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Fri, 6 Dec 2024 16:54:58 +0100 Subject: [PATCH 10/26] Add GRPC to raft Add gRPC support to the Raft implementation to enable proper request forwarding between nodes. Changes include: - Add protobuf definitions for Raft service with ForwardApply RPC - Add gRPC server and client implementations for Raft nodes - Update Raft configuration to include gRPC addresses - Implement request forwarding logic for non-leader nodes - Update node configuration to handle gRPC connections - Add proper cleanup of gRPC resources during shutdown The changes enable proper forwarding of apply requests from follower nodes to the leader, improving the distributed consensus mechanism. --- config/config.go | 9 +- config/constants.go | 9 +- config/types.go | 16 +-- go.mod | 2 +- go.sum | 2 + raft/proto/raft.pb.go | 203 +++++++++++++++++++++++++++++++++++++ raft/proto/raft.proto | 18 ++++ raft/proto/raft_grpc.pb.go | 121 ++++++++++++++++++++++ raft/raft.go | 107 +++++++++++++++++-- raft/rpc.go | 71 +++++++++++++ 10 files changed, 535 insertions(+), 23 deletions(-) create mode 100644 raft/proto/raft.pb.go create mode 100644 raft/proto/raft.proto create mode 100644 raft/proto/raft_grpc.pb.go create mode 100644 raft/rpc.go diff --git a/config/config.go b/config/config.go index a82bab58..5a260689 100644 --- a/config/config.go +++ b/config/config.go @@ -181,10 +181,11 @@ func (c *Config) LoadDefaults(ctx context.Context) *gerr.GatewayDError { GRPCAddress: DefaultGRPCAPIAddress, }, Raft: Raft{ - Address: DefaultRaftAddress, - NodeID: DefaultRaftNodeID, - LeaderID: DefaultRaftLeaderID, - Directory: DefaultRaftDirectory, + Address: DefaultRaftAddress, + NodeID: DefaultRaftNodeID, + LeaderID: DefaultRaftLeaderID, + Directory: DefaultRaftDirectory, + GRPCAddress: DefaultRaftGRPCAddress, }, } diff --git a/config/constants.go b/config/constants.go index 4a0907bb..56b236d5 100644 --- a/config/constants.go 
+++ b/config/constants.go @@ -128,10 +128,11 @@ const ( DefaultRedisChannel = "gatewayd-actions" // Raft constants. - DefaultRaftAddress = "127.0.0.1:2223" - DefaultRaftNodeID = "node1" - DefaultRaftLeaderID = "node1" - DefaultRaftDirectory = "raft" + DefaultRaftAddress = "127.0.0.1:2223" + DefaultRaftNodeID = "node1" + DefaultRaftLeaderID = "node1" + DefaultRaftDirectory = "raft" + DefaultRaftGRPCAddress = "127.0.0.1:50051" ) // Load balancing strategies. diff --git a/config/types.go b/config/types.go index 857d1c00..67be5c3e 100644 --- a/config/types.go +++ b/config/types.go @@ -139,16 +139,18 @@ type API struct { } type Raft struct { - Address string `json:"address"` - NodeID string `json:"nodeId"` - LeaderID string `json:"leaderId"` - Peers []RaftPeer `json:"peers"` - Directory string `json:"directory" jsonschema:"default=raft"` + Address string `json:"address"` + NodeID string `json:"nodeId"` + LeaderID string `json:"leaderId"` + Peers []RaftPeer `json:"peers"` + Directory string `json:"directory" jsonschema:"default=raft"` + GRPCAddress string `json:"grpcAddress"` } type RaftPeer struct { - ID string `json:"id"` - Address string `json:"address"` + ID string `json:"id"` + Address string `json:"address"` + GRPCAddress string `json:"grpcAddress"` } type GlobalConfig struct { diff --git a/go.mod b/go.mod index 3c6d5a08..4f63f9f8 100644 --- a/go.mod +++ b/go.mod @@ -43,7 +43,7 @@ require ( golang.org/x/text v0.20.0 google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 google.golang.org/grpc v1.68.0 - google.golang.org/protobuf v1.35.1 + google.golang.org/protobuf v1.35.2 gopkg.in/natefinch/lumberjack.v2 v2.2.1 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index 8bc5d0ad..7ef64b97 100644 --- a/go.sum +++ b/go.sum @@ -783,6 +783,8 @@ google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf 
v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= +google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/asn1-ber.v1 v1.0.0-20181015200546-f715ec2f112d/go.mod h1:cuepJuh7vyXfUyUwEgHQXw849cJrilpS5NeIjOWESAw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/raft/proto/raft.pb.go b/raft/proto/raft.pb.go new file mode 100644 index 00000000..587887cb --- /dev/null +++ b/raft/proto/raft.pb.go @@ -0,0 +1,203 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.35.2 +// protoc v5.29.0 +// source: raft/proto/raft.proto + +package proto + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type ApplyRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Data []byte `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` + TimeoutMs int64 `protobuf:"varint,2,opt,name=timeout_ms,json=timeoutMs,proto3" json:"timeout_ms,omitempty"` +} + +func (x *ApplyRequest) Reset() { + *x = ApplyRequest{} + mi := &file_raft_proto_raft_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ApplyRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ApplyRequest) ProtoMessage() {} + +func (x *ApplyRequest) ProtoReflect() protoreflect.Message { + mi := &file_raft_proto_raft_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ApplyRequest.ProtoReflect.Descriptor instead. 
+func (*ApplyRequest) Descriptor() ([]byte, []int) { + return file_raft_proto_raft_proto_rawDescGZIP(), []int{0} +} + +func (x *ApplyRequest) GetData() []byte { + if x != nil { + return x.Data + } + return nil +} + +func (x *ApplyRequest) GetTimeoutMs() int64 { + if x != nil { + return x.TimeoutMs + } + return 0 +} + +type ApplyResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Success bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"` + Error string `protobuf:"bytes,2,opt,name=error,proto3" json:"error,omitempty"` +} + +func (x *ApplyResponse) Reset() { + *x = ApplyResponse{} + mi := &file_raft_proto_raft_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ApplyResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ApplyResponse) ProtoMessage() {} + +func (x *ApplyResponse) ProtoReflect() protoreflect.Message { + mi := &file_raft_proto_raft_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ApplyResponse.ProtoReflect.Descriptor instead. 
+func (*ApplyResponse) Descriptor() ([]byte, []int) { + return file_raft_proto_raft_proto_rawDescGZIP(), []int{1} +} + +func (x *ApplyResponse) GetSuccess() bool { + if x != nil { + return x.Success + } + return false +} + +func (x *ApplyResponse) GetError() string { + if x != nil { + return x.Error + } + return "" +} + +var File_raft_proto_raft_proto protoreflect.FileDescriptor + +var file_raft_proto_raft_proto_rawDesc = []byte{ + 0x0a, 0x15, 0x72, 0x61, 0x66, 0x74, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x72, 0x61, 0x66, + 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x04, 0x72, 0x61, 0x66, 0x74, 0x22, 0x41, 0x0a, + 0x0c, 0x41, 0x70, 0x70, 0x6c, 0x79, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, + 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, + 0x61, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x5f, 0x6d, 0x73, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x4d, 0x73, + 0x22, 0x3f, 0x0a, 0x0d, 0x41, 0x70, 0x70, 0x6c, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x65, + 0x72, 0x72, 0x6f, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x32, 0x48, 0x0a, 0x0b, 0x52, 0x61, 0x66, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, + 0x12, 0x39, 0x0a, 0x0c, 0x46, 0x6f, 0x72, 0x77, 0x61, 0x72, 0x64, 0x41, 0x70, 0x70, 0x6c, 0x79, + 0x12, 0x12, 0x2e, 0x72, 0x61, 0x66, 0x74, 0x2e, 0x41, 0x70, 0x70, 0x6c, 0x79, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x13, 0x2e, 0x72, 0x61, 0x66, 0x74, 0x2e, 0x41, 0x70, 0x70, 0x6c, + 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x2c, 0x5a, 0x2a, 0x67, + 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x61, 0x74, 0x65, 0x77, 
0x61, + 0x79, 0x64, 0x2d, 0x69, 0x6f, 0x2f, 0x67, 0x61, 0x74, 0x65, 0x77, 0x61, 0x79, 0x64, 0x2f, 0x72, + 0x61, 0x66, 0x74, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x33, +} + +var ( + file_raft_proto_raft_proto_rawDescOnce sync.Once + file_raft_proto_raft_proto_rawDescData = file_raft_proto_raft_proto_rawDesc +) + +func file_raft_proto_raft_proto_rawDescGZIP() []byte { + file_raft_proto_raft_proto_rawDescOnce.Do(func() { + file_raft_proto_raft_proto_rawDescData = protoimpl.X.CompressGZIP(file_raft_proto_raft_proto_rawDescData) + }) + return file_raft_proto_raft_proto_rawDescData +} + +var file_raft_proto_raft_proto_msgTypes = make([]protoimpl.MessageInfo, 2) +var file_raft_proto_raft_proto_goTypes = []any{ + (*ApplyRequest)(nil), // 0: raft.ApplyRequest + (*ApplyResponse)(nil), // 1: raft.ApplyResponse +} +var file_raft_proto_raft_proto_depIdxs = []int32{ + 0, // 0: raft.RaftService.ForwardApply:input_type -> raft.ApplyRequest + 1, // 1: raft.RaftService.ForwardApply:output_type -> raft.ApplyResponse + 1, // [1:2] is the sub-list for method output_type + 0, // [0:1] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_raft_proto_raft_proto_init() } +func file_raft_proto_raft_proto_init() { + if File_raft_proto_raft_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_raft_proto_raft_proto_rawDesc, + NumEnums: 0, + NumMessages: 2, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_raft_proto_raft_proto_goTypes, + DependencyIndexes: file_raft_proto_raft_proto_depIdxs, + MessageInfos: file_raft_proto_raft_proto_msgTypes, + }.Build() + File_raft_proto_raft_proto = out.File + file_raft_proto_raft_proto_rawDesc = nil + 
file_raft_proto_raft_proto_goTypes = nil + file_raft_proto_raft_proto_depIdxs = nil +} diff --git a/raft/proto/raft.proto b/raft/proto/raft.proto new file mode 100644 index 00000000..6a235d35 --- /dev/null +++ b/raft/proto/raft.proto @@ -0,0 +1,18 @@ +syntax = "proto3"; +package raft; + +option go_package = "github.com/gatewayd-io/gatewayd/raft/proto"; + +service RaftService { + rpc ForwardApply (ApplyRequest) returns (ApplyResponse) {} +} + +message ApplyRequest { + bytes data = 1; + int64 timeout_ms = 2; +} + +message ApplyResponse { + bool success = 1; + string error = 2; +} diff --git a/raft/proto/raft_grpc.pb.go b/raft/proto/raft_grpc.pb.go new file mode 100644 index 00000000..e0764709 --- /dev/null +++ b/raft/proto/raft_grpc.pb.go @@ -0,0 +1,121 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.5.1 +// - protoc v5.29.0 +// source: raft/proto/raft.proto + +package proto + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.64.0 or later. +const _ = grpc.SupportPackageIsVersion9 + +const ( + RaftService_ForwardApply_FullMethodName = "/raft.RaftService/ForwardApply" +) + +// RaftServiceClient is the client API for RaftService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. 
+type RaftServiceClient interface { + ForwardApply(ctx context.Context, in *ApplyRequest, opts ...grpc.CallOption) (*ApplyResponse, error) +} + +type raftServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewRaftServiceClient(cc grpc.ClientConnInterface) RaftServiceClient { + return &raftServiceClient{cc} +} + +func (c *raftServiceClient) ForwardApply(ctx context.Context, in *ApplyRequest, opts ...grpc.CallOption) (*ApplyResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(ApplyResponse) + err := c.cc.Invoke(ctx, RaftService_ForwardApply_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +// RaftServiceServer is the server API for RaftService service. +// All implementations must embed UnimplementedRaftServiceServer +// for forward compatibility. +type RaftServiceServer interface { + ForwardApply(context.Context, *ApplyRequest) (*ApplyResponse, error) + mustEmbedUnimplementedRaftServiceServer() +} + +// UnimplementedRaftServiceServer must be embedded to have +// forward compatible implementations. +// +// NOTE: this should be embedded by value instead of pointer to avoid a nil +// pointer dereference when methods are called. +type UnimplementedRaftServiceServer struct{} + +func (UnimplementedRaftServiceServer) ForwardApply(context.Context, *ApplyRequest) (*ApplyResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method ForwardApply not implemented") +} +func (UnimplementedRaftServiceServer) mustEmbedUnimplementedRaftServiceServer() {} +func (UnimplementedRaftServiceServer) testEmbeddedByValue() {} + +// UnsafeRaftServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to RaftServiceServer will +// result in compilation errors. 
+type UnsafeRaftServiceServer interface { + mustEmbedUnimplementedRaftServiceServer() +} + +func RegisterRaftServiceServer(s grpc.ServiceRegistrar, srv RaftServiceServer) { + // If the following call pancis, it indicates UnimplementedRaftServiceServer was + // embedded by pointer and is nil. This will cause panics if an + // unimplemented method is ever invoked, so we test this at initialization + // time to prevent it from happening at runtime later due to I/O. + if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { + t.testEmbeddedByValue() + } + s.RegisterService(&RaftService_ServiceDesc, srv) +} + +func _RaftService_ForwardApply_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ApplyRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(RaftServiceServer).ForwardApply(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: RaftService_ForwardApply_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(RaftServiceServer).ForwardApply(ctx, req.(*ApplyRequest)) + } + return interceptor(ctx, in, info, handler) +} + +// RaftService_ServiceDesc is the grpc.ServiceDesc for RaftService service. 
+// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var RaftService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "raft.RaftService", + HandlerType: (*RaftServiceServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "ForwardApply", + Handler: _RaftService_ForwardApply_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "raft/proto/raft.proto", +} diff --git a/raft/raft.go b/raft/raft.go index 4d2453ea..b08cf6a2 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -1,6 +1,7 @@ package raft import ( + "context" "encoding/json" "fmt" "io" @@ -14,6 +15,9 @@ import ( "github.com/hashicorp/raft" raftboltdb "github.com/hashicorp/raft-boltdb" "github.com/rs/zerolog" + "google.golang.org/grpc" + + pb "github.com/gatewayd-io/gatewayd/raft/proto" ) // Command types for Raft operations. @@ -44,7 +48,10 @@ type Node struct { snapshotStore raft.SnapshotStore transport raft.Transport Logger zerolog.Logger - Peers []raft.Server + Peers []config.RaftPeer + rpcServer *grpc.Server + rpcClient *rpcClient + grpcAddr string } // NewRaftNode creates and initializes a new Raft node. 
@@ -112,7 +119,16 @@ func NewRaftNode(logger zerolog.Logger, raftConfig config.Raft) (*Node, error) { snapshotStore: snapshotStore, transport: transport, Logger: logger, - Peers: convertPeers(raftConfig.Peers), + Peers: raftConfig.Peers, + grpcAddr: raftConfig.GRPCAddress, + } + + // Initialize RPC client + node.rpcClient = newRPCClient(node) + + // Start RPC server + if err := node.startRPCServer(); err != nil { + return nil, fmt.Errorf("failed to start RPC server: %w", err) } // Handle bootstrapping @@ -123,8 +139,8 @@ func NewRaftNode(logger zerolog.Logger, raftConfig config.Raft) (*Node, error) { } for i, peer := range node.Peers { configuration.Servers[i] = raft.Server{ - ID: peer.ID, - Address: peer.Address, + ID: raft.ServerID(peer.ID), + Address: raft.ServerAddress(peer.Address), } } configuration.Servers = append(configuration.Servers, raft.Server{ @@ -162,7 +178,7 @@ func (n *Node) monitorLeadership() { existingConfig := n.raft.GetConfiguration().Configuration() peerExists := false for _, server := range existingConfig.Servers { - if server.ID == peer.ID { + if server.ID == raft.ServerID(peer.ID) { peerExists = true n.Logger.Info().Msgf("Peer %s already exists in Raft cluster, skipping", peer.ID) break @@ -200,8 +216,16 @@ func (n *Node) RemovePeer(peerID string) error { return nil } -// Apply applies a new log entry to the Raft log. 
+// Apply is the public method that handles forwarding if necessary func (n *Node) Apply(data []byte, timeout time.Duration) error { + if n.raft.State() != raft.Leader { + return n.forwardToLeader(data, timeout) + } + return n.applyInternal(data, timeout) +} + +// applyInternal is the internal method that actually applies the data +func (n *Node) applyInternal(data []byte, timeout time.Duration) error { future := n.raft.Apply(data, timeout) if err := future.Error(); err != nil { return fmt.Errorf("failed to apply log entry: %w", err) @@ -209,8 +233,59 @@ func (n *Node) Apply(data []byte, timeout time.Duration) error { return nil } -// Shutdown gracefully shuts down the Raft node. +func (n *Node) forwardToLeader(data []byte, timeout time.Duration) error { + leaderAddr, leaderId := n.raft.LeaderWithID() + if leaderId == "" { + return fmt.Errorf("no leader available") + } + + n.Logger.Debug(). + Str("leader_id", string(leaderId)). + Str("leader_addr", string(leaderAddr)). + Msg("forwarding request to leader") + + var leaderGrpcAddr string + for _, peer := range n.Peers { + if raft.ServerID(peer.ID) == leaderId { + leaderGrpcAddr = string(peer.GRPCAddress) + break + } + } + // Get the RPC client for the leader + client, err := n.rpcClient.getClient(string(leaderGrpcAddr)) + if err != nil { + return fmt.Errorf("failed to get client for leader: %w", err) + } + + // Create context with timeout + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + // Forward the request + resp, err := client.ForwardApply(ctx, &pb.ApplyRequest{ + Data: data, + TimeoutMs: timeout.Milliseconds(), + }) + if err != nil { + return fmt.Errorf("failed to forward request: %w", err) + } + + if !resp.Success { + return fmt.Errorf("leader failed to apply: %s", resp.Error) + } + + return nil +} + +// Update Shutdown to clean up RPC resources func (n *Node) Shutdown() error { + if n.rpcServer != nil { + n.rpcServer.GracefulStop() + } + if n.rpcClient != nil { + 
n.rpcClient.close() + } + if err := n.raft.Shutdown().Error(); err != nil { return fmt.Errorf("failed to shutdown raft node: %w", err) } @@ -313,3 +388,21 @@ func (f *FSMSnapshot) Release() {} func (n *Node) GetState() raft.RaftState { return n.raft.State() } + +func (n *Node) startRPCServer() error { + listener, err := net.Listen("tcp", n.grpcAddr) + if err != nil { + return fmt.Errorf("failed to listen on %s: %w", n.grpcAddr, err) + } + + n.rpcServer = grpc.NewServer() + pb.RegisterRaftServiceServer(n.rpcServer, &rpcServer{node: n}) + + go func() { + if err := n.rpcServer.Serve(listener); err != nil { + n.Logger.Error().Err(err).Msg("RPC server failed") + } + }() + + return nil +} diff --git a/raft/rpc.go b/raft/rpc.go new file mode 100644 index 00000000..50321e0b --- /dev/null +++ b/raft/rpc.go @@ -0,0 +1,71 @@ +package raft + +import ( + "context" + "fmt" + "time" + + pb "github.com/gatewayd-io/gatewayd/raft/proto" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +type rpcServer struct { + pb.UnimplementedRaftServiceServer + node *Node +} + +func (s *rpcServer) ForwardApply(ctx context.Context, req *pb.ApplyRequest) (*pb.ApplyResponse, error) { + timeout := time.Duration(req.TimeoutMs) * time.Millisecond + + err := s.node.applyInternal(req.Data, timeout) + if err != nil { + return &pb.ApplyResponse{ + Success: false, + Error: err.Error(), + }, nil + } + + return &pb.ApplyResponse{ + Success: true, + }, nil +} + +type rpcClient struct { + clients map[string]pb.RaftServiceClient + conns map[string]*grpc.ClientConn + node *Node +} + +func newRPCClient(node *Node) *rpcClient { + return &rpcClient{ + clients: make(map[string]pb.RaftServiceClient), + conns: make(map[string]*grpc.ClientConn), + node: node, + } +} + +func (c *rpcClient) getClient(address string) (pb.RaftServiceClient, error) { + if client, ok := c.clients[address]; ok { + return client, nil + } + + conn, err := grpc.NewClient( + address, + 
grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + return nil, fmt.Errorf("failed to connect to %s: %w", address, err) + } + + client := pb.NewRaftServiceClient(conn) + c.clients[address] = client + c.conns[address] = conn + return client, nil +} + +func (c *rpcClient) close() { + for _, conn := range c.conns { + conn.Close() + } +} From fb36926d62e6b30e2c3a02eedaa6fffb09facde7 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Sun, 8 Dec 2024 18:09:08 +0100 Subject: [PATCH 11/26] feat: add Docker Compose configuration for Raft cluster setup Add docker-compose-raft.yaml that configures a 3-node GatewayD cluster using Raft consensus protocol. The setup includes: - 3 GatewayD nodes with Raft configuration - Separate read/write PostgreSQL instances - Redis for caching - Observability stack (Prometheus, Tempo, Grafana) - Plugin installation service This configuration enables high availability and leader election through Raft consensus. --- docker-compose-raft.yaml | 204 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 docker-compose-raft.yaml diff --git a/docker-compose-raft.yaml b/docker-compose-raft.yaml new file mode 100644 index 00000000..a9996660 --- /dev/null +++ b/docker-compose-raft.yaml @@ -0,0 +1,204 @@ +services: + install_plugins: + # This intermediate service is used to install the plugins + # before starting the GatewayD service. + image: alpine:3.20 + command: ["/bin/sh", "/setup.sh"] + volumes: + - ./setup.sh:/setup.sh:ro + # Use the variable defined above to mount the GatewayD files. + - ./gatewayd-files:/gatewayd-files:rw + environment: + - GATEWAYD_FILES=/gatewayd-files + # If you want to install a specific version of GatewayD, you can set the + # GATEWAYD_VERSION environment variable to the desired version. Otherwise, + # the latest version will be installed. + # - GATEWAYD_VERSION=v0.9.5 + # The architecture of the GatewayD and plugins to install. 
+ # Default: amd64 + # Possible values: amd64 or arm64 + # - ARCH=amd64 + - REDIS_URL=redis://redis:6379/0 + write-postgres: + image: postgres:latest + environment: + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=postgres + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 5s + timeout: 5s + retries: 5 + read-postgres: + image: postgres:latest + environment: + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=postgres + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 5s + timeout: 5s + retries: 5 + redis: + image: redis:latest + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 5s + retries: 5 + gatewayd-1: + image: gatewayd:raft + command: + [ + "run", + "--config", + "/gatewayd-files/gatewayd.yaml", + "--plugin-config", + "/gatewayd-files/gatewayd_plugins.yaml", + "--tracing", + "--collector-url", + "tempo:4317", + ] + environment: + - GATEWAYD_CLIENTS_DEFAULT_WRITES_ADDRESS=write-postgres:5432 + - GATEWAYD_CLIENTS_DEFAULT_READS_ADDRESS=read-postgres:5432 + - GATEWAYD_RAFT_NODEID=node1 + - GATEWAYD_RAFT_LEADERID=node2 + - GATEWAYD_RAFT_ADDRESS=gatewayd-1:2223 + - GATEWAYD_RAFT_GRPCADDRESS=gatewayd-1:50051 + - GATEWAYD_RAFT_PEERS=[{"id":"node2","address":"gatewayd-2:2223","grpcAddress":"gatewayd-2:50051"},{"id":"node3","address":"gatewayd-3:2223","grpcAddress":"gatewayd-3:50051"}] + ports: + - "15432:15432" + - "18080:18080" + - "19090:19090" + - "42223:2223" + - "50051:50051" + volumes: + - ./gatewayd-files:/gatewayd-files:ro + - ./raft-data-1:/var/lib/gatewayd/raft + links: + - write-postgres + - read-postgres + - redis + healthcheck: + test: ["CMD", "curl", "-f", "http://gatewayd-1:18080/healthz"] + interval: 5s + timeout: 5s + retries: 5 + depends_on: + write-postgres: + condition: service_healthy + read-postgres: + condition: service_healthy + redis: + condition: service_healthy + install_plugins: + condition: service_completed_successfully + + gatewayd-2: + image: gatewayd:raft + 
command: + [ + "run", + "--config", + "/gatewayd-files/gatewayd.yaml", + "--plugin-config", + "/gatewayd-files/gatewayd_plugins.yaml", + "--tracing", + "--collector-url", + "tempo:4317", + ] + environment: + - GATEWAYD_CLIENTS_DEFAULT_WRITES_ADDRESS=write-postgres:5432 + - GATEWAYD_CLIENTS_DEFAULT_READS_ADDRESS=read-postgres:5432 + - GATEWAYD_RAFT_NODEID=node2 + - GATEWAYD_RAFT_LEADERID=node2 + - GATEWAYD_RAFT_ADDRESS=gatewayd-2:2223 + - GATEWAYD_RAFT_GRPCADDRESS=gatewayd-2:50051 + - GATEWAYD_RAFT_PEERS=[{"id":"node1","address":"gatewayd-1:2223","grpcAddress":"gatewayd-1:50051"},{"id":"node3","address":"gatewayd-3:2223","grpcAddress":"gatewayd-3:50051"}] + ports: + - "15433:15432" + - "18081:18080" + - "19091:19090" + - "42224:2223" + - "50052:50051" + volumes: + - ./gatewayd-files:/gatewayd-files:ro + - ./raft-data-2:/var/lib/gatewayd/raft + + gatewayd-3: + image: gatewayd:raft + command: + [ + "run", + "--config", + "/gatewayd-files/gatewayd.yaml", + "--plugin-config", + "/gatewayd-files/gatewayd_plugins.yaml", + "--tracing", + "--collector-url", + "tempo:4317", + ] + environment: + - GATEWAYD_CLIENTS_DEFAULT_WRITES_ADDRESS=write-postgres:5432 + - GATEWAYD_CLIENTS_DEFAULT_READS_ADDRESS=read-postgres:5432 + - GATEWAYD_RAFT_NODEID=node3 + - GATEWAYD_RAFT_LEADERID=node2 + - GATEWAYD_RAFT_ADDRESS=gatewayd-3:2223 + - GATEWAYD_RAFT_GRPCADDRESS=gatewayd-3:50051 + - GATEWAYD_RAFT_PEERS=[{"id":"node1","address":"gatewayd-1:2223","grpcAddress":"gatewayd-1:50051"},{"id":"node2","address":"gatewayd-2:2223","grpcAddress":"gatewayd-2:50051"}] + ports: + - "15434:15432" + - "18082:18080" + - "19092:19090" + - "42225:2223" + - "50053:50051" + volumes: + - ./gatewayd-files:/gatewayd-files:ro + - ./raft-data-3:/var/lib/gatewayd/raft + + prometheus: + image: prom/prometheus:latest + volumes: + - ./observability-configs/prometheus.yaml:/etc/prometheus/prometheus.yml + ports: + - "9090:9090" + depends_on: + - gatewayd-1 + - gatewayd-2 + - gatewayd-3 + + tempo_init: + image: 
&tempoImage grafana/tempo:latest + user: root + entrypoint: + - "chown" + - "10001:10001" + - "/var/tempo" + volumes: + - ./tempo-data:/var/tempo + + tempo: + image: *tempoImage + command: ["-config.file=/etc/tempo.yaml"] + volumes: + - ./observability-configs/tempo.yaml:/etc/tempo.yaml + - ./tempo-data:/var/tempo + ports: + - "4317:4317" # otlp grpc + depends_on: + - tempo_init + + grafana: + image: grafana/grafana:latest + volumes: + - ./observability-configs/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_DISABLE_LOGIN_FORM=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + ports: + - "3000:3000" + depends_on: + - prometheus + - tempo From 8ba90d2b261f1d43bc25f6e7bde33f28156d8d28 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Sun, 8 Dec 2024 19:23:28 +0100 Subject: [PATCH 12/26] refactor(raft): improve error handling and code clarity - Improve variable naming in loadEnvVarsWithTransform for better readability - Clean up error handling in forwardToLeader and ForwardApply - Add proper error propagation in RPC responses - Fix string type conversions for peer IDs and addresses - Organize imports and add missing error package - Remove unused convertPeers function - Add clarifying comments for Apply methods This commit focuses on code quality improvements and better error handling in the Raft implementation without changing core functionality. 
--- config/config.go | 8 ++++---- raft/raft.go | 41 +++++++++++++++-------------------------- raft/rpc.go | 8 ++++---- 3 files changed, 23 insertions(+), 34 deletions(-) diff --git a/config/config.go b/config/config.go index 5a260689..a0c38c00 100644 --- a/config/config.go +++ b/config/config.go @@ -347,9 +347,9 @@ func (c *Config) LoadPluginEnvVars(ctx context.Context) *gerr.GatewayDError { func loadEnvVarsWithTransform() *env.Env { // Use ProviderWithValue to transform both key and value - return env.ProviderWithValue(EnvPrefix, ".", func(s string, v string) (string, interface{}) { + return env.ProviderWithValue(EnvPrefix, ".", func(envKey string, value string) (string, interface{}) { // Transform the key - key := strings.ToLower(strings.TrimPrefix(s, EnvPrefix)) + key := strings.ToLower(strings.TrimPrefix(envKey, EnvPrefix)) structs := []any{ &API{}, @@ -382,14 +382,14 @@ func loadEnvVarsWithTransform() *env.Env { // Check if the key is "peers" and transform the value using JSON unmarshal if transformedParts.String() == "raft.peers" { var raftPeers []RaftPeer - if err := json.Unmarshal([]byte(v), &raftPeers); err != nil { + if err := json.Unmarshal([]byte(value), &raftPeers); err != nil { return transformedParts.String(), fmt.Errorf("failed to unmarshal peers: %w", err) } return transformedParts.String(), raftPeers } // Return the key and value as is if no transformation is needed - return transformedParts.String(), v + return transformedParts.String(), value }) } diff --git a/raft/raft.go b/raft/raft.go index b08cf6a2..cf6e54f7 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -3,6 +3,7 @@ package raft import ( "context" "encoding/json" + "errors" "fmt" "io" "net" @@ -12,12 +13,11 @@ import ( "time" "github.com/gatewayd-io/gatewayd/config" + pb "github.com/gatewayd-io/gatewayd/raft/proto" "github.com/hashicorp/raft" raftboltdb "github.com/hashicorp/raft-boltdb" "github.com/rs/zerolog" "google.golang.org/grpc" - - pb "github.com/gatewayd-io/gatewayd/raft/proto" ) 
// Command types for Raft operations. @@ -155,17 +155,6 @@ func NewRaftNode(logger zerolog.Logger, raftConfig config.Raft) (*Node, error) { return node, nil } -func convertPeers(configPeers []config.RaftPeer) []raft.Server { - peers := make([]raft.Server, len(configPeers)) - for i, peer := range configPeers { - peers[i] = raft.Server{ - ID: raft.ServerID(peer.ID), - Address: raft.ServerAddress(peer.Address), - } - } - return peers -} - // monitorLeadership checks if the node is the Raft leader and logs state changes. func (n *Node) monitorLeadership() { for { @@ -187,7 +176,7 @@ func (n *Node) monitorLeadership() { if peerExists { continue } - err := n.AddPeer(string(peer.ID), string(peer.Address)) + err := n.AddPeer(peer.ID, peer.Address) if err != nil { n.Logger.Error().Err(err).Msgf("Failed to add node %s to Raft cluster", peer.ID) } @@ -216,7 +205,7 @@ func (n *Node) RemovePeer(peerID string) error { return nil } -// Apply is the public method that handles forwarding if necessary +// Apply is the public method that handles forwarding if necessary. func (n *Node) Apply(data []byte, timeout time.Duration) error { if n.raft.State() != raft.Leader { return n.forwardToLeader(data, timeout) @@ -224,7 +213,7 @@ func (n *Node) Apply(data []byte, timeout time.Duration) error { return n.applyInternal(data, timeout) } -// applyInternal is the internal method that actually applies the data +// applyInternal is the internal method that actually applies the data. 
func (n *Node) applyInternal(data []byte, timeout time.Duration) error { future := n.raft.Apply(data, timeout) if err := future.Error(); err != nil { @@ -234,25 +223,25 @@ func (n *Node) applyInternal(data []byte, timeout time.Duration) error { } func (n *Node) forwardToLeader(data []byte, timeout time.Duration) error { - leaderAddr, leaderId := n.raft.LeaderWithID() - if leaderId == "" { - return fmt.Errorf("no leader available") + leaderAddr, leaderID := n.raft.LeaderWithID() + if leaderID == "" { + return errors.New("no leader available") } n.Logger.Debug(). - Str("leader_id", string(leaderId)). + Str("leader_id", string(leaderID)). Str("leader_addr", string(leaderAddr)). Msg("forwarding request to leader") var leaderGrpcAddr string for _, peer := range n.Peers { - if raft.ServerID(peer.ID) == leaderId { - leaderGrpcAddr = string(peer.GRPCAddress) + if raft.ServerID(peer.ID) == leaderID { + leaderGrpcAddr = peer.GRPCAddress break } } // Get the RPC client for the leader - client, err := n.rpcClient.getClient(string(leaderGrpcAddr)) + client, err := n.rpcClient.getClient(leaderGrpcAddr) if err != nil { return fmt.Errorf("failed to get client for leader: %w", err) } @@ -270,14 +259,14 @@ func (n *Node) forwardToLeader(data []byte, timeout time.Duration) error { return fmt.Errorf("failed to forward request: %w", err) } - if !resp.Success { - return fmt.Errorf("leader failed to apply: %s", resp.Error) + if !resp.GetSuccess() { + return fmt.Errorf("leader failed to apply: %s", resp.GetError()) } return nil } -// Update Shutdown to clean up RPC resources +// Update Shutdown to clean up RPC resources. 
func (n *Node) Shutdown() error { if n.rpcServer != nil { n.rpcServer.GracefulStop() diff --git a/raft/rpc.go b/raft/rpc.go index 50321e0b..a63ae569 100644 --- a/raft/rpc.go +++ b/raft/rpc.go @@ -15,15 +15,15 @@ type rpcServer struct { node *Node } -func (s *rpcServer) ForwardApply(ctx context.Context, req *pb.ApplyRequest) (*pb.ApplyResponse, error) { - timeout := time.Duration(req.TimeoutMs) * time.Millisecond +func (s *rpcServer) ForwardApply(_ context.Context, req *pb.ApplyRequest) (*pb.ApplyResponse, error) { + timeout := time.Duration(req.GetTimeoutMs()) * time.Millisecond - err := s.node.applyInternal(req.Data, timeout) + err := s.node.applyInternal(req.GetData(), timeout) if err != nil { return &pb.ApplyResponse{ Success: false, Error: err.Error(), - }, nil + }, err } return &pb.ApplyResponse{ From 9e15e11a08280bede5cc8b6a0a264e61bb503373 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Sun, 8 Dec 2024 23:49:00 +0100 Subject: [PATCH 13/26] Add unit tests for Raft RPC server and client - Implement `TestRPCServer_ForwardApply` to test the `ForwardApply` method of the RPC server, ensuring correct handling of apply requests with various configurations. - Implement `TestRPCClient` to verify the creation and management of RPC clients, including client retrieval and connection closure. - Utilize `setupGRPCServer` to create a gRPC server for testing purposes. - Ensure proper setup and teardown of test nodes and gRPC connections to maintain test isolation and reliability. 
--- raft/raft_test.go | 4 +- raft/rpc_test.go | 201 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 203 insertions(+), 2 deletions(-) create mode 100644 raft/rpc_test.go diff --git a/raft/raft_test.go b/raft/raft_test.go index ecbd03e3..7df0c86e 100644 --- a/raft/raft_test.go +++ b/raft/raft_test.go @@ -2,7 +2,7 @@ package raft import ( "encoding/json" - "os" + "io" "testing" "time" @@ -14,7 +14,7 @@ import ( ) func setupTestLogger() zerolog.Logger { - return zerolog.New(os.Stdout).With().Timestamp().Logger() + return zerolog.New(io.Discard).With().Timestamp().Logger() } func TestNewRaftNode(t *testing.T) { diff --git a/raft/rpc_test.go b/raft/rpc_test.go new file mode 100644 index 00000000..df5fed00 --- /dev/null +++ b/raft/rpc_test.go @@ -0,0 +1,201 @@ +package raft + +import ( + "context" + "net" + "strconv" + "testing" + "time" + + "github.com/gatewayd-io/gatewayd/config" + pb "github.com/gatewayd-io/gatewayd/raft/proto" + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +// setupGRPCServer creates a gRPC server for testing on a random port +func setupGRPCServer(t *testing.T, node *Node) (*grpc.Server, net.Listener) { + lis, err := net.Listen("tcp", "localhost:0") // Bind to a random available port + require.NoError(t, err, "Failed to create listener") + + server := grpc.NewServer() + pb.RegisterRaftServiceServer(server, &rpcServer{node: node}) + + go func() { + if err := server.Serve(lis); err != nil { + t.Errorf("Failed to serve: %v", err) + } + }() + + return server, lis +} + +// getListenerAddr returns address for the listener +func getListenerAddr(lis net.Listener) string { + return lis.Addr().String() +} + +func setupNodes(t *testing.T, logger zerolog.Logger, ports []int, tempDir string) []*Node { + nodeConfigs := []config.Raft{ + { + NodeID: "testRaftLeadershipnode1", + Address: "127.0.0.1:" + 
strconv.Itoa(ports[0]), + LeaderID: "testRaftLeadershipnode1", + Peers: []config.RaftPeer{ + {ID: "testRaftLeadershipnode2", Address: "127.0.0.1:" + strconv.Itoa(ports[1])}, + {ID: "testRaftLeadershipnode3", Address: "127.0.0.1:" + strconv.Itoa(ports[2])}, + }, + Directory: tempDir, + }, + { + NodeID: "testRaftLeadershipnode2", + Address: "127.0.0.1:" + strconv.Itoa(ports[1]), + LeaderID: "testRaftLeadershipnode1", + Peers: []config.RaftPeer{ + {ID: "testRaftLeadershipnode1", Address: "127.0.0.1:" + strconv.Itoa(ports[0])}, + {ID: "testRaftLeadershipnode3", Address: "127.0.0.1:" + strconv.Itoa(ports[2])}, + }, + Directory: tempDir, + }, + { + NodeID: "testRaftLeadershipnode3", + Address: "127.0.0.1:" + strconv.Itoa(ports[2]), + LeaderID: "testRaftLeadershipnode1", + Peers: []config.RaftPeer{ + {ID: "testRaftLeadershipnode1", Address: "127.0.0.1:" + strconv.Itoa(ports[0])}, + {ID: "testRaftLeadershipnode2", Address: "127.0.0.1:" + strconv.Itoa(ports[1])}, + }, + Directory: tempDir, + }, + } + + nodes := make([]*Node, len(nodeConfigs)) + for i, cfg := range nodeConfigs { + node, err := NewRaftNode(logger, cfg) + require.NoError(t, err, "Failed to create node") + nodes[i] = node + t.Cleanup(func() { + err := node.Shutdown() + if err != nil { + t.Errorf("Failed to shutdown node: %v", err) + } + }) + } + return nodes +} + +func TestRPCServer_ForwardApply(t *testing.T) { + tests := []struct { + name string + data []byte + timeoutMs int64 + wantSuccess bool + wantErr bool + }{ + { + name: "successful apply", + data: []byte("test data"), + timeoutMs: 1000, + wantSuccess: true, + wantErr: false, + }, + } + + for i, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + logger := setupTestLogger() + ports := []int{6004 + i, 6005 + i, 6006 + i} + + nodes := setupNodes(t, logger, ports, tempDir) + + // Wait for leader election + time.Sleep(3 * time.Second) + + server, lis := setupGRPCServer(t, nodes[0]) + defer server.Stop() + + conn, err := 
grpc.NewClient( + getListenerAddr(lis), + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + require.NoError(t, err, "Failed to create gRPC client connection") + defer conn.Close() + + client := pb.NewRaftServiceClient(conn) + resp, err := client.ForwardApply(context.Background(), &pb.ApplyRequest{ + Data: tt.data, + TimeoutMs: tt.timeoutMs, + }) + + if tt.wantErr { + assert.Error(t, err) + return + } + + require.NoError(t, err) + assert.Equal(t, tt.wantSuccess, resp.Success) + }) + } +} + +func TestRPCClient(t *testing.T) { + t.Run("new client creation", func(t *testing.T) { + node := &Node{} + client := newRPCClient(node) + assert.NotNil(t, client) + assert.Empty(t, client.clients) + assert.Empty(t, client.conns) + assert.Equal(t, node, client.node) + }) + + t.Run("get client", func(t *testing.T) { + tempDir := t.TempDir() + logger := setupTestLogger() + ports := []int{6014, 6015, 6016} + + nodes := setupNodes(t, logger, ports, tempDir) + + // Wait for leader election + time.Sleep(3 * time.Second) + client := newRPCClient(nodes[0]) + + server, lis := setupGRPCServer(t, nodes[0]) + defer server.Stop() + + conn, err := grpc.NewClient(getListenerAddr(lis), + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + require.NoError(t, err, "Failed to create gRPC client connection") + defer conn.Close() + + client.conns[getListenerAddr(lis)] = conn + client.clients[getListenerAddr(lis)] = pb.NewRaftServiceClient(conn) + + existingClient, err := client.getClient(getListenerAddr(lis)) + assert.NoError(t, err) + assert.NotNil(t, existingClient) + }) + + t.Run("close connections", func(t *testing.T) { + node := &Node{} + client := newRPCClient(node) + + server, lis := setupGRPCServer(t, node) + defer server.Stop() + + conn, err := grpc.NewClient(getListenerAddr(lis), + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + require.NoError(t, err, "Failed to create gRPC client connection") + defer conn.Close() + + 
client.conns[getListenerAddr(lis)] = conn + client.close() + + assert.NotEqual(t, "READY", conn.GetState().String()) + }) +} From d3042bc6659d39945d2f4e5efbf5976be0bb99ea Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Sun, 8 Dec 2024 23:57:20 +0100 Subject: [PATCH 14/26] Update Raft configuration in gatewayd.yaml - Change `nodeId` and `leaderId` from `node2` to `node1`. - Add `grpcAddress` with value `127.0.0.1:50051`. - Update `peers` to an empty list instead of an empty dictionary. These changes adjust the Raft configuration to reflect the new node setup and include a gRPC address for communication. --- gatewayd.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gatewayd.yaml b/gatewayd.yaml index 700931a0..a2889819 100644 --- a/gatewayd.yaml +++ b/gatewayd.yaml @@ -109,8 +109,9 @@ api: raft: address: 127.0.0.1:2223 - nodeId: node2 - leaderId: node2 - peers: {} - # - id: node1 - # address: 127.0.0.1:2222 + nodeId: node1 + leaderId: node1 + grpcAddress: 127.0.0.1:50051 + peers: [] + # - id: node1 + # address: 127.0.0.1:2222 From bd458bb45147a60f1d67f7ecc05fb2250e198c7a Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Mon, 9 Dec 2024 13:34:30 +0100 Subject: [PATCH 15/26] Convert RaftPeer slice to string for environment variable compatibility The function `v1.NewStruct(args)` only accepts `NewValue`, which requires converting certain types to strings. This change adds support for converting a slice of `config.RaftPeer` to a comma-separated string format. Each peer is formatted as "ID:Address:GRPCAddress". This conversion is necessary to overwrite the peers as an environment variable. 
--- plugin/utils.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/plugin/utils.go b/plugin/utils.go index 64713514..aeb576bd 100644 --- a/plugin/utils.go +++ b/plugin/utils.go @@ -1,11 +1,14 @@ package plugin import ( + "fmt" "os/exec" + "strings" "time" sdkAct "github.com/gatewayd-io/gatewayd-plugin-sdk/act" "github.com/gatewayd-io/gatewayd/act" + "github.com/gatewayd-io/gatewayd/config" "github.com/rs/zerolog" "github.com/spf13/cast" ) @@ -47,6 +50,13 @@ func castToPrimitiveTypes(args map[string]any) map[string]any { // Recursively cast nested maps. args[key] = castToPrimitiveTypes(valuemap) } + case []config.RaftPeer: + // Cast raft peers to comma-separated string using strings.Join + peers := make([]string, len(value)) + for i, peer := range value { + peers[i] = fmt.Sprintf("%s:%s:%s", peer.ID, peer.Address, peer.GRPCAddress) + } + args[key] = strings.Join(peers, ",") // TODO: Add more types here as needed. default: args[key] = value From 72230cdd7615e423f962f90238fa736caaf7ae4e Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Mon, 9 Dec 2024 13:35:49 +0100 Subject: [PATCH 16/26] Update checksum in gatewayd_plugins.yaml - Updated the checksum value for the plugin configuration to ensure integrity and consistency with the latest changes. 
--- gatewayd_plugins.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatewayd_plugins.yaml b/gatewayd_plugins.yaml index dce0b285..0ddccb02 100644 --- a/gatewayd_plugins.yaml +++ b/gatewayd_plugins.yaml @@ -85,4 +85,4 @@ plugins: - EXIT_ON_STARTUP_ERROR=False - SENTRY_DSN=https://70eb1abcd32e41acbdfc17bc3407a543@o4504550475038720.ingest.sentry.io/4505342961123328 - CACHE_CHANNEL_BUFFER_SIZE=100 - checksum: 054e7dba9c1e3e3910f4928a000d35c8a6199719fad505c66527f3e9b1993833 + checksum: c2c5fcc2877f1b9d479e602b96d077c57b0141a5bd059ef25faf5893ac8ab80d From 09e31e5cd3b943f0bc9e8e746df1b38271d8f6fe Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Mon, 9 Dec 2024 14:13:15 +0100 Subject: [PATCH 17/26] Refactor Raft configuration to use `IsBootstrap` flag - Replaced `LeaderID` with `IsBootstrap` in Raft configuration across multiple files. - Updated YAML configuration files (`gatewayd.yaml`, `docker-compose-raft.yaml`) to reflect the new `IsBootstrap` flag. - Modified Go source files (`config.go`, `constants.go`, `types.go`, `raft.go`) to use `IsBootstrap` instead of `LeaderID`. - Adjusted test cases in `raft_test.go`, `rpc_test.go`, and `raft_helpers.go` to accommodate the new `IsBootstrap` flag. - Ensured that the `IsBootstrap` flag is correctly set for nodes intended to bootstrap the Raft cluster. 
--- cmd/testdata/gatewayd.yaml | 2 +- config/config.go | 2 +- config/constants.go | 2 +- config/types.go | 2 +- docker-compose-raft.yaml | 6 +++--- gatewayd.yaml | 2 +- raft/raft.go | 3 +-- raft/raft_test.go | 40 ++++++++++++++++++------------------- raft/rpc_test.go | 18 ++++++++--------- testhelpers/raft_helpers.go | 10 +++++----- 10 files changed, 43 insertions(+), 44 deletions(-) diff --git a/cmd/testdata/gatewayd.yaml b/cmd/testdata/gatewayd.yaml index 96ee280b..c533b2d2 100644 --- a/cmd/testdata/gatewayd.yaml +++ b/cmd/testdata/gatewayd.yaml @@ -77,5 +77,5 @@ api: raft: address: 127.0.0.1:2223 nodeID: node2 - leaderID: node2 + isBootstrap: true peers: {} diff --git a/config/config.go b/config/config.go index a0c38c00..6c57ae95 100644 --- a/config/config.go +++ b/config/config.go @@ -183,7 +183,7 @@ func (c *Config) LoadDefaults(ctx context.Context) *gerr.GatewayDError { Raft: Raft{ Address: DefaultRaftAddress, NodeID: DefaultRaftNodeID, - LeaderID: DefaultRaftLeaderID, + IsBootstrap: DefaultRaftIsBootstrap, Directory: DefaultRaftDirectory, GRPCAddress: DefaultRaftGRPCAddress, }, diff --git a/config/constants.go b/config/constants.go index 56b236d5..6107e16f 100644 --- a/config/constants.go +++ b/config/constants.go @@ -130,7 +130,7 @@ const ( // Raft constants. 
DefaultRaftAddress = "127.0.0.1:2223" DefaultRaftNodeID = "node1" - DefaultRaftLeaderID = "node1" + DefaultRaftIsBootstrap = true DefaultRaftDirectory = "raft" DefaultRaftGRPCAddress = "127.0.0.1:50051" ) diff --git a/config/types.go b/config/types.go index 67be5c3e..351a6e16 100644 --- a/config/types.go +++ b/config/types.go @@ -141,7 +141,7 @@ type API struct { type Raft struct { Address string `json:"address"` NodeID string `json:"nodeId"` - LeaderID string `json:"leaderId"` + IsBootstrap bool `json:"isBootstrap"` Peers []RaftPeer `json:"peers"` Directory string `json:"directory" jsonschema:"default=raft"` GRPCAddress string `json:"grpcAddress"` diff --git a/docker-compose-raft.yaml b/docker-compose-raft.yaml index a9996660..45f9e92a 100644 --- a/docker-compose-raft.yaml +++ b/docker-compose-raft.yaml @@ -63,7 +63,7 @@ services: - GATEWAYD_CLIENTS_DEFAULT_WRITES_ADDRESS=write-postgres:5432 - GATEWAYD_CLIENTS_DEFAULT_READS_ADDRESS=read-postgres:5432 - GATEWAYD_RAFT_NODEID=node1 - - GATEWAYD_RAFT_LEADERID=node2 + - GATEWAYD_RAFT_ISBOOTSTRAP=true - GATEWAYD_RAFT_ADDRESS=gatewayd-1:2223 - GATEWAYD_RAFT_GRPCADDRESS=gatewayd-1:50051 - GATEWAYD_RAFT_PEERS=[{"id":"node2","address":"gatewayd-2:2223","grpcAddress":"gatewayd-2:50051"},{"id":"node3","address":"gatewayd-3:2223","grpcAddress":"gatewayd-3:50051"}] @@ -112,7 +112,7 @@ services: - GATEWAYD_CLIENTS_DEFAULT_WRITES_ADDRESS=write-postgres:5432 - GATEWAYD_CLIENTS_DEFAULT_READS_ADDRESS=read-postgres:5432 - GATEWAYD_RAFT_NODEID=node2 - - GATEWAYD_RAFT_LEADERID=node2 + - GATEWAYD_RAFT_ISBOOTSTRAP=true - GATEWAYD_RAFT_ADDRESS=gatewayd-2:2223 - GATEWAYD_RAFT_GRPCADDRESS=gatewayd-2:50051 - GATEWAYD_RAFT_PEERS=[{"id":"node1","address":"gatewayd-1:2223","grpcAddress":"gatewayd-1:50051"},{"id":"node3","address":"gatewayd-3:2223","grpcAddress":"gatewayd-3:50051"}] @@ -143,7 +143,7 @@ services: - GATEWAYD_CLIENTS_DEFAULT_WRITES_ADDRESS=write-postgres:5432 - GATEWAYD_CLIENTS_DEFAULT_READS_ADDRESS=read-postgres:5432 - 
GATEWAYD_RAFT_NODEID=node3 - - GATEWAYD_RAFT_LEADERID=node2 + - GATEWAYD_RAFT_ISBOOTSTRAP=false - GATEWAYD_RAFT_ADDRESS=gatewayd-3:2223 - GATEWAYD_RAFT_GRPCADDRESS=gatewayd-3:50051 - GATEWAYD_RAFT_PEERS=[{"id":"node1","address":"gatewayd-1:2223","grpcAddress":"gatewayd-1:50051"},{"id":"node2","address":"gatewayd-2:2223","grpcAddress":"gatewayd-2:50051"}] diff --git a/gatewayd.yaml b/gatewayd.yaml index a2889819..333bf603 100644 --- a/gatewayd.yaml +++ b/gatewayd.yaml @@ -110,7 +110,7 @@ api: raft: address: 127.0.0.1:2223 nodeId: node1 - leaderId: node1 + isBootstrap: True grpcAddress: 127.0.0.1:50051 peers: [] # - id: node1 diff --git a/raft/raft.go b/raft/raft.go index cf6e54f7..28a5d7ad 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -132,8 +132,7 @@ func NewRaftNode(logger zerolog.Logger, raftConfig config.Raft) (*Node, error) { } // Handle bootstrapping - isBootstrap := raftConfig.LeaderID == nodeID - if isBootstrap { + if raftConfig.IsBootstrap { configuration := raft.Configuration{ Servers: make([]raft.Server, len(node.Peers)), } diff --git a/raft/raft_test.go b/raft/raft_test.go index 7df0c86e..88e8509f 100644 --- a/raft/raft_test.go +++ b/raft/raft_test.go @@ -29,9 +29,9 @@ func TestNewRaftNode(t *testing.T) { { name: "valid configuration", raftConfig: config.Raft{ - NodeID: "testRaftNodeValidConfigurationnode1", - Address: "127.0.0.1:6001", - LeaderID: "testRaftNodeValidConfigurationnode1", + NodeID: "testRaftNodeValidConfigurationnode1", + Address: "127.0.0.1:6001", + IsBootstrap: true, Peers: []config.RaftPeer{ {ID: "testRaftNodeValidConfigurationnode2", Address: "127.0.0.1:6002"}, }, @@ -42,10 +42,10 @@ func TestNewRaftNode(t *testing.T) { { name: "invalid address", raftConfig: config.Raft{ - NodeID: "testRaftNodeInvalidAddressnode1", - Address: "invalid:address:", - LeaderID: "testRaftNodeInvalidAddressnode1", - Directory: tempDir, + NodeID: "testRaftNodeInvalidAddressnode1", + Address: "invalid:address:", + IsBootstrap: true, + Directory: tempDir, }, 
wantErr: true, }, @@ -123,10 +123,10 @@ func TestRaftNodeApply(t *testing.T) { logger := setupTestLogger() tempDir := t.TempDir() config := config.Raft{ - NodeID: "testRaftNodeApplynode1", - Address: "127.0.0.1:6003", - LeaderID: "testRaftNodeApplynode1", - Directory: tempDir, + NodeID: "testRaftNodeApplynode1", + Address: "127.0.0.1:6003", + IsBootstrap: true, + Directory: tempDir, } node, err := NewRaftNode(logger, config) @@ -156,9 +156,9 @@ func TestRaftLeadershipAndFollowers(t *testing.T) { // Configure three nodes with unique ports nodeConfigs := []config.Raft{ { - NodeID: "testRaftLeadershipnode1", - Address: "127.0.0.1:6004", - LeaderID: "testRaftLeadershipnode1", + NodeID: "testRaftLeadershipnode1", + Address: "127.0.0.1:6004", + IsBootstrap: true, Peers: []config.RaftPeer{ {ID: "testRaftLeadershipnode2", Address: "127.0.0.1:6005"}, {ID: "testRaftLeadershipnode3", Address: "127.0.0.1:6006"}, @@ -166,9 +166,9 @@ func TestRaftLeadershipAndFollowers(t *testing.T) { Directory: tempDir, }, { - NodeID: "testRaftLeadershipnode2", - Address: "127.0.0.1:6005", - LeaderID: "testRaftLeadershipnode1", + NodeID: "testRaftLeadershipnode2", + Address: "127.0.0.1:6005", + IsBootstrap: false, Peers: []config.RaftPeer{ {ID: "testRaftLeadershipnode1", Address: "127.0.0.1:6004"}, {ID: "testRaftLeadershipnode3", Address: "127.0.0.1:6006"}, @@ -176,9 +176,9 @@ func TestRaftLeadershipAndFollowers(t *testing.T) { Directory: tempDir, }, { - NodeID: "testRaftLeadershipnode3", - Address: "127.0.0.1:6006", - LeaderID: "testRaftLeadershipnode1", + NodeID: "testRaftLeadershipnode3", + Address: "127.0.0.1:6006", + IsBootstrap: false, Peers: []config.RaftPeer{ {ID: "testRaftLeadershipnode1", Address: "127.0.0.1:6004"}, {ID: "testRaftLeadershipnode2", Address: "127.0.0.1:6005"}, diff --git a/raft/rpc_test.go b/raft/rpc_test.go index df5fed00..018f54fc 100644 --- a/raft/rpc_test.go +++ b/raft/rpc_test.go @@ -41,9 +41,9 @@ func getListenerAddr(lis net.Listener) string { func setupNodes(t 
*testing.T, logger zerolog.Logger, ports []int, tempDir string) []*Node { nodeConfigs := []config.Raft{ { - NodeID: "testRaftLeadershipnode1", - Address: "127.0.0.1:" + strconv.Itoa(ports[0]), - LeaderID: "testRaftLeadershipnode1", + NodeID: "testRaftLeadershipnode1", + Address: "127.0.0.1:" + strconv.Itoa(ports[0]), + IsBootstrap: true, Peers: []config.RaftPeer{ {ID: "testRaftLeadershipnode2", Address: "127.0.0.1:" + strconv.Itoa(ports[1])}, {ID: "testRaftLeadershipnode3", Address: "127.0.0.1:" + strconv.Itoa(ports[2])}, @@ -51,9 +51,9 @@ func setupNodes(t *testing.T, logger zerolog.Logger, ports []int, tempDir string Directory: tempDir, }, { - NodeID: "testRaftLeadershipnode2", - Address: "127.0.0.1:" + strconv.Itoa(ports[1]), - LeaderID: "testRaftLeadershipnode1", + NodeID: "testRaftLeadershipnode2", + Address: "127.0.0.1:" + strconv.Itoa(ports[1]), + IsBootstrap: false, Peers: []config.RaftPeer{ {ID: "testRaftLeadershipnode1", Address: "127.0.0.1:" + strconv.Itoa(ports[0])}, {ID: "testRaftLeadershipnode3", Address: "127.0.0.1:" + strconv.Itoa(ports[2])}, @@ -61,9 +61,9 @@ func setupNodes(t *testing.T, logger zerolog.Logger, ports []int, tempDir string Directory: tempDir, }, { - NodeID: "testRaftLeadershipnode3", - Address: "127.0.0.1:" + strconv.Itoa(ports[2]), - LeaderID: "testRaftLeadershipnode1", + NodeID: "testRaftLeadershipnode3", + Address: "127.0.0.1:" + strconv.Itoa(ports[2]), + IsBootstrap: false, Peers: []config.RaftPeer{ {ID: "testRaftLeadershipnode1", Address: "127.0.0.1:" + strconv.Itoa(ports[0])}, {ID: "testRaftLeadershipnode2", Address: "127.0.0.1:" + strconv.Itoa(ports[1])}, diff --git a/testhelpers/raft_helpers.go b/testhelpers/raft_helpers.go index 9503a80c..af69d144 100644 --- a/testhelpers/raft_helpers.go +++ b/testhelpers/raft_helpers.go @@ -54,11 +54,11 @@ func NewTestRaftNode(t *testing.T) (*TestRaftHelper, error) { // Create Raft configuration raftConfig := config.Raft{ - NodeID: nodeID, - Address: raftAddr, - LeaderID: nodeID, // Make 
this node the leader for testing - Peers: []config.RaftPeer{}, // Empty peers for single-node testing - Directory: tempDir, + NodeID: nodeID, + Address: raftAddr, + IsBootstrap: true, // Make this node the leader for testing + Peers: []config.RaftPeer{}, // Empty peers for single-node testing + Directory: tempDir, } // Create new Raft node From f395f0c7a66b8ba0a69d278c1dd06b6942658125 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Mon, 9 Dec 2024 15:31:04 +0100 Subject: [PATCH 18/26] Increase the sleep time to pass the test case on the local machine. --- act/registry_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/act/registry_test.go b/act/registry_test.go index 53665764..44204de1 100644 --- a/act/registry_test.go +++ b/act/registry_test.go @@ -781,7 +781,7 @@ func Test_Run_Async_Redis(t *testing.T) { assert.Equal(t, err, gerr.ErrAsyncAction, "expected async action sentinel error") assert.Nil(t, result, "expected nil result") - time.Sleep(time.Millisecond) // wait for async action to complete + time.Sleep(time.Millisecond * 2) // wait for async action to complete // The following is the expected log output from running the async action. assert.Contains(t, out.String(), "{\"level\":\"debug\",\"action\":\"log\",\"executionMode\":\"async\",\"message\":\"Running action\"}") //nolint:lll From 2068618900be26c02d680a2f80178a4a59af57a1 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Mon, 9 Dec 2024 15:31:51 +0100 Subject: [PATCH 19/26] fix: resolve lint issues in rpc_test.go - Added `t.Helper()` to `setupGRPCServer` and `setupNodes` functions to improve test helper identification. - Corrected variable naming in `TestRPCServer_ForwardApply` for clarity and consistency. - Ensured comments end with a period for consistency. - Updated assertions to use `GetSuccess()` method for better readability. 
--- raft/rpc_test.go | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/raft/rpc_test.go b/raft/rpc_test.go index 018f54fc..f0a7a25b 100644 --- a/raft/rpc_test.go +++ b/raft/rpc_test.go @@ -16,8 +16,9 @@ import ( "google.golang.org/grpc/credentials/insecure" ) -// setupGRPCServer creates a gRPC server for testing on a random port +// setupGRPCServer creates a gRPC server for testing on a random port. func setupGRPCServer(t *testing.T, node *Node) (*grpc.Server, net.Listener) { + t.Helper() lis, err := net.Listen("tcp", "localhost:0") // Bind to a random available port require.NoError(t, err, "Failed to create listener") @@ -33,12 +34,13 @@ func setupGRPCServer(t *testing.T, node *Node) (*grpc.Server, net.Listener) { return server, lis } -// getListenerAddr returns address for the listener +// getListenerAddr returns address for the listener. func getListenerAddr(lis net.Listener) string { return lis.Addr().String() } func setupNodes(t *testing.T, logger zerolog.Logger, ports []int, tempDir string) []*Node { + t.Helper() nodeConfigs := []config.Raft{ { NodeID: "testRaftLeadershipnode1", @@ -103,9 +105,8 @@ func TestRPCServer_ForwardApply(t *testing.T) { wantErr: false, }, } - - for i, tt := range tests { - t.Run(tt.name, func(t *testing.T) { + for i, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { tempDir := t.TempDir() logger := setupTestLogger() ports := []int{6004 + i, 6005 + i, 6006 + i} @@ -127,17 +128,17 @@ func TestRPCServer_ForwardApply(t *testing.T) { client := pb.NewRaftServiceClient(conn) resp, err := client.ForwardApply(context.Background(), &pb.ApplyRequest{ - Data: tt.data, - TimeoutMs: tt.timeoutMs, + Data: testCase.data, + TimeoutMs: testCase.timeoutMs, }) - if tt.wantErr { + if testCase.wantErr { assert.Error(t, err) return } require.NoError(t, err) - assert.Equal(t, tt.wantSuccess, resp.Success) + assert.Equal(t, testCase.wantSuccess, resp.GetSuccess()) }) } } From 
01c223470fcc00ec57507010af03af6cd927dce3 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Mon, 9 Dec 2024 15:57:15 +0100 Subject: [PATCH 20/26] feat: Improve code readability with comments and updates - Updated Docker image references in `docker-compose-raft.yaml` to use `gatewaydio/gatewayd:latest` and added `pull_policy: always` for consistent image updates. - Changed server and API addresses in `gatewayd.yaml` for better port management. - Enhanced logging in `raft.go` by switching from `Info` to `Debug` for certain messages to reduce verbosity. - Added detailed comments in `raft.go` and `rpc.go` to explain the purpose and functionality of key methods, improving code readability and maintainability. - Introduced new helper functions with comments to clarify their roles in the Raft and RPC processes. --- docker-compose-raft.yaml | 9 ++++++--- gatewayd.yaml | 12 ++++++------ raft/raft.go | 13 ++++++++++--- raft/rpc.go | 6 ++++++ 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/docker-compose-raft.yaml b/docker-compose-raft.yaml index 45f9e92a..f02b958b 100644 --- a/docker-compose-raft.yaml +++ b/docker-compose-raft.yaml @@ -47,7 +47,8 @@ services: timeout: 5s retries: 5 gatewayd-1: - image: gatewayd:raft + image: gatewaydio/gatewayd:latest + pull_policy: always command: [ "run", @@ -96,7 +97,8 @@ services: condition: service_completed_successfully gatewayd-2: - image: gatewayd:raft + image: gatewaydio/gatewayd:latest + pull_policy: always command: [ "run", @@ -127,7 +129,8 @@ services: - ./raft-data-2:/var/lib/gatewayd/raft gatewayd-3: - image: gatewayd:raft + image: gatewaydio/gatewayd:latest + pull_policy: always command: [ "run", diff --git a/gatewayd.yaml b/gatewayd.yaml index 333bf603..0a5ad17a 100644 --- a/gatewayd.yaml +++ b/gatewayd.yaml @@ -80,7 +80,7 @@ proxies: servers: default: network: tcp - address: 0.0.0.0:15433 + address: 0.0.0.0:15432 loadBalancer: # Load balancer strategies can be found in config/constants.go strategy: ROUND_ROBIN 
# ROUND_ROBIN, RANDOM, WEIGHTED_ROUND_ROBIN @@ -103,15 +103,15 @@ servers: api: enabled: True - httpAddress: 0.0.0.0:18081 + httpAddress: 0.0.0.0:18080 grpcNetwork: tcp - grpcAddress: 0.0.0.0:19091 + grpcAddress: 0.0.0.0:19090 raft: - address: 127.0.0.1:2223 + address: 127.0.0.1:2222 nodeId: node1 isBootstrap: True grpcAddress: 127.0.0.1:50051 peers: [] - # - id: node1 - # address: 127.0.0.1:2222 + # - id: node2 + # address: 127.0.0.1:2223 diff --git a/raft/raft.go b/raft/raft.go index 28a5d7ad..80b8a933 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -168,7 +168,7 @@ func (n *Node) monitorLeadership() { for _, server := range existingConfig.Servers { if server.ID == raft.ServerID(peer.ID) { peerExists = true - n.Logger.Info().Msgf("Peer %s already exists in Raft cluster, skipping", peer.ID) + n.Logger.Debug().Msgf("Peer %s already exists in Raft cluster, skipping", peer.ID) break } } @@ -181,10 +181,10 @@ func (n *Node) monitorLeadership() { } } } else { - n.Logger.Info().Msg("This node is a Raft follower") + n.Logger.Debug().Msg("This node is a Raft follower") } - time.Sleep(leadershipCheckInterval) // Use the named constant here + time.Sleep(leadershipCheckInterval) } } @@ -221,6 +221,10 @@ func (n *Node) applyInternal(data []byte, timeout time.Duration) error { return nil } +// forwardToLeader forwards a request to the current Raft leader node. It first identifies +// the leader's gRPC address by matching the leader ID with the peer list. Then it establishes +// a gRPC connection to forward the request. The method handles timeouts and returns any errors +// that occur during forwarding. func (n *Node) forwardToLeader(data []byte, timeout time.Duration) error { leaderAddr, leaderID := n.raft.LeaderWithID() if leaderID == "" { @@ -356,6 +360,7 @@ type FSMSnapshot struct { lbHashToBlockName map[uint64]string } +// Persist writes the FSMSnapshot data to the given SnapshotSink. 
func (f *FSMSnapshot) Persist(sink raft.SnapshotSink) error { err := json.NewEncoder(sink).Encode(f.lbHashToBlockName) if err != nil { @@ -377,6 +382,8 @@ func (n *Node) GetState() raft.RaftState { return n.raft.State() } +// startRPCServer starts a gRPC server on the configured address to handle Raft RPC requests. +// It returns an error if the server fails to start listening on the configured address. func (n *Node) startRPCServer() error { listener, err := net.Listen("tcp", n.grpcAddr) if err != nil { diff --git a/raft/rpc.go b/raft/rpc.go index a63ae569..1a1aa3d7 100644 --- a/raft/rpc.go +++ b/raft/rpc.go @@ -10,11 +10,13 @@ import ( "google.golang.org/grpc/credentials/insecure" ) +// rpcServer implements the RaftServiceServer interface and handles incoming RPC requests. type rpcServer struct { pb.UnimplementedRaftServiceServer node *Node } +// ForwardApply processes an ApplyRequest by applying the data to the node with a specified timeout. func (s *rpcServer) ForwardApply(_ context.Context, req *pb.ApplyRequest) (*pb.ApplyResponse, error) { timeout := time.Duration(req.GetTimeoutMs()) * time.Millisecond @@ -31,12 +33,14 @@ func (s *rpcServer) ForwardApply(_ context.Context, req *pb.ApplyRequest) (*pb.A }, nil } +// rpcClient manages gRPC clients and connections for communicating with other nodes. type rpcClient struct { clients map[string]pb.RaftServiceClient conns map[string]*grpc.ClientConn node *Node } +// newRPCClient creates a new rpcClient for the given node. func newRPCClient(node *Node) *rpcClient { return &rpcClient{ clients: make(map[string]pb.RaftServiceClient), @@ -45,6 +49,7 @@ func newRPCClient(node *Node) *rpcClient { } } +// getClient retrieves or establishes a gRPC client connection to the specified address. 
func (c *rpcClient) getClient(address string) (pb.RaftServiceClient, error) { if client, ok := c.clients[address]; ok { return client, nil @@ -64,6 +69,7 @@ func (c *rpcClient) getClient(address string) (pb.RaftServiceClient, error) { return client, nil } +// close terminates all gRPC client connections managed by the rpcClient. func (c *rpcClient) close() { for _, conn := range c.conns { conn.Close() From 5d0c65f3f51102a0f553c80745edeebb93886fcf Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Mon, 9 Dec 2024 16:31:55 +0100 Subject: [PATCH 21/26] Improve Redis container setup and async test handling - Updated `createTestRedis` in `act_helpers_test.go` to use `wait.ForAll` for better reliability by ensuring both log readiness and port listening. - Enhanced `Test_Run_Async_Redis` in `registry_test.go` by adding a context with a timeout to the consumer subscription for improved test robustness. - Simplified the sleep duration in `Test_Run_Async_Redis` to reduce unnecessary wait time. --- act/act_helpers_test.go | 5 ++++- act/registry_test.go | 12 +++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/act/act_helpers_test.go b/act/act_helpers_test.go index 7805192b..8157892d 100644 --- a/act/act_helpers_test.go +++ b/act/act_helpers_test.go @@ -62,7 +62,10 @@ func createTestRedis(t *testing.T) string { req := testcontainers.ContainerRequest{ Image: "redis:6", ExposedPorts: []string{"6379/tcp"}, - WaitingFor: wait.ForLog("Ready to accept connections"), + WaitingFor: wait.ForAll( + wait.ForLog("Ready to accept connections"), + wait.ForListeningPort("6379/tcp"), + ), } redisContainer, err := testcontainers.GenericContainer( ctx, testcontainers.GenericContainerRequest{ diff --git a/act/registry_test.go b/act/registry_test.go index 44204de1..d620865e 100644 --- a/act/registry_test.go +++ b/act/registry_test.go @@ -747,10 +747,12 @@ func Test_Run_Async_Redis(t *testing.T) { consumer, err := sdkAct.NewConsumer(hclogger, rdb, 5, "test-async-chan") 
require.NoError(t, err) - require.NoError(t, consumer.Subscribe(context.Background(), func(ctx context.Context, task []byte) error { - err := actRegistry.runAsyncActionFn(ctx, task) - waitGroup.Done() - return err + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + require.NoError(t, consumer.Subscribe(ctx, func(ctx context.Context, task []byte) error { + defer waitGroup.Done() + return actRegistry.runAsyncActionFn(ctx, task) })) outputs := actRegistry.Apply([]sdkAct.Signal{ @@ -781,7 +783,7 @@ func Test_Run_Async_Redis(t *testing.T) { assert.Equal(t, err, gerr.ErrAsyncAction, "expected async action sentinel error") assert.Nil(t, result, "expected nil result") - time.Sleep(time.Millisecond * 2) // wait for async action to complete + time.Sleep(time.Millisecond) // wait for async action to complete // The following is the expected log output from running the async action. assert.Contains(t, out.String(), "{\"level\":\"debug\",\"action\":\"log\",\"executionMode\":\"async\",\"message\":\"Running action\"}") //nolint:lll From 61945e3b10027fe05b2da893841453cdfb07125c Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Fri, 13 Dec 2024 15:00:46 +0100 Subject: [PATCH 22/26] Handle Fatal Error on Raft Node Initialization Failure - Added error handling to record and log errors when Raft node initialization fails. - Ensured the application exits with a specific error code if the Raft node cannot be started. - Updated tests to set environment variables for Raft node configuration. - Added a new error code for Raft node startup failure in the error definitions. This change ensures that if the Raft node cannot be configured and started, the application will terminate gracefully, preventing further execution with an invalid state. 
--- cmd/run.go | 6 ++++++ cmd/run_test.go | 8 ++++++++ errors/errors.go | 1 + 3 files changed, 15 insertions(+) diff --git a/cmd/run.go b/cmd/run.go index 6f7e95eb..0dad18a8 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -911,9 +911,15 @@ var runCmd = &cobra.Command{ span.End() + _, span = otel.Tracer(config.TracerName).Start(runCtx, "Create Raft Node") + defer span.End() + raftNode, originalErr := raft.NewRaftNode(logger, conf.Global.Raft) if originalErr != nil { logger.Error().Err(originalErr).Msg("Failed to start raft node") + span.RecordError(originalErr) + pluginRegistry.Shutdown() + os.Exit(gerr.FailedToStartRaftNode) } _, span = otel.Tracer(config.TracerName).Start(runCtx, "Create servers") diff --git a/cmd/run_test.go b/cmd/run_test.go index 7881109d..37fd54c5 100644 --- a/cmd/run_test.go +++ b/cmd/run_test.go @@ -25,6 +25,8 @@ func Test_runCmd(t *testing.T) { tempDir := t.TempDir() t.Setenv("GATEWAYD_RAFT_DIRECTORY", tempDir) + t.Setenv("GATEWAYD_RAFT_ADDRESS", "127.0.0.1:0") + t.Setenv("GATEWAYD_RAFT_GRPCADDRESS", "127.0.0.1:0") globalTestConfigFile := "./test_global_runCmd.yaml" pluginTestConfigFile := "./test_plugins_runCmd.yaml" @@ -92,6 +94,8 @@ func Test_runCmdWithTLS(t *testing.T) { tempDir := t.TempDir() t.Setenv("GATEWAYD_RAFT_DIRECTORY", tempDir) + t.Setenv("GATEWAYD_RAFT_ADDRESS", "127.0.0.1:0") + t.Setenv("GATEWAYD_RAFT_GRPCADDRESS", "127.0.0.1:0") globalTLSTestConfigFile := "./testdata/gatewayd_tls.yaml" pluginTestConfigFile := "./test_plugins_runCmdWithTLS.yaml" @@ -158,6 +162,8 @@ func Test_runCmdWithMultiTenancy(t *testing.T) { tempDir := t.TempDir() t.Setenv("GATEWAYD_RAFT_DIRECTORY", tempDir) + t.Setenv("GATEWAYD_RAFT_ADDRESS", "127.0.0.1:0") + t.Setenv("GATEWAYD_RAFT_GRPCADDRESS", "127.0.0.1:0") globalTestConfigFile := "./testdata/gatewayd.yaml" pluginTestConfigFile := "./test_plugins_runCmdWithMultiTenancy.yaml" @@ -222,6 +228,8 @@ func Test_runCmdWithCachePlugin(t *testing.T) { tempDir := t.TempDir() t.Setenv("GATEWAYD_RAFT_DIRECTORY", 
tempDir) + t.Setenv("GATEWAYD_RAFT_ADDRESS", "127.0.0.1:0") + t.Setenv("GATEWAYD_RAFT_GRPCADDRESS", "127.0.0.1:0") globalTestConfigFile := "./test_global_runCmdWithCachePlugin.yaml" pluginTestConfigFile := "./test_plugins_runCmdWithCachePlugin.yaml" diff --git a/errors/errors.go b/errors/errors.go index a511470c..61bb0e32 100644 --- a/errors/errors.go +++ b/errors/errors.go @@ -219,4 +219,5 @@ const ( FailedToStartServer = 3 FailedToStartTracer = 4 FailedToCreateActRegistry = 5 + FailedToStartRaftNode = 6 ) From ef3745bc6803fd58014b23e0daffad1e5db68017 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Fri, 13 Dec 2024 15:03:23 +0100 Subject: [PATCH 23/26] Update test configuration in gatewayd.yaml - Changed the raft address from 127.0.0.1:2223 to 127.0.0.1:2222. - Updated the nodeID from node2 to node1. These updates are made to the test data configuration to align with the current test case requirements. --- cmd/testdata/gatewayd.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/testdata/gatewayd.yaml b/cmd/testdata/gatewayd.yaml index c533b2d2..fb453dcc 100644 --- a/cmd/testdata/gatewayd.yaml +++ b/cmd/testdata/gatewayd.yaml @@ -75,7 +75,7 @@ api: enabled: True raft: - address: 127.0.0.1:2223 - nodeID: node2 + address: 127.0.0.1:2222 + nodeID: node1 isBootstrap: true peers: {} From d6f286b177eedc663e1fca7dda76fcb8c957f778 Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Fri, 13 Dec 2024 15:07:06 +0100 Subject: [PATCH 24/26] Update comment to accurately describe Raft configuration constants The comment above the constants was misleading, suggesting they were only command types. Updated the comment to reflect that these constants are related to Raft operations. --- raft/raft.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raft/raft.go b/raft/raft.go index 80b8a933..b0504e3d 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -20,7 +20,7 @@ import ( "google.golang.org/grpc" ) -// Command types for Raft operations. 
+// Configuration constants for Raft operations. const ( CommandAddConsistentHashEntry = "ADD_CONSISTENT_HASH_ENTRY" RaftLeaderState = raft.Leader From 586efd79aa8e19865cf1600ae2c948c64350188c Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Fri, 13 Dec 2024 15:13:45 +0100 Subject: [PATCH 25/26] Simplify leader check in monitorLeadership function - Removed the unnecessary `isLeader` variable in the `monitorLeadership` function. - Directly checked the node's state against `raft.Leader` in the if condition. --- raft/raft.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/raft/raft.go b/raft/raft.go index b0504e3d..9338de10 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -157,8 +157,7 @@ func NewRaftNode(logger zerolog.Logger, raftConfig config.Raft) (*Node, error) { // monitorLeadership checks if the node is the Raft leader and logs state changes. func (n *Node) monitorLeadership() { for { - isLeader := n.raft.State() == raft.Leader - if isLeader { + if n.raft.State() == raft.Leader { n.Logger.Info().Msg("This node is the Raft leader") for _, peer := range n.Peers { From f6aba9f92ff07e7d17787d46a96f15af38f6258b Mon Sep 17 00:00:00 2001 From: Sina Darbouy Date: Fri, 13 Dec 2024 19:40:04 +0100 Subject: [PATCH 26/26] Fix: Gracefully handle ErrRaftShutdown during Node shutdown Updated the `Shutdown` method in `raft.go` to gracefully handle the `ErrRaftShutdown` error. This change ensures that if the Raft node is already shut down, the error is ignored, preventing unnecessary error handling. --- raft/raft.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/raft/raft.go b/raft/raft.go index 9338de10..70fbde63 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -268,7 +268,10 @@ func (n *Node) forwardToLeader(data []byte, timeout time.Duration) error { return nil } -// Update Shutdown to clean up RPC resources. 
+// Shutdown gracefully stops the Node by stopping the gRPC server, closing RPC client connections, +// and shutting down the underlying Raft node. It returns an error if the Raft node fails to +// shut down properly, ignoring the ErrRaftShutdown error which indicates the node was already +// shut down. func (n *Node) Shutdown() error { if n.rpcServer != nil { n.rpcServer.GracefulStop() @@ -277,7 +280,7 @@ func (n *Node) Shutdown() error { n.rpcClient.close() } - if err := n.raft.Shutdown().Error(); err != nil { + if err := n.raft.Shutdown().Error(); err != nil && !errors.Is(err, raft.ErrRaftShutdown) { return fmt.Errorf("failed to shutdown raft node: %w", err) } return nil