From 2daccad33243292285d8f88c470cb5fb081950e0 Mon Sep 17 00:00:00 2001
From: Amit Zafran
Date: Thu, 4 Apr 2024 18:02:48 +0300
Subject: [PATCH 01/17] fix: Goreleaser fix (#1352)

* update
* seperate archives
* fix
* binary format
* sha256sum
* header
* changelog
* 0
* metadata
* update name template

---------

Co-authored-by: amitz
Co-authored-by: Amit Zafran
---
 .goreleaser.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.goreleaser.yaml b/.goreleaser.yaml
index 8c90320dbe..60ff9e38a8 100644
--- a/.goreleaser.yaml
+++ b/.goreleaser.yaml
@@ -116,7 +116,7 @@ archives:
     builds:
       - lavavisor
     format: binary
-    # use zip for windows archives
+    name_template: '{{ .Binary }}_v{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}_{{ . }}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}'
     format_overrides:
       - goos: windows
         format: zip
@@ -124,7 +124,7 @@ archives:
     builds:
       - lavap
     format: binary
-    # use zip for windows archives
+    name_template: '{{ .Binary }}_v{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}_{{ . }}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}'
     format_overrides:
       - goos: windows
         format: zip
@@ -132,7 +132,7 @@ archives:
     builds:
       - lavad
     format: binary
-    # use zip for windows archives
+    name_template: '{{ .Binary }}_v{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}_{{ . }}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}'
     format_overrides:
       - goos: windows
         format: zip

From e3cc8b96f3b68072fe29c82b1cc2cbd8b1083cdd Mon Sep 17 00:00:00 2001
From: Amit Zafran
Date: Thu, 4 Apr 2024 19:24:34 +0300
Subject: [PATCH 02/17] Goreleaser fix (#1353)

* update
* seperate archives
* fix
* binary format
* sha256sum
* header
* changelog
* 0
* metadata
* update name template
* -

---------

Co-authored-by: amitz
Co-authored-by: Amit Zafran
---
 .goreleaser.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.goreleaser.yaml b/.goreleaser.yaml
index 60ff9e38a8..e56314c492 100644
--- a/.goreleaser.yaml
+++ b/.goreleaser.yaml
@@ -116,7 +116,7 @@ archives:
     builds:
       - lavavisor
     format: binary
-    name_template: '{{ .Binary }}_v{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}_{{ . }}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}'
+    name_template: '{{ .Binary }}-v{{ .Version }}-{{ .Os }}-{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}-{{ . }}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}'
     format_overrides:
       - goos: windows
         format: zip
@@ -124,7 +124,7 @@ archives:
     builds:
       - lavap
     format: binary
-    name_template: '{{ .Binary }}_v{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}_{{ . }}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}'
+    name_template: '{{ .Binary }}-v{{ .Version }}-{{ .Os }}-{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}-{{ . }}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}'
     format_overrides:
       - goos: windows
         format: zip
@@ -132,7 +132,7 @@ archives:
     builds:
       - lavad
     format: binary
-    name_template: '{{ .Binary }}_v{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}_{{ . }}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}'
+    name_template: '{{ .Binary }}-v{{ .Version }}-{{ .Os }}-{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}-{{ . 
}}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}' format_overrides: - goos: windows format: zip From 4fc55384ff27150685baa75954498f91dfcd03fe Mon Sep 17 00:00:00 2001 From: Yaroms <103432884+Yaroms@users.noreply.github.com> Date: Sun, 7 Apr 2024 18:03:19 +0300 Subject: [PATCH 03/17] fix: CNS-epoch-payments-keys-fix (#1354) * fix the key decoding * fix * fix * make it more explicit --------- Co-authored-by: Yaroms Co-authored-by: Yarom Swisa --- x/pairing/types/epoch_cu.go | 42 +++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/x/pairing/types/epoch_cu.go b/x/pairing/types/epoch_cu.go index 48cae050c6..f9442a7939 100644 --- a/x/pairing/types/epoch_cu.go +++ b/x/pairing/types/epoch_cu.go @@ -25,46 +25,56 @@ func DecodeBlock(encodedKey []byte) uint64 { } func UniqueEpochSessionKey(epoch uint64, provider string, chainID string, project string, sessionID uint64) []byte { - return []byte(strings.Join([]string{string(EncodeBlock(epoch)), provider, chainID, project, strconv.FormatUint(sessionID, 10)}, " ")) + return append(EncodeBlock(epoch), []byte(strings.Join([]string{provider, chainID, project, strconv.FormatUint(sessionID, 10)}, " "))...) } func ProviderEpochCuKey(epoch uint64, provider string, chainID string) []byte { - return []byte(strings.Join([]string{string(EncodeBlock(epoch)), provider, chainID}, " ")) + return append(EncodeBlock(epoch), []byte(strings.Join([]string{provider, chainID}, " "))...) } func ProviderConsumerEpochCuKey(epoch uint64, provider string, project string, chainID string) []byte { - return []byte(strings.Join([]string{string(EncodeBlock(epoch)), provider, project, chainID}, " ")) + return append(EncodeBlock(epoch), []byte(strings.Join([]string{provider, project, chainID}, " "))...) } func DecodeUniqueEpochSessionKey(key string) (epoch uint64, provider string, chainID string, project string, sessionID uint64, err error) { - split := strings.Split(key, " ") - if len(split) != 5 { + if len(key) < 8 { return 0, "", "", "", 0, fmt.Errorf("invalid UniqueEpochSession key: bad structure. key: %s", key) } - epoch = DecodeBlock([]byte(split[0])) - sessionID, err = strconv.ParseUint(split[4], 10, 64) + + split := strings.Split(key[8:], " ") + if len(split) != 4 { + return 0, "", "", "", 0, fmt.Errorf("invalid UniqueEpochSession key: bad structure. key: %s", key) + } + epoch = DecodeBlock([]byte(key[:8])) + sessionID, err = strconv.ParseUint(split[3], 10, 64) if err != nil { return 0, "", "", "", 0, fmt.Errorf("invalid UniqueEpochSession key: bad session ID. key: %s", key) } - return epoch, split[1], split[2], split[3], sessionID, nil + return epoch, split[0], split[1], split[2], sessionID, nil } func DecodeProviderEpochCuKey(key string) (epoch uint64, provider string, chainID string, err error) { - split := strings.Split(key, " ") - if len(split) != 3 { + if len(key) < 8 { return 0, "", "", fmt.Errorf("invalid ProviderEpochCu key: bad structure. key: %s", key) } - epoch = DecodeBlock([]byte(split[0])) - return epoch, split[1], split[2], nil + split := strings.Split(key[8:], " ") + if len(split) != 2 { + return 0, "", "", fmt.Errorf("invalid ProviderEpochCu key: bad structure. 
key: %s", key) + } + epoch = DecodeBlock([]byte(key[:8])) + return epoch, split[0], split[1], nil } func DecodeProviderConsumerEpochCuKey(key string) (epoch uint64, provider string, project string, chainID string, err error) { - split := strings.Split(key, " ") - if len(split) != 4 { + if len(key) < 8 { + return 0, "", "", "", fmt.Errorf("invalid ProviderConsumerEpochCu key: bad structure. key: %s", key) + } + split := strings.Split(key[8:], " ") + if len(split) != 3 { return 0, "", "", "", fmt.Errorf("invalid ProviderConsumerEpochCu key: bad structure. key: %s", key) } - epoch = DecodeBlock([]byte(split[0])) - return epoch, split[1], split[2], split[3], nil + epoch = DecodeBlock([]byte(key[:8])) + return epoch, split[0], split[1], split[2], nil } func UniqueEpochSessionKeyPrefix() []byte { From 72233db7870f2b2c4344e5dcbc686bef040db109 Mon Sep 17 00:00:00 2001 From: Yaroms <103432884+Yaroms@users.noreply.github.com> Date: Sun, 7 Apr 2024 18:08:38 +0300 Subject: [PATCH 04/17] fix: CNS-epoch-payments-keys-fix (#1358) * fix the key decoding * fix * fix * make it more explicit * add upgrade handler --------- Co-authored-by: Yaroms Co-authored-by: Yarom Swisa --- app/app.go | 1 + app/upgrades/empty_upgrades.go | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/app/app.go b/app/app.go index 42fca34bf0..755c924811 100644 --- a/app/app.go +++ b/app/app.go @@ -188,6 +188,7 @@ var Upgrades = []upgrades.Upgrade{ upgrades.Upgrade_1_0_0, upgrades.Upgrade_1_0_1, upgrades.Upgrade_1_1_0, + upgrades.Upgrade_1_2_0, } // this line is used by starport scaffolding # stargate/wasm/app/enabledProposals diff --git a/app/upgrades/empty_upgrades.go b/app/upgrades/empty_upgrades.go index 6e183b9b97..afb99ce2b0 100644 --- a/app/upgrades/empty_upgrades.go +++ b/app/upgrades/empty_upgrades.go @@ -229,3 +229,9 @@ var Upgrade_1_1_0 = Upgrade{ CreateUpgradeHandler: defaultUpgradeHandler, StoreUpgrades: store.StoreUpgrades{}, } + +var Upgrade_1_2_0 = Upgrade{ + UpgradeName: "v1.2.0", + CreateUpgradeHandler: defaultUpgradeHandler, + StoreUpgrades: store.StoreUpgrades{}, +} From a96b10375820161935df344642edc5d0808a7119 Mon Sep 17 00:00:00 2001 From: Amit Zafran Date: Mon, 8 Apr 2024 17:17:52 +0300 Subject: [PATCH 05/17] fix: Goreleaser fix (#1360) * update * seperate archives * fix * binary format * sha256sum * header * changelog * 0 * metadata * update name template * - --------- Co-authored-by: amitz Co-authored-by: Amit Zafran --- .goreleaser.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.goreleaser.yaml b/.goreleaser.yaml index e56314c492..7973f68215 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -185,3 +185,4 @@ release: name_template: "Lava v{{.Version}} 🌋" mode: replace draft: true + prerelease: "true" From 48540ab5963e8b31f11094ff4bef1b31c3346ac5 Mon Sep 17 00:00:00 2001 From: Ran Mishael <106548467+ranlavanet@users.noreply.github.com> Date: Tue, 9 Apr 2024 14:11:02 +0200 Subject: [PATCH 06/17] fix: PRT-fixing-1-0-4-race-condition-for-returning-valid-responses (#1351) * fixing a race condition between channels on a single switch case. * more descriptive log * adding recovery phase 1 * adding atomics for better utilizations * better error message for websocket and https setup on jsonrpc * adding a revive mechanism for blocked providers * comment fix * removing report for redemption session. * fix missing ignored providers check * add provider to list of unwanted providers for retries. 
* adding unitest for checking no pairing available behavior * fix a case where we would fail fetching the second time * allowing to retry disabled endpoints when trying to reconnect providers. * FML * fixing issue with reconnecting to providers * fix context deadline exceeded being ignored * adding a comment * remove comment * defering a routine * adding sleep for defer to trigger on test --- .../param_change_epoch_params.json | 15 +- protocol/chainlib/common.go | 2 +- protocol/common/timeout.go | 2 +- .../lavasession/consumer_session_manager.go | 192 +++++++++++++++--- .../consumer_session_manager_test.go | 85 ++++++++ protocol/lavasession/consumer_types.go | 20 +- protocol/lavasession/reported_providers.go | 22 +- protocol/lavasession/used_providers.go | 11 + protocol/rpcconsumer/rpcconsumer_server.go | 172 +++++++++------- ...nit_lava_only_with_node_three_providers.sh | 89 ++++++++ 10 files changed, 500 insertions(+), 110 deletions(-) create mode 100755 scripts/pre_setups/init_lava_only_with_node_three_providers.sh diff --git a/cookbook/param_changes/param_change_epoch_params.json b/cookbook/param_changes/param_change_epoch_params.json index 7db3ea30d3..16de65bbcb 100644 --- a/cookbook/param_changes/param_change_epoch_params.json +++ b/cookbook/param_changes/param_change_epoch_params.json @@ -2,16 +2,11 @@ "title": "Protocol Version Change", "description": "Update version", "changes": [ - { - "subspace": "protocol", - "key": "Version", - "value": { - "provider_target": "1.18.2", - "consumer_target": "1.18.2", - "provider_min": "1.16.2", - "consumer_min": "1.16.2" - } - } + { + "subspace": "epochstorage", + "key": "EpochBlocks", + "value": "3600" + } ], "deposit": "10000000ulava" } \ No newline at end of file diff --git a/protocol/chainlib/common.go b/protocol/chainlib/common.go index 6624180fb2..dfa76af899 100644 --- a/protocol/chainlib/common.go +++ b/protocol/chainlib/common.go @@ -182,7 +182,7 @@ func verifyRPCEndpoint(endpoint string) { case "ws", "wss": return default: - utils.LavaFormatWarning("URL scheme should be websocket (ws/wss), got: "+u.Scheme, nil) + utils.LavaFormatWarning("URL scheme should be websocket (ws/wss), got: "+u.Scheme+", By not setting ws/wss your provider wont be able to accept ws subscriptions, therefore might receive less rewards and lower QOS score. 
if subscriptions are not applicable for this chain you can ignore this warning", nil) } } diff --git a/protocol/common/timeout.go b/protocol/common/timeout.go index 3b6e6d4708..396635799c 100644 --- a/protocol/common/timeout.go +++ b/protocol/common/timeout.go @@ -16,7 +16,7 @@ const ( DataReliabilityTimeoutIncrease = 5 * time.Second AverageWorldLatency = 300 * time.Millisecond CommunicateWithLocalLavaNodeTimeout = (3 * time.Second) + AverageWorldLatency - DefaultTimeout = 20 * time.Second + DefaultTimeout = 30 * time.Second DefaultTimeoutLong = 3 * time.Minute CacheTimeout = 50 * time.Millisecond ) diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index 10ff5bcd39..418aade5dc 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -2,6 +2,7 @@ package lavasession import ( "context" + "sort" "strings" "sync" "sync/atomic" @@ -20,7 +21,9 @@ import ( ) const ( - debug = false + debug = false + BlockedProviderSessionUsedStatus = uint32(1) + BlockedProviderSessionUnusedStatus = uint32(0) ) var DebugProbes = false @@ -32,13 +35,19 @@ type ConsumerSessionManager struct { pairing map[string]*ConsumerSessionsWithProvider // key == provider address currentEpoch uint64 numberOfResets uint64 - // pairingAddresses for Data reliability - pairingAddresses map[uint64]string // contains all addresses from the initial pairing. and the keys are the indexes + + // original pairingAddresses for current epoch + // contains all addresses from the initial pairing. and the keys are the indexes of the pairing query (these indexes are used for data reliability) + pairingAddresses map[uint64]string pairingAddressesLength uint64 - validAddresses []string // contains all addresses that are currently valid + // contains all provider addresses that are currently valid + validAddresses []string + // contains a sorted list of blocked addresses, sorted by their cu used this epoch for higher chance of response + currentlyBlockedProviderAddresses []string + addonAddresses map[RouterKey][]string - reportedProviders ReportedProviders + reportedProviders *ReportedProviders // pairingPurge - contains all pairings that are unwanted this epoch, keeps them in memory in order to avoid release. // (if a consumer session still uses one of them or we want to report it.) 
pairingPurge map[string]*ConsumerSessionsWithProvider @@ -72,7 +81,7 @@ func (csm *ConsumerSessionManager) UpdateAllProviders(epoch uint64, pairingList // Reset States // csm.validAddresses length is reset in setValidAddressesToDefaultValue - csm.pairingAddresses = make(map[uint64]string, 0) + csm.pairingAddresses = make(map[uint64]string, pairingListLength) csm.reportedProviders.Reset() csm.pairingAddressesLength = uint64(pairingListLength) @@ -159,7 +168,7 @@ func (csm *ConsumerSessionManager) probeProviders(ctx context.Context, pairingLi go func(consumerSessionsWithProvider *ConsumerSessionsWithProvider) { // Call the probeProvider function and defer the WaitGroup Done call defer wg.Done() - latency, providerAddress, err := csm.probeProvider(ctx, consumerSessionsWithProvider, epoch) + latency, providerAddress, err := csm.probeProvider(ctx, consumerSessionsWithProvider, epoch, false) success := err == nil // if failure then regard it in availability csm.providerOptimizer.AppendProbeRelayData(providerAddress, latency, success) }(consumerSessionWithProvider) @@ -185,9 +194,9 @@ func (csm *ConsumerSessionManager) probeProviders(ctx context.Context, pairingLi } // this code needs to be thread safe -func (csm *ConsumerSessionManager) probeProvider(ctx context.Context, consumerSessionsWithProvider *ConsumerSessionsWithProvider, epoch uint64) (latency time.Duration, providerAddress string, err error) { +func (csm *ConsumerSessionManager) probeProvider(ctx context.Context, consumerSessionsWithProvider *ConsumerSessionsWithProvider, epoch uint64, tryReconnectToDisabledEndpoints bool) (latency time.Duration, providerAddress string, err error) { // TODO: fetch all endpoints not just one - connected, endpoint, providerAddress, err := consumerSessionsWithProvider.fetchEndpointConnectionFromConsumerSessionWithProvider(ctx) + connected, endpoint, providerAddress, err := consumerSessionsWithProvider.fetchEndpointConnectionFromConsumerSessionWithProvider(ctx, tryReconnectToDisabledEndpoints) if err != nil || !connected { if AllProviderEndpointsDisabledError.Is(err) { csm.blockProvider(providerAddress, true, epoch, MaxConsecutiveConnectionAttempts, 0, csm.GenerateReconnectCallback(consumerSessionsWithProvider)) // reporting and blocking provider this epoch @@ -236,6 +245,7 @@ func (csm *ConsumerSessionManager) probeProvider(ctx context.Context, consumerSe // csm needs to be locked here func (csm *ConsumerSessionManager) setValidAddressesToDefaultValue(addon string, extensions []string) { + csm.currentlyBlockedProviderAddresses = make([]string, 0) // reset currently blocked provider addresses if addon == "" && len(extensions) == 0 { csm.validAddresses = make([]string, len(csm.pairingAddresses)) index := 0 @@ -340,7 +350,17 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS // Get a valid consumerSessionsWithProvider sessionWithProviderMap, err := csm.getValidConsumerSessionsWithProvider(tempIgnoredProviders, cuNeededForSession, requestedBlock, addon, extensionNames, stateful, virtualEpoch) if err != nil { - return nil, err + if PairingListEmptyError.Is(err) { + // got no pairing available, try to recover a session from the currently banned providers + var errOnRetry error + sessionWithProviderMap, errOnRetry = csm.tryGetConsumerSessionWithProviderFromBlockedProviderList(tempIgnoredProviders, cuNeededForSession, requestedBlock, addon, extensionNames, stateful, virtualEpoch, usedProviders) + if errOnRetry != nil { + return nil, err // return original error 
(getValidConsumerSessionsWithProvider) + } + } else { + return nil, err + } + // if we got here we managed to get a sessionWithProviderMap } // Save how many sessions we are aiming to have @@ -354,7 +374,7 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS sessionEpoch := sessionWithProvider.CurrentEpoch // Get a valid Endpoint from the provider chosen - connected, endpoint, _, err := consumerSessionsWithProvider.fetchEndpointConnectionFromConsumerSessionWithProvider(ctx) + connected, endpoint, _, err := consumerSessionsWithProvider.fetchEndpointConnectionFromConsumerSessionWithProvider(ctx, false) if err != nil { // verify err is AllProviderEndpointsDisabled and report. if AllProviderEndpointsDisabledError.Is(err) { @@ -465,7 +485,16 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS // If error happens, and we do not have any sessions return error if err != nil { - return nil, err + if PairingListEmptyError.Is(err) { + // got no pairing available, try to recover a session from the currently banned providers + var errOnRetry error + sessionWithProviderMap, errOnRetry = csm.tryGetConsumerSessionWithProviderFromBlockedProviderList(tempIgnoredProviders, cuNeededForSession, requestedBlock, addon, extensionNames, stateful, virtualEpoch, usedProviders) + if errOnRetry != nil { + return nil, err // return original error (getValidConsumerSessionsWithProvider) + } + } else { + return nil, err + } } } } @@ -518,6 +547,65 @@ func (csm *ConsumerSessionManager) getValidProviderAddresses(ignoredProvidersLis return providers, nil } +// On cases where the valid provider list is empty, by being already used in this attempt, and we got to a point +// where we need another session (for retry or a timeout happened) we want to try fetching a blocked provider for the list. +// the list will be sorted by most cu served giving the best provider that was blocked a second chance to get back to valid addresses. +func (csm *ConsumerSessionManager) tryGetConsumerSessionWithProviderFromBlockedProviderList(ignoredProviders *ignoredProviders, cuNeededForSession uint64, requestedBlock int64, addon string, extensions []string, stateful uint32, virtualEpoch uint64, usedProviders UsedProvidersInf) (sessionWithProviderMap SessionWithProviderMap, err error) { + csm.lock.RLock() + // we do not defer yet as we might need to unlock due to an epoch change + + // reading the epoch here while locked, to get the epoch of the pairing. + currentEpoch := csm.atomicReadCurrentEpoch() + + // if len(csm.currentlyBlockedProviderAddresses) == 0 we probably reset the state so we can fetch it normally OR || + // on a very rare case epoch change can happen. in this case we should just fetch a provider from the new pairing list. + if len(csm.currentlyBlockedProviderAddresses) == 0 || ignoredProviders.currentEpoch < currentEpoch { + // epoch changed just now (between the getValidConsumerSessionsWithProvider to tryGetConsumerSessionWithProviderFromBlockedProviderList) + utils.LavaFormatDebug("Epoch changed between getValidConsumerSessionsWithProvider to tryGetConsumerSessionWithProviderFromBlockedProviderList getting pairing from new epoch list") + csm.lock.RUnlock() // unlock because getValidConsumerSessionsWithProvider is locking. 
+ return csm.getValidConsumerSessionsWithProvider(ignoredProviders, cuNeededForSession, requestedBlock, addon, extensions, stateful, virtualEpoch) + } + + // if we got here we validated the epoch is still the same epoch as we expected and we need to fetch a session from the blocked provider list. + defer csm.lock.RUnlock() + + // csm.currentlyBlockedProviderAddresses is sorted by the provider with the highest cu used this epoch to the lowest + // meaning if we fetch the first successful index this is probably the highest success ratio to get a response. + for _, providerAddress := range csm.currentlyBlockedProviderAddresses { + // check if we have this provider already. + if _, providerExistInIgnoredProviders := ignoredProviders.providers[providerAddress]; providerExistInIgnoredProviders { + continue + } + consumerSessionsWithProvider := csm.pairing[providerAddress] + // Add to ignored (no matter what) + ignoredProviders.providers[providerAddress] = struct{}{} + usedProviders.AddUnwantedAddresses(providerAddress) // add the address to our unwanted providers to avoid infinite recursion + + // validate this provider has enough cu to be used + if err := consumerSessionsWithProvider.validateComputeUnits(cuNeededForSession, virtualEpoch); err != nil { + // we already added to ignored we can just continue to the next provider + continue + } + + // validate this provider supports the required extension or addon + if !consumerSessionsWithProvider.IsSupportingAddon(addon) || !consumerSessionsWithProvider.IsSupportingExtensions(extensions) { + continue + } + + consumerSessionsWithProvider.atomicWriteBlockedStatus(BlockedProviderSessionUsedStatus) // will add to valid addresses if successful + // If no error, return session map + return SessionWithProviderMap{ + providerAddress: &SessionWithProvider{ + SessionsWithProvider: consumerSessionsWithProvider, + CurrentEpoch: currentEpoch, + }, + }, nil + } + + // if we got here we failed to fetch a valid provider meaning no pairing available. + return nil, utils.LavaFormatError(csm.rpcEndpoint.ChainID+" could not get a provider address from blocked provider list", PairingListEmptyError, utils.LogAttr("csm.currentlyBlockedProviderAddresses", csm.currentlyBlockedProviderAddresses), utils.LogAttr("addons", addon), utils.LogAttr("extensions", extensions)) +} + func (csm *ConsumerSessionManager) getValidConsumerSessionsWithProvider(ignoredProviders *ignoredProviders, cuNeededForSession uint64, requestedBlock int64, addon string, extensions []string, stateful uint32, virtualEpoch uint64) (sessionWithProviderMap SessionWithProviderMap, err error) { csm.lock.RLock() defer csm.lock.RUnlock() @@ -596,6 +684,17 @@ func (csm *ConsumerSessionManager) getValidConsumerSessionsWithProvider(ignoredP } } +// must be locked before use +func (csm *ConsumerSessionManager) sortBlockedProviderListByCuServed() { + // Defining the custom sorting rule (used cu per provider) + // descending order of cu used (highest to lowest) + customSort := func(i, j int) bool { + return csm.pairing[csm.currentlyBlockedProviderAddresses[i]].atomicReadUsedComputeUnits() > csm.pairing[csm.currentlyBlockedProviderAddresses[j]].atomicReadUsedComputeUnits() + } + // Sort the slice using the custom sorting rule + sort.Slice(csm.currentlyBlockedProviderAddresses, customSort) +} + // removes a given address from the valid addresses list. func (csm *ConsumerSessionManager) removeAddressFromValidAddresses(address string) error { // cs Must be Locked here. 
@@ -604,6 +703,10 @@ func (csm *ConsumerSessionManager) removeAddressFromValidAddresses(address strin // remove the index from the valid list. csm.validAddresses = append(csm.validAddresses[:idx], csm.validAddresses[idx+1:]...) csm.RemoveAddonAddresses("", nil) + // add the address to our block provider list. + csm.currentlyBlockedProviderAddresses = append(csm.currentlyBlockedProviderAddresses, address) + // sort the blocked provider list by cu served + csm.sortBlockedProviderListByCuServed() return nil } } @@ -647,6 +750,11 @@ func (csm *ConsumerSessionManager) OnSessionFailure(consumerSession *SingleConsu if err := consumerSession.VerifyLock(); err != nil { return sdkerrors.Wrapf(err, "OnSessionFailure, consumerSession.lock must be locked before accessing this method, additional info:") } + // redemptionSession = true, if we got this provider from the blocked provider list. + // if so, it means we already reported this provider and blocked it we do not need to do it again. + // due to session failure we also don't need to remove it from the blocked provider list. + // we will just update the QOS info, and return + redemptionSession := consumerSession.Parent.atomicReadBlockedStatus() == BlockedProviderSessionUsedStatus // consumer Session should be locked here. so we can just apply the session failure here. if consumerSession.BlockListed { @@ -670,14 +778,18 @@ func (csm *ConsumerSessionManager) OnSessionFailure(consumerSession *SingleConsu if len(consumerSession.ConsecutiveErrors) > MaximumNumberOfFailuresAllowedPerConsumerSession || IsSessionSyncLoss(errorReceived) { utils.LavaFormatDebug("Blocking consumer session", utils.LogAttr("ConsecutiveErrors", consumerSession.ConsecutiveErrors), utils.LogAttr("errorsCount", consumerSession.errorsCount), utils.Attribute{Key: "id", Value: consumerSession.SessionId}) consumerSession.BlockListed = true // block this session from future usages - // we will check the total number of cu for this provider and decide if we need to report it. - if consumerSession.Parent.atomicReadUsedComputeUnits() <= consumerSession.LatestRelayCu { // if we had 0 successful relays and we reached block session we need to report this provider - blockProvider = true - reportProvider = true - } - if reportProvider { - providerAddr := consumerSession.Parent.PublicLavaAddress - go csm.reportedProviders.AppendReport(metrics.NewReportsRequest(providerAddr, consumerSession.ConsecutiveErrors, csm.rpcEndpoint.ChainID)) + + // check if this session is a redemption session meaning we already blocked and reported the provider if it was necessary. + if !redemptionSession { + // we will check the total number of cu for this provider and decide if we need to report it. 
+ if consumerSession.Parent.atomicReadUsedComputeUnits() <= consumerSession.LatestRelayCu { // if we had 0 successful relays and we reached block session we need to report this provider + blockProvider = true + reportProvider = true + } + if reportProvider { + providerAddr := consumerSession.Parent.PublicLavaAddress + go csm.reportedProviders.AppendReport(metrics.NewReportsRequest(providerAddr, consumerSession.ConsecutiveErrors, csm.rpcEndpoint.ChainID)) + } } } cuToDecrease := consumerSession.LatestRelayCu @@ -695,7 +807,7 @@ func (csm *ConsumerSessionManager) OnSessionFailure(consumerSession *SingleConsu return err } - if blockProvider { + if !redemptionSession && blockProvider { publicProviderAddress, pairingEpoch := parentConsumerSessionsWithProvider.getPublicLavaAddressAndPairingEpoch() err = csm.blockProvider(publicProviderAddress, reportProvider, pairingEpoch, 0, consecutiveErrors, nil) if err != nil { @@ -708,6 +820,25 @@ func (csm *ConsumerSessionManager) OnSessionFailure(consumerSession *SingleConsu return nil } +// validating if the provider is currently not in valid addresses list. if the session was successful we can return the provider +// to our valid addresses list and resume its usage +func (csm *ConsumerSessionManager) validateAndReturnBlockedProviderToValidAddressesList(providerAddress string) { + csm.lock.Lock() + defer csm.lock.Unlock() + for idx, addr := range csm.currentlyBlockedProviderAddresses { + if addr == providerAddress { + // remove it from the csm.currentlyBlockedProviderAddresses + csm.currentlyBlockedProviderAddresses = append(csm.currentlyBlockedProviderAddresses[:idx], csm.currentlyBlockedProviderAddresses[idx+1:]...) + // reapply it to the valid addresses. + csm.validAddresses = append(csm.validAddresses, addr) + // purge the current addon addresses so it will be created again next time get session is called. + csm.RemoveAddonAddresses("", nil) + return + } + } + // if we didn't find it, we might had two sessions in parallel and thats ok. the first one dealt with it we can just return +} + // On a successful session this function will update all necessary fields in the consumerSession. and unlock it when it finishes func (csm *ConsumerSessionManager) OnSessionDone( consumerSession *SingleConsumerSession, @@ -725,6 +856,16 @@ func (csm *ConsumerSessionManager) OnSessionDone( return sdkerrors.Wrapf(err, "OnSessionDone, consumerSession.lock must be locked before accessing this method") } + if consumerSession.Parent.atomicReadBlockedStatus() == BlockedProviderSessionUsedStatus { + // we will deal with the removal of this provider from the blocked list so we can for now set it as default + consumerSession.Parent.atomicWriteBlockedStatus(BlockedProviderSessionUnusedStatus) + // this provider is probably in the ignored provider list. we need to validate and return it to valid addresses + providerAddress := consumerSession.Parent.PublicLavaAddress + // we want this method to run last after we unlock the consumer session + // golang defer operates in a Last-In-First-Out (LIFO) order, meaning this defer will run last. + defer func() { go csm.validateAndReturnBlockedProviderToValidAddressesList(providerAddress) }() + } + defer consumerSession.Free(nil) // we need to be locked here, if we didn't get it locked we try lock anyway consumerSession.CuSum += consumerSession.LatestRelayCu // add CuSum to current cu usage. 
consumerSession.LatestRelayCu = 0 // reset cu just in case @@ -796,14 +937,19 @@ func (csm *ConsumerSessionManager) OnSessionDoneIncreaseCUOnly(consumerSession * func (csm *ConsumerSessionManager) GenerateReconnectCallback(consumerSessionsWithProvider *ConsumerSessionsWithProvider) func() error { return func() error { - _, _, err := csm.probeProvider(context.Background(), consumerSessionsWithProvider, csm.atomicReadCurrentEpoch()) + ctx := utils.WithUniqueIdentifier(context.Background(), utils.GenerateUniqueIdentifier()) // unique identifier for retries + _, providerAddress, err := csm.probeProvider(ctx, consumerSessionsWithProvider, csm.atomicReadCurrentEpoch(), true) + if err == nil { + utils.LavaFormatDebug("Reconnecting provider succeeded returning provider to valid addresses list", utils.LogAttr("provider", providerAddress)) + csm.validateAndReturnBlockedProviderToValidAddressesList(providerAddress) + } return err } } func NewConsumerSessionManager(rpcEndpoint *RPCEndpoint, providerOptimizer ProviderOptimizer, consumerMetricsManager *metrics.ConsumerMetricsManager, reporter metrics.Reporter) *ConsumerSessionManager { csm := &ConsumerSessionManager{ - reportedProviders: *NewReportedProviders(reporter), + reportedProviders: NewReportedProviders(reporter), consumerMetricsManager: consumerMetricsManager, } csm.rpcEndpoint = rpcEndpoint diff --git a/protocol/lavasession/consumer_session_manager_test.go b/protocol/lavasession/consumer_session_manager_test.go index 965f693263..f958ed38e8 100644 --- a/protocol/lavasession/consumer_session_manager_test.go +++ b/protocol/lavasession/consumer_session_manager_test.go @@ -10,6 +10,8 @@ import ( "testing" "time" + "github.com/lavanet/lava/utils/lavaslices" + "github.com/lavanet/lava/protocol/common" "github.com/lavanet/lava/protocol/provideroptimizer" "github.com/lavanet/lava/utils" @@ -155,6 +157,89 @@ func TestHappyFlow(t *testing.T) { } } +func TestNoPairingAvailableFlow(t *testing.T) { + ctx := context.Background() + csm := CreateConsumerSessionManager() + pairingList := createPairingList("", true) + err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. + require.NoError(t, err) + + addCu := 10 + // adding cu to each pairing (highest is last) + for _, pairing := range csm.pairing { + pairing.addUsedComputeUnits(uint64(addCu), 0) + addCu += 10 + } + + // remove all providers except for the first one + validAddressessLength := len(csm.validAddresses) + copyValidAddressess := append([]string{}, csm.validAddresses...) 
+ for index := 1; index < validAddressessLength; index++ { + csm.removeAddressFromValidAddresses(copyValidAddressess[index]) + } + + // get the address of the highest cu provider + highestProviderCu := "" + highestCu := uint64(0) + for _, pairing := range csm.pairing { + if pairing.PublicLavaAddress != csm.validAddresses[0] { + if pairing.UsedComputeUnits > highestCu { + highestCu = pairing.UsedComputeUnits + highestProviderCu = pairing.PublicLavaAddress + } + } + } + + usedProviders := NewUsedProviders(nil) + css, err := csm.GetSessions(ctx, cuForFirstRequest, usedProviders, servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session + require.NoError(t, err) + _, expectedProviderAddress := css[csm.validAddresses[0]] + require.True(t, expectedProviderAddress) + + css2, err := csm.GetSessions(ctx, cuForFirstRequest, usedProviders, servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session + require.NoError(t, err) + _, expectedProviderAddress2 := css2[highestProviderCu] + require.True(t, expectedProviderAddress2) + + runOnSessionDoneForConsumerSessionMap(t, css, csm) + runOnSessionDoneForConsumerSessionMap(t, css2, csm) + time.Sleep(time.Second) + require.Equal(t, len(csm.validAddresses), 2) + + css3, err := csm.GetSessions(ctx, cuForFirstRequest, usedProviders, servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session + require.NoError(t, err) + runOnSessionFailureForConsumerSessionMap(t, css3, csm) + // check we still have only 2 valid addresses as this one failed + for _, addr := range css3 { + require.False(t, lavaslices.Contains(csm.validAddresses, addr.Session.Parent.PublicLavaAddress)) + } + require.Equal(t, len(csm.validAddresses), 2) +} + +func runOnSessionDoneForConsumerSessionMap(t *testing.T, css ConsumerSessionsMap, csm *ConsumerSessionManager) { + for _, cs := range css { + require.NotNil(t, cs) + require.Equal(t, cs.Epoch, csm.currentEpoch) + require.Equal(t, cs.Session.LatestRelayCu, cuForFirstRequest) + err := csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) + require.NoError(t, err) + require.Equal(t, cs.Session.CuSum, cuForFirstRequest) + require.Equal(t, cs.Session.LatestRelayCu, latestRelayCuAfterDone) + require.Equal(t, cs.Session.RelayNum, relayNumberAfterFirstCall) + require.Equal(t, cs.Session.LatestBlock, servicedBlockNumber) + } +} + +func runOnSessionFailureForConsumerSessionMap(t *testing.T, css ConsumerSessionsMap, csm *ConsumerSessionManager) { + for _, cs := range css { + require.NotNil(t, cs) + require.Equal(t, cs.Epoch, csm.currentEpoch) + require.Equal(t, cs.Session.LatestRelayCu, cuForFirstRequest) + err := csm.OnSessionFailure(cs.Session, fmt.Errorf("testError")) + require.NoError(t, err) + } +} + func TestHappyFlowVirtualEpoch(t *testing.T) { ctx := context.Background() csm := CreateConsumerSessionManager() diff --git a/protocol/lavasession/consumer_types.go b/protocol/lavasession/consumer_types.go index 18a578844f..79ae6517e9 100644 --- a/protocol/lavasession/consumer_types.go +++ b/protocol/lavasession/consumer_types.go @@ -26,6 +26,7 @@ type UsedProvidersInf interface { TryLockSelection(context.Context) bool AddUsed(ConsumerSessionsMap, error) GetUnwantedProvidersToSend() map[string]struct{} + AddUnwantedAddresses(address string) } type SessionInfo struct { @@ -125,6 +126,10 @@ type ConsumerSessionsWithProvider struct { // whether we already reported this 
provider this epoch, we can only report one conflict per provider per epoch conflictFoundAndReported uint32 // 0 == not reported, 1 == reported stakeSize sdk.Coin // the stake size the provider staked + + // blocked provider recovery status if 0 currently not used, if 1 a session has tried resume communication with this provider + // if the provider is not blocked at all this field is irrelevant + blockedAndUsedWithChanceForRecoveryStatus uint32 } func NewConsumerSessionWithProvider(publicLavaAddress string, pairingEndpoints []*Endpoint, maxCu uint64, epoch uint64, stakeSize sdk.Coin) *ConsumerSessionsWithProvider { @@ -138,6 +143,14 @@ func NewConsumerSessionWithProvider(publicLavaAddress string, pairingEndpoints [ } } +func (cswp *ConsumerSessionsWithProvider) atomicReadBlockedStatus() uint32 { + return atomic.LoadUint32(&cswp.blockedAndUsedWithChanceForRecoveryStatus) +} + +func (cswp *ConsumerSessionsWithProvider) atomicWriteBlockedStatus(status uint32) { + atomic.StoreUint32(&cswp.blockedAndUsedWithChanceForRecoveryStatus, status) // we can only set conflict to "reported". +} + func (cswp *ConsumerSessionsWithProvider) atomicReadConflictReported() bool { return atomic.LoadUint32(&cswp.conflictFoundAndReported) == 1 } @@ -326,13 +339,15 @@ func (cswp *ConsumerSessionsWithProvider) GetConsumerSessionInstanceFromEndpoint // fetching an endpoint from a ConsumerSessionWithProvider and establishing a connection, // can fail without an error if trying to connect once to each endpoint but none of them are active. -func (cswp *ConsumerSessionsWithProvider) fetchEndpointConnectionFromConsumerSessionWithProvider(ctx context.Context) (connected bool, endpointPtr *Endpoint, providerAddress string, err error) { +func (cswp *ConsumerSessionsWithProvider) fetchEndpointConnectionFromConsumerSessionWithProvider(ctx context.Context, retryDisabledEndpoints bool) (connected bool, endpointPtr *Endpoint, providerAddress string, err error) { getConnectionFromConsumerSessionsWithProvider := func(ctx context.Context) (connected bool, endpointPtr *Endpoint, allDisabled bool) { cswp.Lock.Lock() defer cswp.Lock.Unlock() for idx, endpoint := range cswp.Endpoints { - if !endpoint.Enabled { + // retryDisabledEndpoints will attempt to reconnect to the provider even though we have disabled the endpoint + // this is used on a routine that tries to reconnect to a provider that has been disabled due to being unable to connect to it. 
+ if !retryDisabledEndpoints && !endpoint.Enabled { continue } connectEndpoint := func(cswp *ConsumerSessionsWithProvider, ctx context.Context, endpoint *Endpoint) (connected_ bool) { @@ -378,6 +393,7 @@ func (cswp *ConsumerSessionsWithProvider) fetchEndpointConnectionFromConsumerSes continue } cswp.Endpoints[idx] = endpoint + cswp.Endpoints[idx].Enabled = true // return enabled once we successfully reconnect return true, endpoint, false } diff --git a/protocol/lavasession/reported_providers.go b/protocol/lavasession/reported_providers.go index 419dbfaf80..a3b29a4630 100644 --- a/protocol/lavasession/reported_providers.go +++ b/protocol/lavasession/reported_providers.go @@ -10,7 +10,8 @@ import ( ) const ( - ReconnectCandidateTime = 2 * time.Minute + ReconnectCandidateTime = 30 * time.Second + debugReportedProviders = false ) type ReportedProviders struct { @@ -29,6 +30,9 @@ type ReportedProviderEntry struct { func (rp *ReportedProviders) Reset() { rp.lock.Lock() defer rp.lock.Unlock() + if debugReportedProviders { + utils.LavaFormatDebug("[debugReportedProviders] Reset called") + } rp.addedToPurgeAndReport = make(map[string]*ReportedProviderEntry, 0) } @@ -61,12 +65,18 @@ func (rp *ReportedProviders) ReportProvider(address string, errors uint64, disco if reconnectCB != nil { rp.addedToPurgeAndReport[address].reconnectCB = reconnectCB } + if debugReportedProviders { + utils.LavaFormatDebug("[debugReportedProviders] adding provider to reported providers", utils.LogAttr("rp.addedToPurgeAndReport", rp.addedToPurgeAndReport)) + } } // will be called after a disconnected provider got a valid connection func (rp *ReportedProviders) RemoveReport(address string) { rp.lock.Lock() defer rp.lock.Unlock() + if debugReportedProviders { + utils.LavaFormatDebug("[debugReportedProviders] Removing Report", utils.LogAttr("address", address)) + } delete(rp.addedToPurgeAndReport, address) } @@ -86,6 +96,9 @@ func (rp *ReportedProviders) ReconnectCandidates() []reconnectCandidate { rp.lock.RLock() defer rp.lock.RUnlock() candidates := []reconnectCandidate{} + if debugReportedProviders { + utils.LavaFormatDebug("[debugReportedProviders] Reconnect candidates", utils.LogAttr("candidate list", rp.addedToPurgeAndReport)) + } for address, entry := range rp.addedToPurgeAndReport { // only reconnect providers that didn't have consecutive errors if entry.Errors == 0 && time.Since(entry.addedTime) > ReconnectCandidateTime { @@ -103,6 +116,9 @@ func (rp *ReportedProviders) ReconnectProviders() { candidates := rp.ReconnectCandidates() for _, candidate := range candidates { if candidate.reconnectCB != nil { + if debugReportedProviders { + utils.LavaFormatDebug("[debugReportedProviders] Trying to reconnect candidate", utils.LogAttr("candidate", candidate.address)) + } err := candidate.reconnectCB() if err == nil { rp.RemoveReport(candidate.address) @@ -118,7 +134,9 @@ func (rp *ReportedProviders) AppendReport(report metrics.ReportsRequest) { if rp == nil || rp.reporter == nil { return } - utils.LavaFormatDebug("sending report on provider", utils.LogAttr("provider", report.Provider)) + if debugReportedProviders { + utils.LavaFormatDebug("[debugReportedProviders] Sending report on provider", utils.LogAttr("provider", report.Provider)) + } rp.reporter.AppendReport(report) } diff --git a/protocol/lavasession/used_providers.go b/protocol/lavasession/used_providers.go index 55eff1ddd1..5d8a6d570e 100644 --- a/protocol/lavasession/used_providers.go +++ b/protocol/lavasession/used_providers.go @@ -81,6 +81,16 @@ func (up 
*UsedProviders) UnwantedAddresses() []string { return addresses } +func (up *UsedProviders) AddUnwantedAddresses(address string) { + if up == nil { + utils.LavaFormatError("UsedProviders.AddUnwantedAddresses is nil, misuse detected", nil) + return + } + up.lock.Lock() + defer up.lock.Unlock() + up.unwantedProviders[address] = struct{}{} +} + func (up *UsedProviders) RemoveUsed(provider string, err error) { if up == nil { return @@ -133,6 +143,7 @@ func (up *UsedProviders) AddUsed(sessions ConsumerSessionsMap, err error) { up.selecting = false } +// called when already locked. func (up *UsedProviders) setUnwanted(provider string) { if up == nil { return diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index a1680e85f4..a69debad8b 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -30,7 +30,8 @@ import ( ) const ( - MaxRelayRetries = 6 + MaxRelayRetries = 6 + numberOfTimesToCheckCurrentlyUsedIsEmpty = 3 ) var NoResponseTimeout = sdkerrors.New("NoResponseTimeout Error", 685, "timeout occurred while waiting for providers responses") @@ -336,10 +337,11 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH // a channel to be notified processing was done, true means we have results and can return gotResults := make(chan bool) processingTimeout, relayTimeout := rpccs.getProcessingTimeout(chainMessage) + // create the processing timeout prior to entering the method so it wont reset every time + processingCtx, cancel := context.WithTimeout(ctx, processingTimeout) + defer cancel() readResultsFromProcessor := func() { - processingCtx, cancel := context.WithTimeout(ctx, processingTimeout) - defer cancel() // ProcessResults is reading responses while blocking until the conditions are met relayProcessor.WaitForResults(processingCtx) // decide if we need to resend or not @@ -350,6 +352,24 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH } } go readResultsFromProcessor() + + returnCondition := make(chan error) + // used for checking whether to return an error to the user or to allow other channels return their result first see detailed description on the switch case below + validateReturnCondition := func(err error) { + currentlyUsedIsEmptyCounter := 0 + if err != nil { + for validateNoProvidersAreUsed := 0; validateNoProvidersAreUsed < numberOfTimesToCheckCurrentlyUsedIsEmpty; validateNoProvidersAreUsed++ { + if relayProcessor.usedProviders.CurrentlyUsed() == 0 { + currentlyUsedIsEmptyCounter++ + } + time.Sleep(5 * time.Millisecond) + } + // we failed to send a batch of relays, if there are no active sends we can terminate after validating X amount of times to make sure no racing channels + if currentlyUsedIsEmptyCounter >= numberOfTimesToCheckCurrentlyUsedIsEmpty { + returnCondition <- err + } + } + } // every relay timeout we send a new batch startNewBatchTicker := time.NewTicker(relayTimeout) for { @@ -359,20 +379,33 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH return relayProcessor, nil } err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) - if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { - // we failed to send a batch of relays, if there are no active sends we can terminate - return relayProcessor, err - } + go validateReturnCondition(err) go readResultsFromProcessor() case <-startNewBatchTicker.C: // only trigger 
another batch for non BestResult relays if relayProcessor.selection != BestResult { err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) - if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { - // we failed to send a batch of relays, if there are no active sends we can terminate - return relayProcessor, err - } + go validateReturnCondition(err) } + case returnErr := <-returnCondition: + // we use this channel because there could be a race condition between us releasing the provider and about to send the return + // to an error happening on another relay processor's routine. this can cause an error that returns to the user + // if we don't release the case, it will cause the success case condition to not be executed + // detailed scenario: + // sending first relay -> waiting -> sending second relay -> getting an error on the second relay (not returning yet) -> + // -> (in parallel) first relay finished, removing from CurrentlyUsed providers -> checking currently used (on second failed relay) -> returning error instead of the successful relay. + // by releasing the case we allow the channel to be chosen again by the successful case. + return relayProcessor, returnErr + case <-processingCtx.Done(): + // in case we got a processing timeout we return context deadline exceeded to the user. + utils.LavaFormatWarning("Relay Got processingCtx timeout", nil, + utils.LogAttr("dappId", dappID), + utils.LogAttr("consumerIp", consumerIp), + utils.LogAttr("chainMessage.GetApi().Name", chainMessage.GetApi().Name), + utils.LogAttr("GUID", ctx), + utils.LogAttr("relayProcessor", relayProcessor), + ) + return relayProcessor, processingCtx.Err() // returning the context error } } } @@ -397,12 +430,6 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( // handle QoS updates // in case connection totally fails, update unresponsive providers in ConsumerSessionManager isSubscription := chainlib.IsSubscription(chainMessage) - if isSubscription { - // temporarily disable subscriptions - // TODO: fix subscription and disable this case. - return utils.LavaFormatError("Subscriptions are disabled currently", nil) - } - var sharedStateId string // defaults to "", if shared state is disabled then no shared state will be used. 
if rpccs.sharedState { sharedStateId = rpccs.consumerConsistency.Key(dappID, consumerIp) // use same key as we use for consistency, (for better consistency :-D) @@ -417,63 +444,65 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( // try using cache before sending relay var cacheError error - if reqBlock != spectypes.NOT_APPLICABLE || !chainMessage.GetForceCacheRefresh() { - var cacheReply *pairingtypes.CacheRelayReply - hashKey, outputFormatter, err := chainlib.HashCacheRequest(relayRequestData, chainID) - if err != nil { - utils.LavaFormatError("sendRelayToProvider Failed getting Hash for cache request", err) - } else { - cacheCtx, cancel := context.WithTimeout(ctx, common.CacheTimeout) - cacheReply, cacheError = rpccs.cache.GetEntry(cacheCtx, &pairingtypes.RelayCacheGet{ - RequestHash: hashKey, - RequestedBlock: relayRequestData.RequestBlock, - ChainId: chainID, - BlockHash: nil, - Finalized: false, - SharedStateId: sharedStateId, - SeenBlock: relayRequestData.SeenBlock, - }) // caching in the portal doesn't care about hashes, and we don't have data on finalization yet - cancel() - reply := cacheReply.GetReply() - - // read seen block from cache even if we had a miss we still want to get the seen block so we can use it to get the right provider. - cacheSeenBlock := cacheReply.GetSeenBlock() - // check if the cache seen block is greater than my local seen block, this means the user requested this - // request spoke with another consumer instance and use that block for inter consumer consistency. - if rpccs.sharedState && cacheSeenBlock > relayRequestData.SeenBlock { - utils.LavaFormatDebug("shared state seen block is newer", utils.LogAttr("cache_seen_block", cacheSeenBlock), utils.LogAttr("local_seen_block", relayRequestData.SeenBlock)) - relayRequestData.SeenBlock = cacheSeenBlock - // setting the fetched seen block from the cache server to our local cache as well. - rpccs.consumerConsistency.SetSeenBlock(cacheSeenBlock, dappID, consumerIp) - } + if rpccs.cache.CacheActive() { // use cache only if its defined. + if reqBlock != spectypes.NOT_APPLICABLE || !chainMessage.GetForceCacheRefresh() { + var cacheReply *pairingtypes.CacheRelayReply + hashKey, outputFormatter, err := chainlib.HashCacheRequest(relayRequestData, chainID) + if err != nil { + utils.LavaFormatError("sendRelayToProvider Failed getting Hash for cache request", err) + } else { + cacheCtx, cancel := context.WithTimeout(ctx, common.CacheTimeout) + cacheReply, cacheError = rpccs.cache.GetEntry(cacheCtx, &pairingtypes.RelayCacheGet{ + RequestHash: hashKey, + RequestedBlock: relayRequestData.RequestBlock, + ChainId: chainID, + BlockHash: nil, + Finalized: false, + SharedStateId: sharedStateId, + SeenBlock: relayRequestData.SeenBlock, + }) // caching in the portal doesn't care about hashes, and we don't have data on finalization yet + cancel() + reply := cacheReply.GetReply() + + // read seen block from cache even if we had a miss we still want to get the seen block so we can use it to get the right provider. + cacheSeenBlock := cacheReply.GetSeenBlock() + // check if the cache seen block is greater than my local seen block, this means the user requested this + // request spoke with another consumer instance and use that block for inter consumer consistency. 
+ if rpccs.sharedState && cacheSeenBlock > relayRequestData.SeenBlock { + utils.LavaFormatDebug("shared state seen block is newer", utils.LogAttr("cache_seen_block", cacheSeenBlock), utils.LogAttr("local_seen_block", relayRequestData.SeenBlock)) + relayRequestData.SeenBlock = cacheSeenBlock + // setting the fetched seen block from the cache server to our local cache as well. + rpccs.consumerConsistency.SetSeenBlock(cacheSeenBlock, dappID, consumerIp) + } - // handle cache reply - if cacheError == nil && reply != nil { - // Info was fetched from cache, so we don't need to change the state - // so we can return here, no need to update anything and calculate as this info was fetched from the cache - reply.Data = outputFormatter(reply.Data) - relayResult := common.RelayResult{ - Reply: reply, - Request: &pairingtypes.RelayRequest{ - RelayData: relayRequestData, - }, - Finalized: false, // set false to skip data reliability - StatusCode: 200, - ProviderInfo: common.ProviderInfo{ProviderAddress: ""}, + // handle cache reply + if cacheError == nil && reply != nil { + // Info was fetched from cache, so we don't need to change the state + // so we can return here, no need to update anything and calculate as this info was fetched from the cache + reply.Data = outputFormatter(reply.Data) + relayResult := common.RelayResult{ + Reply: reply, + Request: &pairingtypes.RelayRequest{ + RelayData: relayRequestData, + }, + Finalized: false, // set false to skip data reliability + StatusCode: 200, + ProviderInfo: common.ProviderInfo{ProviderAddress: ""}, + } + relayProcessor.SetResponse(&relayResponse{ + relayResult: relayResult, + err: nil, + }) + return nil + } + // cache failed, move on to regular relay + if performance.NotConnectedError.Is(cacheError) { + utils.LavaFormatDebug("cache not connected", utils.LogAttr("error", cacheError)) } - relayProcessor.SetResponse(&relayResponse{ - relayResult: relayResult, - err: nil, - }) - return nil - } - // cache failed, move on to regular relay - if performance.NotConnectedError.Is(cacheError) { - utils.LavaFormatDebug("cache not connected", utils.LogAttr("error", cacheError)) } + } else { + utils.LavaFormatDebug("skipping cache due to requested block being NOT_APPLICABLE", utils.Attribute{Key: "api name", Value: chainMessage.GetApi().Name}) } - } else { - utils.LavaFormatDebug("skipping cache due to requested block being NOT_APPLICABLE", utils.Attribute{Key: "api name", Value: chainMessage.GetApi().Name}) } if reqBlock == spectypes.LATEST_BLOCK && relayRequestData.SeenBlock != 0 { @@ -484,7 +513,8 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( virtualEpoch := rpccs.consumerTxSender.GetLatestVirtualEpoch() addon := chainlib.GetAddon(chainMessage) extensions := chainMessage.GetExtensions() - sessions, err := rpccs.consumerSessionManager.GetSessions(ctx, chainlib.GetComputeUnits(chainMessage), relayProcessor.GetUsedProviders(), reqBlock, addon, extensions, chainlib.GetStateful(chainMessage), virtualEpoch) + usedProviders := relayProcessor.GetUsedProviders() + sessions, err := rpccs.consumerSessionManager.GetSessions(ctx, chainlib.GetComputeUnits(chainMessage), usedProviders, reqBlock, addon, extensions, chainlib.GetStateful(chainMessage), virtualEpoch) if err != nil { if lavasession.PairingListEmptyError.Is(err) && (addon != "" || len(extensions) > 0) { // if we have no providers for a specific addon or extension, return an indicative error diff --git a/scripts/pre_setups/init_lava_only_with_node_three_providers.sh 
b/scripts/pre_setups/init_lava_only_with_node_three_providers.sh new file mode 100755 index 0000000000..851a7aefe9 --- /dev/null +++ b/scripts/pre_setups/init_lava_only_with_node_three_providers.sh @@ -0,0 +1,89 @@ +#!/bin/bash +__dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +source "$__dir"/../useful_commands.sh +. "${__dir}"/../vars/variables.sh + +LOGS_DIR=${__dir}/../../testutil/debugging/logs +mkdir -p $LOGS_DIR +rm $LOGS_DIR/*.log + +killall screen +screen -wipe + +echo "[Test Setup] installing all binaries" +make install-all + +echo "[Test Setup] setting up a new lava node" +screen -d -m -S node bash -c "./scripts/start_env_dev.sh" +screen -ls +echo "[Test Setup] sleeping 20 seconds for node to finish setup (if its not enough increase timeout)" +sleep 5 +wait_for_lava_node_to_start + +GASPRICE="0.000000001ulava" +lavad tx gov submit-legacy-proposal spec-add ./cookbook/specs/spec_add_ibc.json,./cookbook/specs/spec_add_cosmoswasm.json,./cookbook/specs/spec_add_cosmossdk.json,./cookbook/specs/spec_add_cosmossdk_45.json,./cookbook/specs/spec_add_cosmossdk_full.json,./cookbook/specs/spec_add_ethereum.json,./cookbook/specs/spec_add_cosmoshub.json,./cookbook/specs/spec_add_lava.json,./cookbook/specs/spec_add_osmosis.json,./cookbook/specs/spec_add_fantom.json,./cookbook/specs/spec_add_celo.json,./cookbook/specs/spec_add_optimism.json,./cookbook/specs/spec_add_arbitrum.json,./cookbook/specs/spec_add_starknet.json,./cookbook/specs/spec_add_aptos.json,./cookbook/specs/spec_add_juno.json,./cookbook/specs/spec_add_polygon.json,./cookbook/specs/spec_add_evmos.json,./cookbook/specs/spec_add_base.json,./cookbook/specs/spec_add_canto.json,./cookbook/specs/spec_add_sui.json,./cookbook/specs/spec_add_solana.json,./cookbook/specs/spec_add_bsc.json,./cookbook/specs/spec_add_axelar.json,./cookbook/specs/spec_add_avalanche.json,./cookbook/specs/spec_add_fvm.json --lava-dev-test -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE & +wait_next_block +wait_next_block +lavad tx gov vote 1 yes -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +sleep 4 + +# Plans proposal +lavad tx gov submit-legacy-proposal plans-add ./cookbook/plans/test_plans/default.json,./cookbook/plans/test_plans/temporary-add.json -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +wait_next_block +wait_next_block +lavad tx gov vote 2 yes -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE + +sleep 4 +CLIENTSTAKE="500000000000ulava" +PROVIDERSTAKE="500000000000ulava" + +PROVIDER1_LISTENER="127.0.0.1:2220" +PROVIDER2_LISTENER="127.0.0.1:2221" +PROVIDER3_LISTENER="127.0.0.1:2222" + +lavad tx subscription buy DefaultPlan $(lavad keys show user1 -a) -y --from user1 --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +wait_next_block +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER1_LISTENER,1" 1 $(operator_address) -y --from servicer1 --delegate-limit 1000ulava --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE; +wait_next_block +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER2_LISTENER,1" 1 $(operator_address) -y --from servicer2 --delegate-limit 1000ulava --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE; +wait_next_block +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER3_LISTENER,1" 1 $(operator_address) -y --from servicer3 --delegate-limit 1000ulava --provider-moniker 
"dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE; + + +sleep_until_next_epoch +wait_next_block + +echo "[Chaning Epoch Storage Params] submitting param change vote" +lavad tx gov submit-legacy-proposal param-change ./cookbook/param_changes/param_change_epoch_params.json -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE; +wait_next_block +wait_next_block +lavad tx gov vote 3 yes -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices 0.000000001ulava; + +screen -d -m -S provider1 bash -c "source ~/.bashrc; lavap rpcprovider \ +$PROVIDER1_LISTENER LAV1 rest '$LAVA_REST' \ +$PROVIDER1_LISTENER LAV1 tendermintrpc '$LAVA_RPC,$LAVA_RPC' \ +$PROVIDER1_LISTENER LAV1 grpc '$LAVA_GRPC' \ +$EXTRA_PROVIDER_FLAGS --geolocation 1 --log_level debug --from servicer1 --chain-id lava --metrics-listen-address ":7776" 2>&1 | tee $LOGS_DIR/PROVIDER1.log" && sleep 0.25 + +# screen -d -m -S provider2 bash -c "source ~/.bashrc; lavap rpcprovider \ +# $PROVIDER2_LISTENER LAV1 rest '$LAVA_REST' \ +# $PROVIDER2_LISTENER LAV1 tendermintrpc '$LAVA_RPC,$LAVA_RPC' \ +# $PROVIDER2_LISTENER LAV1 grpc '$LAVA_GRPC' \ +# $EXTRA_PROVIDER_FLAGS --geolocation 1 --log_level debug --from servicer2 --chain-id lava --metrics-listen-address ":7776" 2>&1 | tee $LOGS_DIR/PROVIDER2.log" && sleep 0.25 + +screen -d -m -S provider3 bash -c "source ~/.bashrc; lavap rpcprovider \ +$PROVIDER3_LISTENER LAV1 rest '$LAVA_REST' \ +$PROVIDER3_LISTENER LAV1 tendermintrpc '$LAVA_RPC,$LAVA_RPC' \ +$PROVIDER3_LISTENER LAV1 grpc '$LAVA_GRPC' \ +$EXTRA_PROVIDER_FLAGS --geolocation 1 --log_level debug --from servicer3 --chain-id lava --metrics-listen-address ":7776" 2>&1 | tee $LOGS_DIR/PROVIDER3.log" && sleep 0.25 + +wait_next_block + +# screen -d -m -S consumers bash -c "source ~/.bashrc; lavap rpcconsumer \ +# 127.0.0.1:3360 LAV1 rest 127.0.0.1:3361 LAV1 tendermintrpc 127.0.0.1:3362 LAV1 grpc \ +# $EXTRA_PORTAL_FLAGS --geolocation 1 --log_level debug --from user1 --chain-id lava --allow-insecure-provider-dialing --metrics-listen-address ":7779" 2>&1 | tee $LOGS_DIR/CONSUMERS.log" && sleep 0.25 + +echo "--- setting up screens done ---" +screen -ls + +echo "lavap rpcprovider $PROVIDER3_LISTENER LAV1 rest '$LAVA_REST' $PROVIDER3_LISTENER LAV1 tendermintrpc '$LAVA_RPC,$LAVA_RPC' $PROVIDER3_LISTENER LAV1 grpc '$LAVA_GRPC' $EXTRA_PROVIDER_FLAGS --geolocation 1 --log_level debug --from servicer3 --chain-id lava" \ No newline at end of file From 201f062ffc6296f383e4e25eb4acd872423fe4c2 Mon Sep 17 00:00:00 2001 From: Ran Mishael <106548467+ranlavanet@users.noreply.github.com> Date: Tue, 9 Apr 2024 14:28:29 +0200 Subject: [PATCH 07/17] target version 1.2.1 (#1361) --- x/protocol/types/params.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x/protocol/types/params.go b/x/protocol/types/params.go index 2346d5c0e7..ead7716231 100644 --- a/x/protocol/types/params.go +++ b/x/protocol/types/params.go @@ -12,7 +12,7 @@ import ( var _ paramtypes.ParamSet = (*Params)(nil) const ( - TARGET_VERSION = "1.0.4" + TARGET_VERSION = "1.2.1" MIN_VERSION = "1.0.2" ) From 7ee9959bbfe605f407968ae9f76ac38384a6622a Mon Sep 17 00:00:00 2001 From: oren-lava <111131399+oren-lava@users.noreply.github.com> Date: Thu, 11 Apr 2024 09:54:25 +0300 Subject: [PATCH 08/17] feat: CNS-940: Goerli deprecation (#1359) * CNS-940: add holesky * CNS-940: added holesky to scripts * CNS-940: deprecate goerli * CNS-940: small fix --- .../full_consumer_example.yml | 7 +- config/provider_examples/all_endpoints.yml | 4 +- 
.../spec_add_optimism_fast_lookup.json | 19 ---- .../projects/policy_all_chains_with_addon.yml | 2 +- .../policy_all_chains_with_extension.yml | 2 +- cookbook/specs/spec_add_base.json | 44 --------- cookbook/specs/spec_add_ethereum.json | 13 ++- cookbook/specs/spec_add_optimism.json | 44 --------- cookbook/specs/spec_add_polygon.json | 44 --------- cookbook/specs/spec_add_starknet.json | 90 ------------------- scripts/cli_test.sh | 2 +- scripts/init_chain_commands.sh | 2 +- scripts/init_e2e_lava_over_lava.sh | 12 +-- scripts/setup_providers.sh | 5 +- .../e2eProviderConfigs/consumer_policy.yml | 2 +- testutil/e2e/protocolE2E.go | 6 +- 16 files changed, 30 insertions(+), 268 deletions(-) diff --git a/config/consumer_examples/full_consumer_example.yml b/config/consumer_examples/full_consumer_example.yml index 1f56efae20..dcce3d5629 100644 --- a/config/consumer_examples/full_consumer_example.yml +++ b/config/consumer_examples/full_consumer_example.yml @@ -2,7 +2,7 @@ endpoints: - chain-id: ETH1 api-interface: jsonrpc network-address: 127.0.0.1:3333 - - chain-id: GTH1 + - chain-id: SEP1 api-interface: jsonrpc network-address: 127.0.0.1:3334 - chain-id: FTM250 @@ -29,7 +29,7 @@ endpoints: - chain-id: OPTM api-interface: jsonrpc network-address: 127.0.0.1:3352 - - chain-id: BASET + - chain-id: BASES api-interface: jsonrpc network-address: 127.0.0.1:3353 - chain-id: OSMOSIS @@ -152,6 +152,9 @@ endpoints: - chain-id: AVAXT api-interface: jsonrpc network-address: 127.0.0.1:3393 + - chain-id: HOL1 + api-interface: jsonrpc + network-address: 127.0.0.1:3394 metrics-listen-address: ":7779" # referer-be-address: "http://127.0.0.1:6500" # reports-be-address: "http://127.0.0.1:6501" \ No newline at end of file diff --git a/config/provider_examples/all_endpoints.yml b/config/provider_examples/all_endpoints.yml index 4769061d7f..318b2ba133 100644 --- a/config/provider_examples/all_endpoints.yml +++ b/config/provider_examples/all_endpoints.yml @@ -6,7 +6,7 @@ endpoints: node-urls: - url: - api-interface: jsonrpc - chain-id: GTH1 + chain-id: SEP1 network-address: address: "127.0.0.1:2221" node-urls: @@ -60,7 +60,7 @@ endpoints: node-urls: - url: - api-interface: jsonrpc - chain-id: BASET + chain-id: BASES network-address: address: "127.0.0.1:2221" node-urls: diff --git a/cookbook/backups/spec_add_optimism_fast_lookup.json b/cookbook/backups/spec_add_optimism_fast_lookup.json index a2d682a718..4142efc921 100644 --- a/cookbook/backups/spec_add_optimism_fast_lookup.json +++ b/cookbook/backups/spec_add_optimism_fast_lookup.json @@ -144,25 +144,6 @@ ] } ] - }, - { - "index": "OPTMT", - "name": "optimism goerli testnet", - "enabled": true, - "imports": [ - "OPTM" - ], - "reliability_threshold": 268435455, - "data_reliability_enabled": true, - "block_distance_for_finalized_data": 1, - "blocks_in_finalization_proof": 1, - "average_block_time": 250, - "allowed_block_lag_for_qos_sync": 40, - "shares" : 1, - "min_stake_provider": { - "denom": "ulava", - "amount": "50000000000" - } } ] }, diff --git a/cookbook/projects/policy_all_chains_with_addon.yml b/cookbook/projects/policy_all_chains_with_addon.yml index 77da183cd5..e43bb2b42c 100644 --- a/cookbook/projects/policy_all_chains_with_addon.yml +++ b/cookbook/projects/policy_all_chains_with_addon.yml @@ -29,7 +29,7 @@ Policy: api_interface: "jsonrpc" type: "POST" add_on: "debug" - - chain_id: GTH1 + - chain_id: SEP1 requirements: - collection: api_interface: "jsonrpc" diff --git a/cookbook/projects/policy_all_chains_with_extension.yml 
b/cookbook/projects/policy_all_chains_with_extension.yml index c0d96796f7..3027db2113 100644 --- a/cookbook/projects/policy_all_chains_with_extension.yml +++ b/cookbook/projects/policy_all_chains_with_extension.yml @@ -14,7 +14,7 @@ Policy: extensions: - "archive" mixed: true - - chain_id: GTH1 + - chain_id: SEP1 requirements: - collection: api_interface: "jsonrpc" diff --git a/cookbook/specs/spec_add_base.json b/cookbook/specs/spec_add_base.json index 35b0df90af..ffc8cdc980 100644 --- a/cookbook/specs/spec_add_base.json +++ b/cookbook/specs/spec_add_base.json @@ -68,50 +68,6 @@ } ] }, - { - "index": "BASET", - "name": "base goerli testnet", - "enabled": true, - "imports": [ - "BASE" - ], - "reliability_threshold": 268435455, - "data_reliability_enabled": true, - "block_distance_for_finalized_data": 1, - "blocks_in_finalization_proof": 1, - "average_block_time": 2000, - "allowed_block_lag_for_qos_sync": 5, - "shares": 1, - "min_stake_provider": { - "denom": "ulava", - "amount": "47500000000" - }, - "api_collections": [ - { - "enabled": true, - "collection_data": { - "api_interface": "jsonrpc", - "internal_path": "", - "type": "POST", - "add_on": "" - }, - "apis": [], - "headers": [], - "inheritance_apis": [], - "parse_directives": [], - "verifications": [ - { - "name": "chain-id", - "values": [ - { - "expected_value": "0x14a33" - } - ] - } - ] - } - ] - }, { "index": "BASES", "name": "base sepolia testnet", diff --git a/cookbook/specs/spec_add_ethereum.json b/cookbook/specs/spec_add_ethereum.json index ccef2c2586..3ff1f91664 100644 --- a/cookbook/specs/spec_add_ethereum.json +++ b/cookbook/specs/spec_add_ethereum.json @@ -1357,8 +1357,8 @@ ] }, { - "index": "GTH1", - "name": "ethereum testnet goerli", + "index": "SEP1", + "name": "ethereum testnet sepolia", "enabled": true, "imports": [ "ETH1" @@ -1369,7 +1369,6 @@ "blocks_in_finalization_proof": 3, "average_block_time": 13000, "allowed_block_lag_for_qos_sync": 2, - "shares": 1, "min_stake_provider": { "denom": "ulava", "amount": "47500000000" @@ -1392,7 +1391,7 @@ "name": "chain-id", "values": [ { - "expected_value": "0x5" + "expected_value": "0xaa36a7" } ] } @@ -1401,8 +1400,8 @@ ] }, { - "index": "SEP1", - "name": "ethereum testnet sepolia", + "index": "HOL1", + "name": "ethereum testnet holesky", "enabled": true, "imports": [ "ETH1" @@ -1435,7 +1434,7 @@ "name": "chain-id", "values": [ { - "expected_value": "0xaa36a7" + "expected_value": "0x4268" } ] } diff --git a/cookbook/specs/spec_add_optimism.json b/cookbook/specs/spec_add_optimism.json index 82048f24a3..ae6b00c88e 100644 --- a/cookbook/specs/spec_add_optimism.json +++ b/cookbook/specs/spec_add_optimism.json @@ -159,50 +159,6 @@ } ] }, - { - "index": "OPTMT", - "name": "optimism goerli testnet", - "enabled": true, - "imports": [ - "OPTM" - ], - "reliability_threshold": 268435455, - "data_reliability_enabled": true, - "block_distance_for_finalized_data": 1, - "blocks_in_finalization_proof": 1, - "average_block_time": 5000, - "allowed_block_lag_for_qos_sync": 200, - "shares": 1, - "min_stake_provider": { - "denom": "ulava", - "amount": "47500000000" - }, - "api_collections": [ - { - "enabled": true, - "collection_data": { - "api_interface": "jsonrpc", - "internal_path": "", - "type": "POST", - "add_on": "" - }, - "apis": [], - "headers": [], - "inheritance_apis": [], - "parse_directives": [], - "verifications": [ - { - "name": "chain-id", - "values": [ - { - "expected_value": "0x1a4" - } - ] - } - ] - } - ] - }, { "index": "OPTMS", "name": "optimism sepolia testnet", diff --git 
a/cookbook/specs/spec_add_polygon.json b/cookbook/specs/spec_add_polygon.json index a39a54fdd8..39cb041c91 100644 --- a/cookbook/specs/spec_add_polygon.json +++ b/cookbook/specs/spec_add_polygon.json @@ -249,50 +249,6 @@ } ] }, - { - "index": "POLYGON1T", - "name": "polygon testnet", - "enabled": true, - "imports": [ - "POLYGON1" - ], - "reliability_threshold": 268435455, - "data_reliability_enabled": true, - "block_distance_for_finalized_data": 1, - "blocks_in_finalization_proof": 3, - "average_block_time": 2000, - "allowed_block_lag_for_qos_sync": 5, - "shares": 1, - "min_stake_provider": { - "denom": "ulava", - "amount": "47500000000" - }, - "api_collections": [ - { - "enabled": true, - "collection_data": { - "api_interface": "jsonrpc", - "internal_path": "", - "type": "POST", - "add_on": "" - }, - "apis": [], - "headers": [], - "inheritance_apis": [], - "parse_directives": [], - "verifications": [ - { - "name": "chain-id", - "values": [ - { - "expected_value": "0x13881" - } - ] - } - ] - } - ] - }, { "index": "POLYGON1A", "name": "polygon amoy testnet", diff --git a/cookbook/specs/spec_add_starknet.json b/cookbook/specs/spec_add_starknet.json index 1c33b0359f..455d13ae04 100644 --- a/cookbook/specs/spec_add_starknet.json +++ b/cookbook/specs/spec_add_starknet.json @@ -727,96 +727,6 @@ } ] }, - { - "index": "STRKT", - "name": "starknet testnet", - "enabled": true, - "imports": [ - "STRK" - ], - "reliability_threshold": 268435455, - "data_reliability_enabled": true, - "block_distance_for_finalized_data": 1, - "blocks_in_finalization_proof": 3, - "average_block_time": 1800000, - "allowed_block_lag_for_qos_sync": 1, - "shares": 1, - "min_stake_provider": { - "denom": "ulava", - "amount": "47500000000" - }, - "api_collections": [ - { - "enabled": true, - "collection_data": { - "api_interface": "jsonrpc", - "internal_path": "", - "type": "POST", - "add_on": "" - }, - "apis": [], - "headers": [], - "inheritance_apis": [], - "parse_directives": [], - "verifications": [ - { - "name": "chain-id", - "values": [ - { - "expected_value": "0x534e5f474f45524c49" - } - ] - } - ] - }, - { - "enabled": true, - "collection_data": { - "api_interface": "jsonrpc", - "internal_path": "/rpc/v0_6", - "type": "POST", - "add_on": "" - }, - "apis": [], - "headers": [], - "inheritance_apis": [], - "parse_directives": [], - "verifications": [ - { - "name": "chain-id", - "values": [ - { - "expected_value": "0x534e5f474f45524c49" - } - ] - } - ] - }, - { - "enabled": true, - "collection_data": { - "api_interface": "jsonrpc", - "internal_path": "/rpc/v0_5", - "type": "POST", - "add_on": "" - }, - "apis": [], - "headers": [], - "inheritance_apis": [], - "parse_directives": [], - "verifications": [ - { - "name": "chain-id", - "values": [ - { - "expected_value": "0x534e5f474f45524c49" - } - ] - } - ] - } - ] - }, { "index": "STRKS", "name": "starknet sepolia testnet", diff --git a/scripts/cli_test.sh b/scripts/cli_test.sh index f7bd410c6f..de2cbeece1 100755 --- a/scripts/cli_test.sh +++ b/scripts/cli_test.sh @@ -126,7 +126,7 @@ wait_count_blocks 1 >/dev/null (trace lavad tx pairing stake-provider ETH1 $PROVIDERSTAKE "$PROVIDER1_LISTENER,1" 1 $(operator_address) --provider-moniker "provider" $txoptions)>/dev/null wait_count_blocks 1 >/dev/null -CHAINS="GTH1,OSMOSIS,FTM250,CELO,LAV1,OSMOSIST,ALFAJORES,ARB1,ARBN,APT1,STRK,JUN1,COSMOSHUB,POLYGON1,EVMOS,OPTM,BASET,CANTO,SUIT,SOLANA,BSC,AXELAR,AVAX,FVM,NEAR" 
+CHAINS="SEP1,OSMOSIS,FTM250,CELO,LAV1,OSMOSIST,ALFAJORES,ARB1,ARBN,APT1,STRK,JUN1,COSMOSHUB,POLYGON1,EVMOS,OPTM,BASET,CANTO,SUIT,SOLANA,BSC,AXELAR,AVAX,FVM,NEAR" (trace lavad tx pairing bulk-stake-provider $CHAINS $PROVIDERSTAKE "$PROVIDER1_LISTENER,1" 1 $(operator_address) --provider-moniker "provider" $txoptions)>/dev/null sleep_until_next_epoch >/dev/null diff --git a/scripts/init_chain_commands.sh b/scripts/init_chain_commands.sh index efea65abbc..405c42ae4e 100755 --- a/scripts/init_chain_commands.sh +++ b/scripts/init_chain_commands.sh @@ -69,7 +69,7 @@ lavad tx subscription buy DefaultPlan $(lavad keys show user1 -a) --enable-auto- # lavad tx project set-policy $(lavad keys show user1 -a)-admin ./cookbook/projects/policy_all_chains_with_addon.yml -y --from user1 --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE # MANTLE -CHAINS="ETH1,GTH1,SEP1,OSMOSIS,FTM250,CELO,LAV1,OSMOSIST,ALFAJORES,ARB1,ARBN,APT1,STRK,JUN1,COSMOSHUB,POLYGON1,EVMOS,OPTM,BASET,CANTO,SUIT,SOLANA,BSC,AXELAR,AVAX,FVM,NEAR,SQDSUBGRAPH,AGR,AGRT,KOIIT,AVAXT" +CHAINS="ETH1,SEP1,HOL1,OSMOSIS,FTM250,CELO,LAV1,OSMOSIST,ALFAJORES,ARB1,ARBN,APT1,STRK,JUN1,COSMOSHUB,POLYGON1,EVMOS,OPTM,BASES,CANTO,SUIT,SOLANA,BSC,AXELAR,AVAX,FVM,NEAR,SQDSUBGRAPH,AGR,AGRT,KOIIT,AVAXT" BASE_CHAINS="ETH1,LAV1" # stake providers on all chains echo; echo "#### Staking provider 1 ####" diff --git a/scripts/init_e2e_lava_over_lava.sh b/scripts/init_e2e_lava_over_lava.sh index e18f57fd06..1e9337af6b 100755 --- a/scripts/init_e2e_lava_over_lava.sh +++ b/scripts/init_e2e_lava_over_lava.sh @@ -7,16 +7,16 @@ GASPRICE="0.000000001ulava" NODE="http://127.0.0.1:3340/1" STAKE="500000000000ulava" -# Goerli providers -lavad tx pairing stake-provider "GTH1" $STAKE "127.0.0.1:2121,1" 1 $(operator_address) -y --from servicer1 --delegate-limit $STAKE --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE --node $NODE +# Sepolia providers +lavad tx pairing stake-provider "SEP1" $STAKE "127.0.0.1:2121,1" 1 $(operator_address) -y --from servicer1 --delegate-limit $STAKE --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE --node $NODE wait_next_block -lavad tx pairing stake-provider "GTH1" $STAKE "127.0.0.1:2122,1" 1 $(operator_address) -y --from servicer2 --delegate-limit $STAKE --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE --node $NODE +lavad tx pairing stake-provider "SEP1" $STAKE "127.0.0.1:2122,1" 1 $(operator_address) -y --from servicer2 --delegate-limit $STAKE --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE --node $NODE wait_next_block -lavad tx pairing stake-provider "GTH1" $STAKE "127.0.0.1:2123,1" 1 $(operator_address) -y --from servicer3 --delegate-limit $STAKE --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE --node $NODE +lavad tx pairing stake-provider "SEP1" $STAKE "127.0.0.1:2123,1" 1 $(operator_address) -y --from servicer3 --delegate-limit $STAKE --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE --node $NODE wait_next_block -lavad tx pairing stake-provider "GTH1" $STAKE "127.0.0.1:2124,1" 1 $(operator_address) -y --from servicer4 --delegate-limit $STAKE --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE --node $NODE +lavad tx pairing stake-provider "SEP1" $STAKE "127.0.0.1:2124,1" 1 $(operator_address) -y --from servicer4 --delegate-limit $STAKE 
--provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE --node $NODE wait_next_block -lavad tx pairing stake-provider "GTH1" $STAKE "127.0.0.1:2125,1" 1 $(operator_address) -y --from servicer5 --delegate-limit $STAKE --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE --node $NODE +lavad tx pairing stake-provider "SEP1" $STAKE "127.0.0.1:2125,1" 1 $(operator_address) -y --from servicer5 --delegate-limit $STAKE --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE --node $NODE wait_next_block lavad tx subscription buy DefaultPlan $(lavad keys show user1 -a) -y --from user1 --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE diff --git a/scripts/setup_providers.sh b/scripts/setup_providers.sh index 8734fc08fd..d4c0f35c75 100755 --- a/scripts/setup_providers.sh +++ b/scripts/setup_providers.sh @@ -26,7 +26,8 @@ screen -d -m -S cache-consumer bash -c "source ~/.bashrc; lavap cache 127.0.0.1: echo; echo "#### Starting provider 1 ####" screen -d -m -S provider1 bash -c "source ~/.bashrc; lavap rpcprovider \ $PROVIDER1_LISTENER ETH1 jsonrpc '$ETH_RPC_WS' \ -$PROVIDER1_LISTENER GTH1 jsonrpc '$GTH_RPC_WS' \ +$PROVIDER1_LISTENER SEP1 jsonrpc '$SEP_RPC_WS' \ +$PROVIDER1_LISTENER HOL1 jsonrpc '$HOL_RPC_WS' \ $PROVIDER1_LISTENER FTM250 jsonrpc '$FTM_RPC_HTTP' \ $PROVIDER1_LISTENER CELO jsonrpc '$CELO_HTTP' \ $PROVIDER1_LISTENER ALFAJORES jsonrpc '$CELO_ALFAJORES_HTTP' \ @@ -35,7 +36,7 @@ $PROVIDER1_LISTENER APT1 rest '$APTOS_REST' \ $PROVIDER1_LISTENER STRK jsonrpc '$STARKNET_RPC' \ $PROVIDER1_LISTENER POLYGON1 jsonrpc '$POLYGON_MAINNET_RPC' \ $PROVIDER1_LISTENER OPTM jsonrpc '$OPTIMISM_RPC' \ -$PROVIDER1_LISTENER BASET jsonrpc '$BASE_GOERLI_RPC' \ +$PROVIDER1_LISTENER BASE jsonrpc '$BASE_RPC' \ $PROVIDER1_LISTENER BSC jsonrpc '$BSC_RPC' \ $PROVIDER1_LISTENER SOLANA jsonrpc '$SOLANA_RPC' \ $PROVIDER1_LISTENER SUIT jsonrpc '$SUI_RPC' \ diff --git a/testutil/e2e/e2eProviderConfigs/consumer_policy.yml b/testutil/e2e/e2eProviderConfigs/consumer_policy.yml index 737988efd6..eff68a5824 100644 --- a/testutil/e2e/e2eProviderConfigs/consumer_policy.yml +++ b/testutil/e2e/e2eProviderConfigs/consumer_policy.yml @@ -27,7 +27,7 @@ Policy: type: "POST" add_on: "debug" mixed: true - - chain_id: GTH1 + - chain_id: SEP1 requirements: - collection: api_interface: "jsonrpc" diff --git a/testutil/e2e/protocolE2E.go b/testutil/e2e/protocolE2E.go index b315733c95..dc5d72d91a 100644 --- a/testutil/e2e/protocolE2E.go +++ b/testutil/e2e/protocolE2E.go @@ -52,7 +52,7 @@ var ( checkedPlansE2E = []string{"DefaultPlan", "EmergencyModePlan"} checkedSubscriptions = []string{"user1", "user2", "user3", "user5"} checkedSpecsE2E = []string{"LAV1", "ETH1"} - checkedSpecsE2ELOL = []string{"GTH1"} + checkedSpecsE2ELOL = []string{"SEP1"} checkedSubscriptionsLOL = []string{"user4"} ) @@ -641,7 +641,7 @@ func (lt *lavaTest) lavaOverLava(ctx context.Context) { lt.execCommand(ctx, "startJSONRPCConsumer", "07_lavaOverLava", command, true) // scripts/init_e2e.sh will: - // - produce 5 specs: ETH1, GTH1, SEP1, IBC, COSMOSSDK, LAV1 (via spec_add_{ethereum,cosmoshub,lava}) + // - produce 5 specs: ETH1, HOL1, SEP1, IBC, COSMOSSDK, LAV1 (via spec_add_{ethereum,cosmoshub,lava}) // - produce 2 plans: "DefaultPlan", "EmergencyModePlan" lt.checkStakeLava(2, 6, 4, 5, checkedPlansE2E, checkedSpecsE2ELOL, checkedSubscriptionsLOL, "Lava Over Lava Test OK") @@ -1218,7 +1218,7 @@ func runProtocolE2E(timeout time.Duration) { 
lt.stakeLava(ctx) // scripts/init_e2e.sh will: - // - produce 4 specs: ETH1, GTH1, SEP1, IBC, COSMOSSDK, LAV1 (via spec_add_{ethereum,cosmoshub,lava}) + // - produce 4 specs: ETH1, HOL1, SEP1, IBC, COSMOSSDK, LAV1 (via spec_add_{ethereum,cosmoshub,lava}) // - produce 2 plans: "DefaultPlan", "EmergencyModePlan" // - produce 5 staked providers (for each of ETH1, LAV1) // - produce 1 staked client (for each of ETH1, LAV1) From 77e84469e35ed669456218e4468dcd0f191d0a7a Mon Sep 17 00:00:00 2001 From: Ran Mishael <106548467+ranlavanet@users.noreply.github.com> Date: Thu, 11 Apr 2024 09:27:13 +0200 Subject: [PATCH 09/17] chore: PRT-add more info to relay timeout case (#1366) * fixing parsing bug in near * remove spam on api support jsonrpc * remove spam on tendermintrpc * add more info for no pairing available * adding debug prints to parser.go * adding more debug information to relay timeout * fix debugging scripts near * adding get method for better debugging skills * adding get method * adding get method 2 * adding get method 3 * adding get method 4 * adding get method 5 * adding get method 6 * delete unused get all providers * top ten providers for stateful calls. stateful is now limited to 10 providers each time to prevent spam and unnecessary load * increase protocol version * fix null deref --- cookbook/specs/spec_add_near.json | 7 +++-- protocol/chainlib/chainproxy/common.go | 4 +++ .../chainproxy/rpcInterfaceMessages/common.go | 4 +++ .../rpcInterfaceMessages/grpcMessage.go | 4 +++ .../rpcInterfaceMessages/jsonRPCMessage.go | 4 +++ .../rpcInterfaceMessages/restMessage.go | 4 +++ protocol/chainlib/jsonRPC.go | 3 +- protocol/chainlib/tendermintRPC.go | 3 +- protocol/lavasession/common.go | 11 -------- .../lavasession/consumer_session_manager.go | 28 +++++++++++++++++-- protocol/parser/parser.go | 17 +++++++---- protocol/parser/parser_test.go | 4 +++ protocol/rpcconsumer/rpcconsumer_server.go | 1 + .../pre_setups/init_near_only_with_node.sh | 2 +- x/protocol/types/params.go | 2 +- 15 files changed, 72 insertions(+), 26 deletions(-) diff --git a/cookbook/specs/spec_add_near.json b/cookbook/specs/spec_add_near.json index 364e7ae622..d333ddbd75 100644 --- a/cookbook/specs/spec_add_near.json +++ b/cookbook/specs/spec_add_near.json @@ -72,10 +72,11 @@ "name": "block", "block_parsing": { "parser_arg": [ - "0", - "block_id" + "block_id", + "=", + "0" ], - "parser_func": "PARSE_CANONICAL", + "parser_func": "PARSE_DICTIONARY_OR_ORDERED", "default_value": "latest" }, "compute_units": 10, diff --git a/protocol/chainlib/chainproxy/common.go b/protocol/chainlib/chainproxy/common.go index 7986126482..53b0043121 100644 --- a/protocol/chainlib/chainproxy/common.go +++ b/protocol/chainlib/chainproxy/common.go @@ -80,6 +80,10 @@ func (dri DefaultRPCInput) GetParams() interface{} { return nil } +func (dri DefaultRPCInput) GetMethod() string { + return "" +} + func (dri DefaultRPCInput) GetResult() json.RawMessage { return dri.Result } diff --git a/protocol/chainlib/chainproxy/rpcInterfaceMessages/common.go b/protocol/chainlib/chainproxy/rpcInterfaceMessages/common.go index ef924b0d62..b7c5199c62 100644 --- a/protocol/chainlib/chainproxy/rpcInterfaceMessages/common.go +++ b/protocol/chainlib/chainproxy/rpcInterfaceMessages/common.go @@ -21,6 +21,10 @@ func (pri ParsableRPCInput) GetParams() interface{} { return nil } +func (pri ParsableRPCInput) GetMethod() string { + return "" +} + func (pri ParsableRPCInput) GetResult() json.RawMessage { return pri.Result } diff --git 
a/protocol/chainlib/chainproxy/rpcInterfaceMessages/grpcMessage.go b/protocol/chainlib/chainproxy/rpcInterfaceMessages/grpcMessage.go index c7d5833e07..a9744c9cc7 100644 --- a/protocol/chainlib/chainproxy/rpcInterfaceMessages/grpcMessage.go +++ b/protocol/chainlib/chainproxy/rpcInterfaceMessages/grpcMessage.go @@ -92,6 +92,10 @@ func (gm GrpcMessage) GetResult() json.RawMessage { return nil } +func (gm GrpcMessage) GetMethod() string { + return gm.Path +} + func (gm GrpcMessage) NewParsableRPCInput(input json.RawMessage) (parser.RPCInput, error) { msgFactory := dynamic.NewMessageFactoryWithDefaults() if gm.methodDesc == nil { diff --git a/protocol/chainlib/chainproxy/rpcInterfaceMessages/jsonRPCMessage.go b/protocol/chainlib/chainproxy/rpcInterfaceMessages/jsonRPCMessage.go index 11e0ff49b9..b78349dd89 100644 --- a/protocol/chainlib/chainproxy/rpcInterfaceMessages/jsonRPCMessage.go +++ b/protocol/chainlib/chainproxy/rpcInterfaceMessages/jsonRPCMessage.go @@ -107,6 +107,10 @@ func (cp JsonrpcMessage) GetParams() interface{} { return cp.Params } +func (cp JsonrpcMessage) GetMethod() string { + return cp.Method +} + func (cp JsonrpcMessage) GetResult() json.RawMessage { if cp.Error != nil { utils.LavaFormatWarning("GetResult() Request got an error from the node", nil, utils.Attribute{Key: "error", Value: cp.Error}) diff --git a/protocol/chainlib/chainproxy/rpcInterfaceMessages/restMessage.go b/protocol/chainlib/chainproxy/rpcInterfaceMessages/restMessage.go index f7b51495a3..2ead66aaa3 100644 --- a/protocol/chainlib/chainproxy/rpcInterfaceMessages/restMessage.go +++ b/protocol/chainlib/chainproxy/rpcInterfaceMessages/restMessage.go @@ -78,6 +78,10 @@ func (cp RestMessage) GetResult() json.RawMessage { return nil } +func (cp RestMessage) GetMethod() string { + return cp.Path +} + // ParseBlock parses default block number from string to int func (cp RestMessage) ParseBlock(inp string) (int64, error) { return parser.ParseDefaultBlockParameter(inp) diff --git a/protocol/chainlib/jsonRPC.go b/protocol/chainlib/jsonRPC.go index 8c0f95442d..00b1b6e436 100644 --- a/protocol/chainlib/jsonRPC.go +++ b/protocol/chainlib/jsonRPC.go @@ -106,7 +106,8 @@ func (apip *JsonRPCChainParser) ParseMsg(url string, data []byte, connectionType // Check api is supported and save it in nodeMsg apiCont, err := apip.getSupportedApi(msg.Method, connectionType) if err != nil { - return nil, utils.LavaFormatWarning("getSupportedApi jsonrpc failed", err, utils.LogAttr("method", msg.Method)) + utils.LavaFormatInfo("getSupportedApi jsonrpc failed", utils.LogAttr("method", msg.Method), utils.LogAttr("error", err)) + return nil, err } apiCollectionForMessage, err := apip.getApiCollection(connectionType, apiCont.collectionKey.InternalPath, apiCont.collectionKey.Addon) diff --git a/protocol/chainlib/tendermintRPC.go b/protocol/chainlib/tendermintRPC.go index c0568e3f43..9cfa8fbdfd 100644 --- a/protocol/chainlib/tendermintRPC.go +++ b/protocol/chainlib/tendermintRPC.go @@ -137,7 +137,8 @@ func (apip *TendermintChainParser) ParseMsg(urlPath string, data []byte, connect // Check api is supported and save it in nodeMsg apiCont, err := apip.getSupportedApi(msg.Method, connectionType) if err != nil { - return nil, utils.LavaFormatWarning("getSupportedApi jsonrpc failed", err, utils.LogAttr("method", msg.Method)) + utils.LavaFormatInfo("getSupportedApi jsonrpc failed", utils.LogAttr("method", msg.Method), utils.LogAttr("error", err)) + return nil, err } apiCollectionForMessage, err := apip.getApiCollection(connectionType, 
apiCont.collectionKey.InternalPath, apiCont.collectionKey.Addon) diff --git a/protocol/lavasession/common.go b/protocol/lavasession/common.go index 5aeed0a0a4..00bd892fca 100644 --- a/protocol/lavasession/common.go +++ b/protocol/lavasession/common.go @@ -141,17 +141,6 @@ func GetTlsConfig(networkAddress NetworkAddressData) *tls.Config { return tlsConfig } -func GetAllProviders(allAddresses []string, ignoredProviders map[string]struct{}) (returnedProviders []string) { - for _, providerAddress := range allAddresses { - if _, ok := ignoredProviders[providerAddress]; ok { - // ignored provider, skip it - continue - } - returnedProviders = append(returnedProviders, providerAddress) - } - return returnedProviders -} - func SortByGeolocations(pairingEndpoints []*Endpoint, currentGeo planstypes.Geolocation) { latencyToGeo := func(a, b planstypes.Geolocation) uint64 { _, latency := scores.CalcGeoLatency(a, []planstypes.Geolocation{b}) diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index 418aade5dc..d552ff80ea 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -499,6 +499,30 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS } } +// csm must be rlocked here +func (csm *ConsumerSessionManager) getTopTenProvidersForStatefulCalls(validAddresses []string, ignoredProvidersList map[string]struct{}) []string { + // sort by cu used, easiest to sort by that factor as it probably means highest QOS and easily read by atomic + customSort := func(i, j int) bool { + return csm.pairing[validAddresses[i]].atomicReadUsedComputeUnits() > csm.pairing[validAddresses[j]].atomicReadUsedComputeUnits() + } + // Sort the slice using the custom sorting rule + sort.Slice(validAddresses, customSort) + validAddressesMaxIndex := len(validAddresses) - 1 + addresses := []string{} + for i := 0; i < 10; i++ { + // do not overflow + if i > validAddressesMaxIndex { + break + } + // skip ignored providers + if _, foundInIgnoredProviderList := ignoredProvidersList[validAddresses[i]]; foundInIgnoredProviderList { + continue + } + addresses = append(addresses, validAddresses[i]) + } + return addresses +} + // Get a valid provider address. func (csm *ConsumerSessionManager) getValidProviderAddresses(ignoredProvidersList map[string]struct{}, cu uint64, requestedBlock int64, addon string, extensions []string, stateful uint32) (addresses []string, err error) { // cs.Lock must be Rlocked here. @@ -522,7 +546,7 @@ func (csm *ConsumerSessionManager) getValidProviderAddresses(ignoredProvidersLis } var providers []string if stateful == common.CONSISTENCY_SELECT_ALL_PROVIDERS && csm.providerOptimizer.Strategy() != provideroptimizer.STRATEGY_COST { - providers = GetAllProviders(validAddresses, ignoredProvidersList) + providers = csm.getTopTenProvidersForStatefulCalls(validAddresses, ignoredProvidersList) } else { providers = csm.providerOptimizer.ChooseProvider(validAddresses, ignoredProvidersList, cu, requestedBlock, OptimizerPerturbation) } @@ -603,7 +627,7 @@ func (csm *ConsumerSessionManager) tryGetConsumerSessionWithProviderFromBlockedP } // if we got here we failed to fetch a valid provider meaning no pairing available. 
- return nil, utils.LavaFormatError(csm.rpcEndpoint.ChainID+" could not get a provider address from blocked provider list", PairingListEmptyError, utils.LogAttr("csm.currentlyBlockedProviderAddresses", csm.currentlyBlockedProviderAddresses), utils.LogAttr("addons", addon), utils.LogAttr("extensions", extensions)) + return nil, utils.LavaFormatError(csm.rpcEndpoint.ChainID+" could not get a provider address from blocked provider list", PairingListEmptyError, utils.LogAttr("csm.currentlyBlockedProviderAddresses", csm.currentlyBlockedProviderAddresses), utils.LogAttr("addons", addon), utils.LogAttr("extensions", extensions), utils.LogAttr("ignoredProviders", ignoredProviders.providers)) } func (csm *ConsumerSessionManager) getValidConsumerSessionsWithProvider(ignoredProviders *ignoredProviders, cuNeededForSession uint64, requestedBlock int64, addon string, extensions []string, stateful uint32, virtualEpoch uint64) (sessionWithProviderMap SessionWithProviderMap, err error) { diff --git a/protocol/parser/parser.go b/protocol/parser/parser.go index 711ce148eb..086efc3337 100644 --- a/protocol/parser/parser.go +++ b/protocol/parser/parser.go @@ -17,6 +17,7 @@ import ( const ( PARSE_PARAMS = 0 PARSE_RESULT = 1 + debug = false ) var ValueNotSetError = sdkerrors.New("Value Not Set ", 6662, "when trying to parse, the value that we attempted to parse did not exist") @@ -26,6 +27,7 @@ type RPCInput interface { GetResult() json.RawMessage ParseBlock(block string) (int64, error) GetHeaders() []pairingtypes.Metadata + GetMethod() string } func ParseDefaultBlockParameter(block string) (int64, error) { @@ -138,7 +140,7 @@ func parse(rpcInput RPCInput, blockParser spectypes.BlockParser, dataSource int) case spectypes.PARSER_FUNC_PARSE_DICTIONARY_OR_ORDERED: retval, err = parseDictionaryOrOrdered(rpcInput, blockParser.ParserArg, dataSource) case spectypes.PARSER_FUNC_DEFAULT: - retval = parseDefault(rpcInput, blockParser.ParserArg, dataSource) + retval = parseDefault(blockParser.ParserArg) default: return nil, fmt.Errorf("unsupported block parser parserFunc") } @@ -152,10 +154,14 @@ func parse(rpcInput RPCInput, blockParser spectypes.BlockParser, dataSource int) } } + if debug { + utils.LavaFormatDebug("parsed block:", utils.LogAttr("retval", retval)) + } + return retval, nil } -func parseDefault(rpcInput RPCInput, input []string, dataSource int) []interface{} { +func parseDefault(input []string) []interface{} { retArr := make([]interface{}, 0) retArr = append(retArr, input[0]) return retArr @@ -214,7 +220,6 @@ func blockInterfaceToString(block interface{}) string { return castedBlock case float64: return strconv.FormatFloat(castedBlock, 'f', -1, 64) - case int64: return strconv.FormatInt(castedBlock, 10) case uint64: @@ -287,14 +292,14 @@ func parseCanonical(rpcInput RPCInput, input []string, dataSource int) ([]interf for _, key := range input[1:] { // type assertion for blockcontainer if blockContainer, ok := blockContainer.(map[string]interface{}); !ok { - return nil, utils.LavaFormatWarning("invalid parser input format, blockContainer is not map[string]interface{}", ValueNotSetError, utils.LogAttr("blockContainer", fmt.Sprintf("%v", blockContainer)), utils.LogAttr("key", key), utils.LogAttr("unmarshaledDataTyped", unmarshaledDataTyped)) + return nil, utils.LavaFormatWarning("invalid parser input format, blockContainer is not map[string]interface{}", ValueNotSetError, utils.LogAttr("method", rpcInput.GetMethod()), utils.LogAttr("blockContainer", fmt.Sprintf("%v", blockContainer)), utils.LogAttr("key", 
key), utils.LogAttr("unmarshaledDataTyped", unmarshaledDataTyped)) } // assertion for key if container, ok := blockContainer.(map[string]interface{})[key]; ok { blockContainer = container } else { - return nil, utils.LavaFormatWarning("invalid parser input format, blockContainer does not have the field searched inside", ValueNotSetError, utils.LogAttr("blockContainer", fmt.Sprintf("%v", blockContainer)), utils.LogAttr("key", key), utils.LogAttr("unmarshaledDataTyped", unmarshaledDataTyped)) + return nil, utils.LavaFormatWarning("invalid parser input format, blockContainer does not have the field searched inside", ValueNotSetError, utils.LogAttr("method", rpcInput.GetMethod()), utils.LogAttr("blockContainer", fmt.Sprintf("%v", blockContainer)), utils.LogAttr("key", key), utils.LogAttr("unmarshaledDataTyped", unmarshaledDataTyped)) } } retArr := make([]interface{}, 0) @@ -432,7 +437,7 @@ func parseDictionaryOrOrdered(rpcInput RPCInput, input []string, dataSource int) } // Else return not set error - return nil, ValueNotSetError + return nil, utils.LavaFormatWarning("Failed parsing parseDictionaryOrOrdered", ValueNotSetError, utils.LogAttr("propName", propName), utils.LogAttr("inp", inp), utils.LogAttr("unmarshalledDataTyped", unmarshalledDataTyped), utils.LogAttr("method", rpcInput.GetMethod())) default: return nil, fmt.Errorf("not Supported ParseDictionary with other types: %T", unmarshalledData) } diff --git a/protocol/parser/parser_test.go b/protocol/parser/parser_test.go index ed00d61792..a39bc5ff79 100644 --- a/protocol/parser/parser_test.go +++ b/protocol/parser/parser_test.go @@ -19,6 +19,10 @@ type RPCInputTest struct { GetHeadersFunc func() []pairingtypes.Metadata } +func (rpcInputTest *RPCInputTest) GetMethod() string { + return "" +} + func (rpcInputTest *RPCInputTest) GetParams() interface{} { return rpcInputTest.Params } diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index a69debad8b..c00679a531 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -399,6 +399,7 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH case <-processingCtx.Done(): // in case we got a processing timeout we return context deadline exceeded to the user. 
utils.LavaFormatWarning("Relay Got processingCtx timeout", nil, + utils.LogAttr("processingTimeout", processingTimeout), utils.LogAttr("dappId", dappID), utils.LogAttr("consumerIp", consumerIp), utils.LogAttr("chainMessage.GetApi().Name", chainMessage.GetApi().Name), diff --git a/scripts/pre_setups/init_near_only_with_node.sh b/scripts/pre_setups/init_near_only_with_node.sh index 4b56228fc3..c5e9f79747 100755 --- a/scripts/pre_setups/init_near_only_with_node.sh +++ b/scripts/pre_setups/init_near_only_with_node.sh @@ -42,7 +42,7 @@ PROVIDER1_LISTENER="127.0.0.1:2220" lavad tx subscription buy DefaultPlan $(lavad keys show user1 -a) -y --from user1 --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE wait_next_block -lavad tx pairing stake-provider "NEAR" $PROVIDERSTAKE "$PROVIDER1_LISTENER,1" 1 $(operator_address) -y --from servicer1 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx pairing stake-provider "NEAR" $PROVIDERSTAKE "$PROVIDER1_LISTENER,1" 1 $(operator_address) -y --from servicer1 --provider-moniker "dummyMoniker" --delegate-limit 1000ulava --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE sleep_until_next_epoch diff --git a/x/protocol/types/params.go b/x/protocol/types/params.go index ead7716231..f08a395bbe 100644 --- a/x/protocol/types/params.go +++ b/x/protocol/types/params.go @@ -12,7 +12,7 @@ import ( var _ paramtypes.ParamSet = (*Params)(nil) const ( - TARGET_VERSION = "1.2.1" + TARGET_VERSION = "1.2.2" MIN_VERSION = "1.0.2" ) From d220effb086ed502e41f4f0e5d0362b4d8a1ddd2 Mon Sep 17 00:00:00 2001 From: Denis Policastro Date: Thu, 11 Apr 2024 05:39:22 -0300 Subject: [PATCH 10/17] docs: update docker README (#1341) Co-authored-by: Yaroms <103432884+Yaroms@users.noreply.github.com> --- docker/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/README.md b/docker/README.md index cdc0794f7e..faba5bca55 100644 --- a/docker/README.md +++ b/docker/README.md @@ -10,7 +10,7 @@ 2. 
Build the lava docker image locally ``` # to build from the current checked-out code: - make docker-build + LAVA_BINARY=all make docker-build # to build a specific lava version LAVA_BUILD_OPTIONS="release" LAVA_VERSION=0.4.3 make docker-build From 13cecd91b1417bfe97bf73d391d237eb6847319f Mon Sep 17 00:00:00 2001 From: Ran Mishael <106548467+ranlavanet@users.noreply.github.com> Date: Thu, 11 Apr 2024 18:55:36 +0200 Subject: [PATCH 11/17] fix: add retries to the first send relay (#1371) * add retries to the first send relay * adding timeout to pruning --- protocol/lavasession/common.go | 4 ++-- protocol/rpcconsumer/rpcconsumer_server.go | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/protocol/lavasession/common.go b/protocol/lavasession/common.go index 00bd892fca..add8cdcccb 100644 --- a/protocol/lavasession/common.go +++ b/protocol/lavasession/common.go @@ -25,8 +25,8 @@ import ( const ( MaxConsecutiveConnectionAttempts = 5 - TimeoutForEstablishingAConnection = 1 * time.Second - MaxSessionsAllowedPerProvider = 1000 // Max number of sessions allowed per provider + TimeoutForEstablishingAConnection = 1500 * time.Millisecond // 1.5 seconds + MaxSessionsAllowedPerProvider = 1000 // Max number of sessions allowed per provider MaxAllowedBlockListedSessionPerProvider = 3 MaximumNumberOfFailuresAllowedPerConsumerSession = 3 RelayNumberIncrement = 1 diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index c00679a531..0799e93c5c 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -31,6 +31,7 @@ import ( const ( MaxRelayRetries = 6 + SendRelayAttempts = 3 numberOfTimesToCheckCurrentlyUsedIsEmpty = 3 ) @@ -329,10 +330,15 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH ctx, cancel := context.WithCancel(ctx) defer cancel() relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(directiveHeaders), rpccs.requiredResponses, chainMessage, rpccs.consumerConsistency, dappID, consumerIp) - err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) - if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { - // we failed to send a batch of relays, if there are no active sends we can terminate - return relayProcessor, err + var err error + // try sending a relay 3 times. if failed return the error + for retryFirstRelayAttempt := 0; retryFirstRelayAttempt < SendRelayAttempts; retryFirstRelayAttempt++ { + err = rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) + // check if we had an error. if we did, try again. 
+ if err == nil { + break + } + utils.LavaFormatWarning("Failed retryFirstRelayAttempt, will retry.", err, utils.LogAttr("attempt", retryFirstRelayAttempt)) } // a channel to be notified processing was done, true means we have results and can return gotResults := make(chan bool) From 418d976697d18acda26607f64f295e757e0377fc Mon Sep 17 00:00:00 2001 From: Omer <100387053+omerlavanet@users.noreply.github.com> Date: Sun, 14 Apr 2024 13:56:28 +0300 Subject: [PATCH 12/17] feat: added command for testing validator performance (#1362) * added command for testing validator performance * lint * fix unpacking of valcons * add info on jump blocks * added moniker regex to validator performance * added suppport for unbonded validators * added validators tokens and unbonded handling * fix space --- cmd/lavap/main.go | 6 + protocol/performance/validators/validators.go | 261 ++++++++++++++++++ 2 files changed, 267 insertions(+) create mode 100644 protocol/performance/validators/validators.go diff --git a/cmd/lavap/main.go b/cmd/lavap/main.go index bc67eb0bbe..136d7fd72f 100644 --- a/cmd/lavap/main.go +++ b/cmd/lavap/main.go @@ -16,6 +16,7 @@ import ( "github.com/lavanet/lava/protocol/badgeserver" "github.com/lavanet/lava/protocol/monitoring" "github.com/lavanet/lava/protocol/performance/connection" + validators "github.com/lavanet/lava/protocol/performance/validators" "github.com/lavanet/lava/protocol/rpcconsumer" "github.com/lavanet/lava/protocol/rpcprovider" "github.com/lavanet/lava/protocol/statetracker" @@ -42,6 +43,8 @@ func main() { // badge generator cobra command badgeServer := badgeserver.CreateBadgeServerCobraCommand() + validatorsCmd := validators.CreateValidatorsPerformanceCommand() + // Add Version Command rootCmd.AddCommand(cmdVersion) // Add RPC Consumer Command @@ -53,6 +56,9 @@ func main() { // Add Badge Generator Command rootCmd.AddCommand(badgeServer) + // add command to test validators + rootCmd.AddCommand(validatorsCmd) + testCmd := &cobra.Command{ Use: "test", Short: "Test commands for protocol network", diff --git a/protocol/performance/validators/validators.go b/protocol/performance/validators/validators.go new file mode 100644 index 0000000000..eb4ecb0144 --- /dev/null +++ b/protocol/performance/validators/validators.go @@ -0,0 +1,261 @@ +package validators + +import ( + "context" + "fmt" + "os" + "os/signal" + "regexp" + "strconv" + "time" + + "cosmossdk.io/math" + "github.com/cosmos/cosmos-sdk/client" + "github.com/cosmos/cosmos-sdk/client/flags" + "github.com/cosmos/cosmos-sdk/codec" + cryptotypes "github.com/cosmos/cosmos-sdk/crypto/types" + "github.com/cosmos/cosmos-sdk/types/bech32" + "github.com/cosmos/cosmos-sdk/types/query" + "github.com/cosmos/cosmos-sdk/version" + slashingtypes "github.com/cosmos/cosmos-sdk/x/slashing/types" + stakingtypes "github.com/cosmos/cosmos-sdk/x/staking/types" + "github.com/lavanet/lava/app" + "github.com/lavanet/lava/utils" + "github.com/lavanet/lava/utils/rand" + "github.com/spf13/cobra" + "github.com/spf13/viper" +) + +const ( + validatorMonikerFlagName = "regex" +) + +type RetInfo struct { + tombstone int64 + jailed int64 + missedBlocks int64 + checks int64 + unbonded int64 + tokens math.Int +} + +func extractValcons(codec codec.Codec, validator stakingtypes.Validator, hrp string) (valCons string, err error) { + var pk cryptotypes.PubKey + err = codec.UnpackAny(validator.ConsensusPubkey, &pk) + if err != nil { + return "", utils.LavaFormatError("failed unpacking", err) + } + valcons, err := bech32.ConvertAndEncode(hrp, pk.Address()) + if 
err != nil { + return "", utils.LavaFormatError("failed to encode cons Address", err) + } + return valcons, nil +} + +func checkValidatorPerformance(ctx context.Context, clientCtx client.Context, valAddr string, regex bool, blocks int64, fromBlock int64) (retInfo RetInfo, err error) { + retInfo = RetInfo{} + ctx, cancel := context.WithCancel(ctx) + signalChan := make(chan os.Signal, 1) + signal.Notify(signalChan, os.Interrupt) + defer func() { + signal.Stop(signalChan) + cancel() + }() + resultStatus, err := clientCtx.Client.Status(ctx) + if err != nil { + return retInfo, err + } + latestHeight := resultStatus.SyncInfo.LatestBlockHeight + if latestHeight < blocks { + return retInfo, utils.LavaFormatError("requested blocks is bigger than latest block height", nil, utils.Attribute{Key: "requested", Value: blocks}, utils.Attribute{Key: "latestHeight", Value: latestHeight}) + } + slashingQueryClient := slashingtypes.NewQueryClient(clientCtx) + timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + params, err := slashingQueryClient.Params(timeoutCtx, &slashingtypes.QueryParamsRequest{}) + cancel() + if err != nil { + return retInfo, utils.LavaFormatError("invalid slashing params query", err) + } + jumpBlocks := params.Params.SignedBlocksWindow + utils.LavaFormatInfo("jump blocks", utils.LogAttr("blocks", jumpBlocks)) + timeoutCtx, cancel = context.WithTimeout(ctx, 5*time.Second) + signingInfos, err := slashingQueryClient.SigningInfos(timeoutCtx, &slashingtypes.QuerySigningInfosRequest{}) + cancel() + if err != nil { + return retInfo, utils.LavaFormatError("invalid slashing signing infos query", err) + } + exampleConsAddress := signingInfos.Info[0].Address + hrp, _, err := bech32.DecodeAndConvert(exampleConsAddress) + if err != nil { + return retInfo, utils.LavaFormatError("error decoding hrp", err) + } + valCons := "" + stakingQueryClient := stakingtypes.NewQueryClient(clientCtx) + if regex { + timeoutCtx, cancel = context.WithTimeout(ctx, 60*time.Second) + allValidators, err := stakingQueryClient.Validators(timeoutCtx, &stakingtypes.QueryValidatorsRequest{ + Pagination: &query.PageRequest{ + Limit: 10000, + }, + }) + cancel() + if err != nil { + return retInfo, utils.LavaFormatError("error reading validators", err) + } + re, err := regexp.Compile(valAddr) + if err != nil { + return retInfo, utils.LavaFormatError("failed compiling regex", err, utils.LogAttr("regex", valAddr)) + } + valAddr = "" + foundMoniker := "" + for _, validator := range allValidators.GetValidators() { + if re.MatchString(validator.Description.Moniker) { + if valAddr == "" { + foundMoniker = validator.Description.Moniker + valAddr = validator.OperatorAddress + valCons, err = extractValcons(clientCtx.Codec, validator, hrp) + if err != nil { + continue + } + } else { + return retInfo, utils.LavaFormatError("regex matched two validators", nil, utils.LogAttr("first", foundMoniker), utils.LogAttr("second", validator.Description.Moniker)) + } + } + } + if valAddr == "" { + return retInfo, utils.LavaFormatError("failed to match a validator with regex", err, utils.LogAttr("regex", re.String())) + } + utils.LavaFormatInfo("found validator moniker", utils.LogAttr("moniker", foundMoniker), utils.LogAttr("address", valAddr)) + } + utils.LavaFormatInfo("looking for validator signing info", utils.LogAttr("valAddr", valAddr), utils.LogAttr("valCons", valCons)) + timeoutCtx, cancel = context.WithTimeout(ctx, 5*time.Second) + validator, err := stakingQueryClient.Validator(timeoutCtx, &stakingtypes.QueryValidatorRequest{ + 
ValidatorAddr: valAddr, + }) + cancel() + if err != nil { + return retInfo, utils.LavaFormatError("error reading validator", err) + } + retInfo.tokens = validator.Validator.Tokens + ticker := time.NewTicker(3 * time.Second) + readEventsFromBlock := func(blockFrom int64, blockTo int64) error { + for block := blockFrom; block < blockTo; block += jumpBlocks { + select { + case <-signalChan: + return nil + case <-ticker.C: + fmt.Printf("Current Block: %d\r", block) + default: + } + clientCtxWithHeight := clientCtx.WithHeight(block) + stakingQueryClient := stakingtypes.NewQueryClient(clientCtxWithHeight) + slashingQueryClient := slashingtypes.NewQueryClient(clientCtxWithHeight) + timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + // signingInfos, err := stakingQueryClient.SigningInfos(timeoutCtx, &slashingtypes.QuerySigningInfosRequest{}) + validatorResp, err := stakingQueryClient.Validator(timeoutCtx, &stakingtypes.QueryValidatorRequest{ + ValidatorAddr: valAddr, + }) + cancel() + if err != nil { + utils.LavaFormatWarning("failed to find validator at height", err, utils.LogAttr("block", block)) + continue + } + if validatorResp.Validator.Jailed { + retInfo.jailed++ + } + if validatorResp.Validator.Status == stakingtypes.Bonded { + if valCons == "" { + valCons, err = extractValcons(clientCtx.Codec, validatorResp.Validator, hrp) + if err != nil { + return err + } + } + timeoutCtx, cancel = context.WithTimeout(ctx, 5*time.Second) + signingInfo, err := slashingQueryClient.SigningInfo(timeoutCtx, &slashingtypes.QuerySigningInfoRequest{ + ConsAddress: valCons, + }) + cancel() + if err != nil { + utils.LavaFormatError("failed reading signing info at height", err, utils.LogAttr("block", block), utils.LogAttr("valCons", valCons)) + continue + } + retInfo.missedBlocks += signingInfo.ValSigningInfo.MissedBlocksCounter + if signingInfo.ValSigningInfo.Tombstoned { + retInfo.tombstone += 1 + } + } else { + retInfo.unbonded++ + } + + retInfo.checks += 1 + } + return nil + } + + if blocks > 0 { + if fromBlock <= 0 { + fromBlock = latestHeight - blocks + } + utils.LavaFormatInfo("Reading validator performance on blocks", utils.Attribute{Key: "from", Value: fromBlock}, utils.Attribute{Key: "to", Value: fromBlock + blocks}) + readEventsFromBlock(fromBlock, fromBlock+blocks) + } + return retInfo, nil +} + +func CreateValidatorsPerformanceCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: `validator-performance [start_block(int)] [--node tmRPC]`, + Short: `validator-performance checks and prints the statistics of a validator, either by an operator address or a regex`, + Long: `validator-performance checks and prints the statistics of a validator`, + Example: `validator-performance lava@valoper1abcdefg 100 --node https://public-rpc.lavanet.xyz +validator-performance valida*_monik* --regex 100 --node https://public-rpc.lavanet.xyz`, + Args: cobra.RangeArgs(2, 3), + RunE: func(cmd *cobra.Command, args []string) error { + clientCtx, err := client.GetClientQueryContext(cmd) + if err != nil { + return err + } + // handle flags, pass necessary fields + ctx := context.Background() + if err != nil { + return err + } + logLevel, err := cmd.Flags().GetString(flags.FlagLogLevel) + if err != nil { + utils.LavaFormatFatal("failed to read log level flag", err) + } + + valAddress := args[0] + blocks, err := strconv.ParseInt(args[1], 0, 64) + if err != nil { + utils.LavaFormatFatal("failed to parse blocks as a number", err) + } + if blocks < 0 { + blocks = 0 + } + + fromBlock := int64(-1) + if len(args) == 3 { 
+ fromBlock, err = strconv.ParseInt(args[2], 0, 64) + if err != nil { + utils.LavaFormatFatal("failed to parse blocks as a number", err) + } + } + + regex := viper.GetBool(validatorMonikerFlagName) + utils.SetGlobalLoggingLevel(logLevel) + utils.LavaFormatInfo("lavad Binary Version: " + version.Version) + rand.InitRandomSeed() + retInfo, err := checkValidatorPerformance(ctx, clientCtx, valAddress, regex, blocks, fromBlock) + if err == nil { + fmt.Printf("📄----------------------------------------✨SUMMARY✨----------------------------------------📄\n\n🔵 Validator Stats:\n🔹checks: %d\n🔹unbonded: %d\n🔹jailed: %d\n🔹missedBlocks: %d\n🔹tombstone: %d\n🔹tokens: %s\n\n", retInfo.checks, retInfo.unbonded, retInfo.jailed, retInfo.missedBlocks, retInfo.tombstone, retInfo.tokens.String()) + } + return err + }, + } + flags.AddQueryFlagsToCmd(cmd) + flags.AddKeyringFlags(cmd.Flags()) + cmd.Flags().String(flags.FlagChainID, app.Name, "network chain id") + cmd.Flags().Bool(validatorMonikerFlagName, false, "turn on regex parsing for the validator moniker instead of accepting a valoper") + return cmd +} From 8923002c1f58174fbfd72933ac2230af9638d3ac Mon Sep 17 00:00:00 2001 From: Yaroms <103432884+Yaroms@users.noreply.github.com> Date: Sun, 14 Apr 2024 13:59:10 +0300 Subject: [PATCH 13/17] remove verification (#1367) Co-authored-by: Yaroms --- cookbook/specs/spec_add_fvm.json | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cookbook/specs/spec_add_fvm.json b/cookbook/specs/spec_add_fvm.json index c6d273752a..67054cbc79 100644 --- a/cookbook/specs/spec_add_fvm.json +++ b/cookbook/specs/spec_add_fvm.json @@ -1828,6 +1828,18 @@ "expected_value": "0x13a" } ] + }, + { + "name": "pruning", + "values": [ + { + "expected_value": "" + }, + { + "extension": "archive", + "expected_value": "" + } + ] } ] } From c04885b97e22eb57d36139f33ec52a6c82906d75 Mon Sep 17 00:00:00 2001 From: Ran Mishael <106548467+ranlavanet@users.noreply.github.com> Date: Sun, 14 Apr 2024 16:13:38 +0200 Subject: [PATCH 14/17] feat: PRT-add metrics to rpcconsumer sessions (#1374) * add metrics to rpcconsumer * fix lint --- ecosystem/lavajs/package.json | 2 +- ecosystem/lavajs/scripts/codegen.js | 12 +- .../lavasession/consumer_session_manager.go | 27 ++-- protocol/metrics/metrics_consumer_manager.go | 147 ++++++++++++++---- protocol/metrics/rpcconsumerlogs.go | 12 ++ protocol/rpcconsumer/rpcconsumer_server.go | 25 ++- .../pre_setups/init_lava_only_with_node.sh | 2 +- 7 files changed, 177 insertions(+), 50 deletions(-) diff --git a/ecosystem/lavajs/package.json b/ecosystem/lavajs/package.json index d64a99875b..5bf709e11b 100644 --- a/ecosystem/lavajs/package.json +++ b/ecosystem/lavajs/package.json @@ -1,6 +1,6 @@ { "name": "@lavanet/lavajs", - "version": "1.0.4", + "version": "1.2.2", "description": "lavajs", "author": "Lava Network", "homepage": "https://github.com/lavanet/lava/tree/main/ecosystem/lavajs#readme", diff --git a/ecosystem/lavajs/scripts/codegen.js b/ecosystem/lavajs/scripts/codegen.js index 5b26bf7cf5..71f140f4c6 100644 --- a/ecosystem/lavajs/scripts/codegen.js +++ b/ecosystem/lavajs/scripts/codegen.js @@ -18,6 +18,14 @@ telescope({ 'cosmos/authz/v1beta1/authz.ts', 'cosmos/gov/v1beta1/tx.ts', 'cosmos/gov/v1beta1/gov.ts', + 'cosmos/staking/v1beta1/staking.ts', + 'tendermint/types/evidence.ts', + 'cosmos/staking/v1beta1/tx.ts', + 'cosmos/orm/query/v1alpha1/query.ts', + 'tendermint/types/types.ts', + 'tendermint/abci/types.ts', + 'lavanet/lava/downtime/v1/genesis.ts', + 'cosmos/upgrade/v1beta1/upgrade.ts', 
'cosmos/staking/v1beta1/tx.amino.ts' ], patterns: ['**/*amino.ts', '**/*registry.ts'] @@ -64,8 +72,8 @@ telescope({ ] }, methods: { - fromJSON: false, - toJSON: false, + fromJSON: true, + toJSON: true, encode: true, decode: true, fromPartial: true, diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index d552ff80ea..9e03b71b80 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -97,7 +97,8 @@ func (csm *ConsumerSessionManager) UpdateAllProviders(epoch uint64, pairingList csm.pairing[provider.PublicLavaAddress] = provider } csm.setValidAddressesToDefaultValue("", nil) // the starting point is that valid addresses are equal to pairing addresses. - csm.resetMetricsManager() + // reset session related metrics + csm.consumerMetricsManager.ResetSessionRelatedMetrics() utils.LavaFormatDebug("updated providers", utils.Attribute{Key: "epoch", Value: epoch}, utils.Attribute{Key: "spec", Value: csm.rpcEndpoint.Key()}) return nil } @@ -440,6 +441,11 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS } else { // consumer session is locked and valid, we need to set the relayNumber and the relay cu. before returning. + // add metric to currently open sessions metric + info := csm.RPCEndpoint() + apiInterface := info.ApiInterface + chainId := info.ChainID + go csm.consumerMetricsManager.AddOpenSessionMetric(chainId, apiInterface, providerAddress) // Successfully created/got a consumerSession. if debug { utils.LavaFormatDebug("Consumer get session", @@ -921,16 +927,17 @@ func (csm *ConsumerSessionManager) updateMetricsManager(consumerSession *SingleC qosEx := *consumerSession.QoSInfo.LastExcellenceQoSReport lastQosExcellence = &qosEx } + blockedSession := consumerSession.BlockListed + publicProviderAddress := consumerSession.Parent.PublicLavaAddress - go csm.consumerMetricsManager.SetQOSMetrics(chainId, apiInterface, consumerSession.Parent.PublicLavaAddress, lastQos, lastQosExcellence, consumerSession.LatestBlock, consumerSession.RelayNum) -} - -// consumerSession should still be locked when accessing this method as it fetches information from the session it self -func (csm *ConsumerSessionManager) resetMetricsManager() { - if csm.consumerMetricsManager == nil { - return - } - csm.consumerMetricsManager.ResetQOSMetrics() + go func() { + csm.consumerMetricsManager.SetQOSMetrics(chainId, apiInterface, publicProviderAddress, lastQos, lastQosExcellence, consumerSession.LatestBlock, consumerSession.RelayNum) + // in case we blocked the session add it to our block sessions metric + if blockedSession { + csm.consumerMetricsManager.AddNumberOfBlockedSessionMetric(chainId, apiInterface, publicProviderAddress) + } + csm.consumerMetricsManager.DecrementOpenSessionMetric(chainId, apiInterface, publicProviderAddress) + }() } // Get the reported providers currently stored in the session manager. 
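As a quick usage sketch (not part of the patch itself): assuming a consumer is started with `--metrics-listen-address ":7779"` as in the setup scripts earlier in this series, the session gauges and counters added by this patch should be visible on its Prometheus endpoint, for example:

```bash
# Spot-check the new rpcconsumer session metrics (hypothetical local setup,
# port taken from the --metrics-listen-address used in the scripts above).
curl -s http://127.0.0.1:7779/metrics | grep -E \
  'lava_consumer_(current_number_of_open_sessions|current_number_of_blocked_sessions|total_relays_sent_to_providers)'
```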
diff --git a/protocol/metrics/metrics_consumer_manager.go b/protocol/metrics/metrics_consumer_manager.go index e912326be3..3252dcdc9a 100644 --- a/protocol/metrics/metrics_consumer_manager.go +++ b/protocol/metrics/metrics_consumer_manager.go @@ -13,21 +13,26 @@ import ( ) type ConsumerMetricsManager struct { - totalCURequestedMetric *prometheus.CounterVec - totalRelaysRequestedMetric *prometheus.CounterVec - totalErroredMetric *prometheus.CounterVec - blockMetric *prometheus.GaugeVec - latencyMetric *prometheus.GaugeVec - qosMetric *prometheus.GaugeVec - qosExcellenceMetric *prometheus.GaugeVec - LatestBlockMetric *prometheus.GaugeVec - LatestProviderRelay *prometheus.GaugeVec - virtualEpochMetric *prometheus.GaugeVec - endpointsHealthChecksOkMetric prometheus.Gauge - endpointsHealthChecksOk uint64 - lock sync.Mutex - protocolVersionMetric *prometheus.GaugeVec - providerRelays map[string]uint64 + totalCURequestedMetric *prometheus.CounterVec + totalRelaysRequestedMetric *prometheus.CounterVec + totalErroredMetric *prometheus.CounterVec + totalRelaysSentToProvidersMetric *prometheus.CounterVec + totalRelaysReturnedFromProvidersMetric *prometheus.CounterVec + totalRelaysSentByNewBatchTickerMetric *prometheus.CounterVec + currentNumberOfOpenSessionsMetric *prometheus.GaugeVec + currentNumberOfBlockedSessionsMetric *prometheus.GaugeVec + blockMetric *prometheus.GaugeVec + latencyMetric *prometheus.GaugeVec + qosMetric *prometheus.GaugeVec + qosExcellenceMetric *prometheus.GaugeVec + LatestBlockMetric *prometheus.GaugeVec + LatestProviderRelay *prometheus.GaugeVec + virtualEpochMetric *prometheus.GaugeVec + endpointsHealthChecksOkMetric prometheus.Gauge + endpointsHealthChecksOk uint64 + lock sync.Mutex + protocolVersionMetric *prometheus.GaugeVec + providerRelays map[string]uint64 } func NewConsumerMetricsManager(networkAddress string) *ConsumerMetricsManager { @@ -46,6 +51,27 @@ func NewConsumerMetricsManager(networkAddress string) *ConsumerMetricsManager { Help: "The total number of relays serviced by the consumer over time.", }, []string{"spec", "apiInterface"}) + totalRelaysSentToProvidersMetric := prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "lava_consumer_total_relays_sent_to_providers", + Help: "The total number of relays sent to providers", + }, []string{"spec", "apiInterface"}) + totalRelaysReturnedFromProvidersMetric := prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "lava_consumer_total_relays_returned_from_providers", + Help: "The total number of relays returned from providers", + }, []string{"spec", "apiInterface"}) + totalRelaysSentByNewBatchTickerMetric := prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "lava_consumer_total_relays_sent_by_batch_ticker", + Help: "The total number of relays sent by the batch ticker", + }, []string{"spec", "apiInterface"}) + currentNumberOfOpenSessionsMetric := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "lava_consumer_current_number_of_open_sessions", + Help: "The total number of currently open sessions", + }, []string{"spec", "apiInterface", "provider"}) + currentNumberOfBlockedSessionsMetric := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "lava_consumer_current_number_of_blocked_sessions", + Help: "The total number of currently blocked sessions", + }, []string{"spec", "apiInterface", "provider"}) + // Create a new GaugeVec metric to represent the TotalErrored over time. 
totalErroredMetric := prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "lava_consumer_total_errored", @@ -106,22 +132,33 @@ func NewConsumerMetricsManager(networkAddress string) *ConsumerMetricsManager { prometheus.MustRegister(virtualEpochMetric) prometheus.MustRegister(endpointsHealthChecksOkMetric) prometheus.MustRegister(protocolVersionMetric) + // metrics related to session management + prometheus.MustRegister(totalRelaysSentToProvidersMetric) + prometheus.MustRegister(totalRelaysReturnedFromProvidersMetric) + prometheus.MustRegister(totalRelaysSentByNewBatchTickerMetric) + prometheus.MustRegister(currentNumberOfOpenSessionsMetric) + prometheus.MustRegister(currentNumberOfBlockedSessionsMetric) consumerMetricsManager := &ConsumerMetricsManager{ - totalCURequestedMetric: totalCURequestedMetric, - totalRelaysRequestedMetric: totalRelaysRequestedMetric, - totalErroredMetric: totalErroredMetric, - blockMetric: blockMetric, - latencyMetric: latencyMetric, - qosMetric: qosMetric, - qosExcellenceMetric: qosExcellenceMetric, - LatestBlockMetric: latestBlockMetric, - LatestProviderRelay: latestProviderRelay, - providerRelays: map[string]uint64{}, - virtualEpochMetric: virtualEpochMetric, - endpointsHealthChecksOkMetric: endpointsHealthChecksOkMetric, - endpointsHealthChecksOk: 1, - protocolVersionMetric: protocolVersionMetric, + totalCURequestedMetric: totalCURequestedMetric, + totalRelaysRequestedMetric: totalRelaysRequestedMetric, + totalErroredMetric: totalErroredMetric, + blockMetric: blockMetric, + latencyMetric: latencyMetric, + qosMetric: qosMetric, + qosExcellenceMetric: qosExcellenceMetric, + LatestBlockMetric: latestBlockMetric, + LatestProviderRelay: latestProviderRelay, + providerRelays: map[string]uint64{}, + virtualEpochMetric: virtualEpochMetric, + endpointsHealthChecksOkMetric: endpointsHealthChecksOkMetric, + endpointsHealthChecksOk: 1, + protocolVersionMetric: protocolVersionMetric, + totalRelaysSentToProvidersMetric: totalRelaysSentToProvidersMetric, + totalRelaysReturnedFromProvidersMetric: totalRelaysReturnedFromProvidersMetric, + totalRelaysSentByNewBatchTickerMetric: totalRelaysSentByNewBatchTickerMetric, + currentNumberOfOpenSessionsMetric: currentNumberOfOpenSessionsMetric, + currentNumberOfBlockedSessionsMetric: currentNumberOfBlockedSessionsMetric, } http.Handle("/metrics", promhttp.Handler()) @@ -170,6 +207,54 @@ func (pme *ConsumerMetricsManager) SetRelayMetrics(relayMetric *RelayMetrics, er } } +func (pme *ConsumerMetricsManager) SetRelaySentToProviderMetric(chainId string, apiInterface string) { + if pme == nil { + return + } + pme.totalRelaysSentToProvidersMetric.WithLabelValues(chainId, apiInterface).Inc() +} + +func (pme *ConsumerMetricsManager) SetRelayReturnedFromProviderMetric(chainId string, apiInterface string) { + if pme == nil { + return + } + pme.totalRelaysReturnedFromProvidersMetric.WithLabelValues(chainId, apiInterface).Inc() +} + +func (pme *ConsumerMetricsManager) SetRelaySentByNewBatchTickerMetric(chainId string, apiInterface string) { + if pme == nil { + return + } + pme.totalRelaysSentByNewBatchTickerMetric.WithLabelValues(chainId, apiInterface).Inc() +} + +func (pme *ConsumerMetricsManager) AddOpenSessionMetric(chainId string, apiInterface string, provider string) { + if pme == nil { + return + } + pme.lock.Lock() + defer pme.lock.Unlock() + pme.currentNumberOfOpenSessionsMetric.WithLabelValues(chainId, apiInterface, provider).Inc() +} + +func (pme *ConsumerMetricsManager) DecrementOpenSessionMetric(chainId string, apiInterface string, 
provider string) { + if pme == nil { + return + } + pme.lock.Lock() + defer pme.lock.Unlock() + pme.currentNumberOfOpenSessionsMetric.WithLabelValues(chainId, apiInterface, provider).Dec() +} + +func (pme *ConsumerMetricsManager) AddNumberOfBlockedSessionMetric(chainId string, apiInterface string, provider string) { + if pme == nil { + return + } + pme.lock.Lock() + defer pme.lock.Unlock() + pme.currentNumberOfBlockedSessionsMetric.WithLabelValues(chainId, apiInterface, provider).Inc() +} + func (pme *ConsumerMetricsManager) SetQOSMetrics(chainId string, apiInterface string, providerAddress string, qos *pairingtypes.QualityOfServiceReport, qosExcellence *pairingtypes.QualityOfServiceReport, latestBlock int64, relays uint64) { if pme == nil { return @@ -239,7 +324,7 @@ func (pme *ConsumerMetricsManager) UpdateHealthCheckStatus(status bool) { atomic.StoreUint64(&pme.endpointsHealthChecksOk, uint64(value)) } -func (pme *ConsumerMetricsManager) ResetQOSMetrics() { +func (pme *ConsumerMetricsManager) ResetSessionRelatedMetrics() { if pme == nil { return } @@ -247,6 +332,8 @@ func (pme *ConsumerMetricsManager) ResetQOSMetrics() { defer pme.lock.Unlock() pme.qosMetric.Reset() pme.qosExcellenceMetric.Reset() + pme.currentNumberOfBlockedSessionsMetric.Reset() + pme.currentNumberOfOpenSessionsMetric.Reset() pme.providerRelays = map[string]uint64{} } diff --git a/protocol/metrics/rpcconsumerlogs.go b/protocol/metrics/rpcconsumerlogs.go index 746d95e1a2..7a96ca3d96 100644 --- a/protocol/metrics/rpcconsumerlogs.go +++ b/protocol/metrics/rpcconsumerlogs.go @@ -211,6 +211,18 @@ func (rpccl *RPCConsumerLogs) shouldCountMetrics(refererHeaderValue string, user return true } +func (rpccl *RPCConsumerLogs) SetRelaySentToProviderMetric(chainId string, apiInterface string) { + rpccl.consumerMetricsManager.SetRelaySentToProviderMetric(chainId, apiInterface) +} + +func (rpccl *RPCConsumerLogs) SetRelayReturnedFromProviderMetric(chainId string, apiInterface string) { + rpccl.consumerMetricsManager.SetRelayReturnedFromProviderMetric(chainId, apiInterface) +} + +func (rpccl *RPCConsumerLogs) SetRelaySentByNewBatchTickerMetric(chainId string, apiInterface string) { + rpccl.consumerMetricsManager.SetRelaySentByNewBatchTickerMetric(chainId, apiInterface) +} + func (rpccl *RPCConsumerLogs) SendMetrics(data *RelayMetrics, err error, origin string) { data.Success = err == nil data.Origin = origin diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 0799e93c5c..cd5100e3dd 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -325,6 +325,10 @@ func (rpccs *RPCConsumerServer) SendRelay( return returnedResult, nil } +func (rpccs *RPCConsumerServer) getChainIdAndApiInterface() (string, string) { + return rpccs.listenEndpoint.ChainID, rpccs.listenEndpoint.ApiInterface +} + func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveHeaders map[string]string, chainMessage chainlib.ChainMessage, relayRequestData *pairingtypes.RelayPrivateData, dappID string, consumerIp string) (*RelayProcessor, error) { // make sure all of the child contexts are cancelled when we exit ctx, cancel := context.WithCancel(ctx) @@ -392,6 +396,8 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH if relayProcessor.selection != BestResult { err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) go validateReturnCondition(err) + // add ticker 
launch metrics + go rpccs.rpcConsumerLogs.SetRelaySentByNewBatchTickerMetric(rpccs.getChainIdAndApiInterface()) } case returnErr := <-returnCondition: // we use this channel because there could be a race condition between us releasing the provider and about to send the return @@ -443,7 +449,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( } privKey := rpccs.privKey - chainID := rpccs.listenEndpoint.ChainID + chainId, apiInterface := rpccs.getChainIdAndApiInterface() lavaChainID := rpccs.lavaChainID // Get Session. we get session here so we can use the epoch in the callbacks @@ -454,7 +460,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( if rpccs.cache.CacheActive() { // use cache only if its defined. if reqBlock != spectypes.NOT_APPLICABLE || !chainMessage.GetForceCacheRefresh() { var cacheReply *pairingtypes.CacheRelayReply - hashKey, outputFormatter, err := chainlib.HashCacheRequest(relayRequestData, chainID) + hashKey, outputFormatter, err := chainlib.HashCacheRequest(relayRequestData, chainId) if err != nil { utils.LavaFormatError("sendRelayToProvider Failed getting Hash for cache request", err) } else { @@ -462,7 +468,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( cacheReply, cacheError = rpccs.cache.GetEntry(cacheCtx, &pairingtypes.RelayCacheGet{ RequestHash: hashKey, RequestedBlock: relayRequestData.RequestBlock, - ChainId: chainID, + ChainId: chainId, BlockHash: nil, Finalized: false, SharedStateId: sharedStateId, @@ -534,6 +540,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( for providerPublicAddress, sessionInfo := range sessions { // Launch a separate goroutine for each session go func(providerPublicAddress string, sessionInfo *lavasession.SessionInfo) { + // add ticker launch metrics localRelayResult := &common.RelayResult{ ProviderInfo: common.ProviderInfo{ProviderAddress: providerPublicAddress, ProviderStake: sessionInfo.StakeSize, ProviderQoSExcellenceSummery: sessionInfo.QoSSummeryResult}, Finalized: false, @@ -549,6 +556,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( if found { goroutineCtx = utils.WithUniqueIdentifier(goroutineCtx, guid) } + defer func() { // Return response relayProcessor.SetResponse(&relayResponse{ @@ -567,7 +575,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( epoch := sessionInfo.Epoch reportedProviders := sessionInfo.ReportedProviders - relayRequest, errResponse := lavaprotocol.ConstructRelayRequest(goroutineCtx, privKey, lavaChainID, chainID, &localRelayRequestData, providerPublicAddress, singleConsumerSession, int64(epoch), reportedProviders) + relayRequest, errResponse := lavaprotocol.ConstructRelayRequest(goroutineCtx, privKey, lavaChainID, chainId, &localRelayRequestData, providerPublicAddress, singleConsumerSession, int64(epoch), reportedProviders) if errResponse != nil { utils.LavaFormatError("Failed ConstructRelayRequest", errResponse, utils.LogAttr("Request data", localRelayRequestData)) return @@ -575,6 +583,10 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( localRelayResult.Request = relayRequest endpointClient := *singleConsumerSession.Endpoint.Client + // add metrics (send and receive) + go rpccs.rpcConsumerLogs.SetRelaySentToProviderMetric(chainId, apiInterface) + defer func() { go rpccs.rpcConsumerLogs.SetRelayReturnedFromProviderMetric(chainId, apiInterface) }() + if isSubscription { errResponse = rpccs.relaySubscriptionInner(goroutineCtx, endpointClient, singleConsumerSession, localRelayResult) if errResponse != nil { @@ -586,6 +598,7 @@ func (rpccs 
*RPCConsumerServer) sendRelayToProvider( // unique per dappId and ip consumerToken := common.GetUniqueToken(dappID, consumerIp) processingTimeout, relayTimeout := rpccs.getProcessingTimeout(chainMessage) + // send relay relayLatency, errResponse, backoff := rpccs.relayInner(goroutineCtx, singleConsumerSession, localRelayResult, processingTimeout, chainMessage, consumerToken) if errResponse != nil { failRelaySession := func(origErr error, backoff_ bool) { @@ -642,7 +655,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( requestedBlock := localRelayResult.Request.RelayData.RequestBlock // get requested block before removing it from the data seenBlock := localRelayResult.Request.RelayData.SeenBlock // get seen block before removing it from the data - hashKey, _, hashErr := chainlib.HashCacheRequest(localRelayResult.Request.RelayData, chainID) // get the hash (this changes the data) + hashKey, _, hashErr := chainlib.HashCacheRequest(localRelayResult.Request.RelayData, chainId) // get the hash (this changes the data) go func() { // deal with copying error. @@ -665,7 +678,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( err2 := rpccs.cache.SetEntry(new_ctx, &pairingtypes.RelayCacheSet{ RequestHash: hashKey, - ChainId: chainID, + ChainId: chainId, RequestedBlock: requestedBlock, SeenBlock: seenBlock, BlockHash: nil, // consumer cache doesn't care about block hashes diff --git a/scripts/pre_setups/init_lava_only_with_node.sh b/scripts/pre_setups/init_lava_only_with_node.sh index ee1ceb2099..883730c934 100755 --- a/scripts/pre_setups/init_lava_only_with_node.sh +++ b/scripts/pre_setups/init_lava_only_with_node.sh @@ -57,7 +57,7 @@ wait_next_block screen -d -m -S consumers bash -c "source ~/.bashrc; lavap rpcconsumer \ 127.0.0.1:3360 LAV1 rest 127.0.0.1:3361 LAV1 tendermintrpc 127.0.0.1:3362 LAV1 grpc \ -$EXTRA_PORTAL_FLAGS --geolocation 1 --log_level debug --from user1 --chain-id lava --allow-insecure-provider-dialing --metrics-listen-address ":7779" 2>&1 | tee $LOGS_DIR/CONSUMERS.log" && sleep 0.25 +$EXTRA_PORTAL_FLAGS --geolocation 1 --log_level debug --from user1 --chain-id lava --pprof-address "127.0.0.1:6060" --allow-insecure-provider-dialing --metrics-listen-address ":7779" 2>&1 | tee $LOGS_DIR/CONSUMERS.log" && sleep 0.25 echo "--- setting up screens done ---" screen -ls \ No newline at end of file From d0431f98444475c72d2366221070a7f0a816b0c6 Mon Sep 17 00:00:00 2001 From: Yaroms <103432884+Yaroms@users.noreply.github.com> Date: Mon, 15 Apr 2024 11:35:06 +0300 Subject: [PATCH 15/17] chore: CNS-LAV1-spec-pruning (#1375) * add prunning and chainid verifications * fix * archive extension blocks * fix pruning verification * fix archive extension --------- Co-authored-by: Yarom Swisa --- cookbook/specs/spec_add_lava.json | 63 +++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/cookbook/specs/spec_add_lava.json b/cookbook/specs/spec_add_lava.json index d133804df0..522871843f 100644 --- a/cookbook/specs/spec_add_lava.json +++ b/cookbook/specs/spec_add_lava.json @@ -8,7 +8,7 @@ "name": "lava testnet", "enabled": true, "imports": [ - "COSMOSSDK" + "COSMOSSDKFULL" ], "providers_types": 1, "reliability_threshold": 268435455, @@ -1249,9 +1249,26 @@ "name": "chain-id", "values": [ { - "expected_value": "*" + "expected_value": "lava-testnet-2" } ] + }, + { + "name": "pruning", + "values": [ + { + "latest_distance": 5760 + } + ] + } + ], + "extensions": [ + { + "name": "archive", + "cu_multiplier": 5, + "rule": { + "block": 5680 + } } ] }, @@ 
-2444,10 +2461,27 @@ "name": "chain-id", "values": [ { - "expected_value": "*" + "expected_value": "lava-testnet-2" + } + ] + }, + { + "name": "pruning", + "values": [ + { + "latest_distance": 5760 } ] } + ], + "extensions": [ + { + "name": "archive", + "cu_multiplier": 5, + "rule": { + "block": 5680 + } + } ] }, { @@ -2467,10 +2501,31 @@ "name": "chain-id", "values": [ { - "expected_value": "*" + "expected_value": "lava-testnet-2" + } + ] + }, + { + "name": "pruning", + "values": [ + { + "latest_distance": 5760 + }, + { + "extension": "archive", + "expected_value": "340778" } ] } + ], + "extensions": [ + { + "name": "archive", + "cu_multiplier": 5, + "rule": { + "block": 5680 + } + } ] } ] From e90127e933cd371fb99622c35c88fc76e7296763 Mon Sep 17 00:00:00 2001 From: Yaroms <103432884+Yaroms@users.noreply.github.com> Date: Mon, 15 Apr 2024 16:00:00 +0300 Subject: [PATCH 16/17] fix: CNS-fix-LAV1-dev (#1378) * remove verifications in dev env * fix * another fix * another another fix * fix spec count * fix spec count --------- Co-authored-by: Yarom Swisa Co-authored-by: Ran Mishael --- scripts/init_e2e.sh | 2 +- scripts/init_payment_e2e.sh | 2 +- testutil/e2e/paymentE2E.go | 2 +- testutil/e2e/protocolE2E.go | 4 ++-- testutil/e2e/sdkE2E.go | 2 +- x/spec/client/cli/tx.go | 10 ++++++++++ 6 files changed, 16 insertions(+), 6 deletions(-) diff --git a/scripts/init_e2e.sh b/scripts/init_e2e.sh index 39e69c4cc5..353bb65926 100755 --- a/scripts/init_e2e.sh +++ b/scripts/init_e2e.sh @@ -9,7 +9,7 @@ GASPRICE="0.000000001ulava" # Specs proposal echo ---- Specs proposal ---- -lavad tx gov submit-legacy-proposal spec-add ./cookbook/specs/spec_add_ethereum.json,./cookbook/specs/spec_add_ibc.json,./cookbook/specs/spec_add_cosmossdk.json,./cookbook/specs/spec_add_lava.json --lava-dev-test -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx gov submit-legacy-proposal spec-add ./cookbook/specs/spec_add_ethereum.json,./cookbook/specs/spec_add_cosmoswasm.json,./cookbook/specs/spec_add_ibc.json,./cookbook/specs/spec_add_cosmossdk.json,./cookbook/specs/spec_add_cosmossdk_full.json,./cookbook/specs/spec_add_lava.json --lava-dev-test -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE wait_next_block lavad tx gov vote 1 yes -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE sleep 6 # need to sleep because plan policies need the specs when setting chain policies verifications diff --git a/scripts/init_payment_e2e.sh b/scripts/init_payment_e2e.sh index a1dd402c42..15c16574b8 100755 --- a/scripts/init_payment_e2e.sh +++ b/scripts/init_payment_e2e.sh @@ -8,7 +8,7 @@ source $__dir/useful_commands.sh GASPRICE="0.000000001ulava" # Specs proposal -lavad tx gov submit-legacy-proposal spec-add ./cookbook/specs/spec_add_ibc.json,./cookbook/specs/spec_add_cosmossdk.json,./cookbook/specs/spec_add_lava.json -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx gov submit-legacy-proposal spec-add ./cookbook/specs/spec_add_cosmoswasm.json,./cookbook/specs/spec_add_ibc.json,./cookbook/specs/spec_add_cosmossdk.json,./cookbook/specs/spec_add_cosmossdk_full.json,./cookbook/specs/spec_add_lava.json --lava-dev-test -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE wait_next_block lavad tx gov deposit 1 100ulava -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE wait_next_block diff --git a/testutil/e2e/paymentE2E.go b/testutil/e2e/paymentE2E.go index 4c68abdda5..5fe22baf99 100644 
--- a/testutil/e2e/paymentE2E.go +++ b/testutil/e2e/paymentE2E.go @@ -321,7 +321,7 @@ func runPaymentE2E(timeout time.Duration) { // - produce 1 staked client (for LAV1) // - produce 1 subscription (for LAV1) - lt.checkStakeLava(1, 3, 1, 2, checkedPlansE2E, []string{"LAV1"}, []string{"user1"}, "Staking Lava OK") + lt.checkStakeLava(1, 5, 1, 2, checkedPlansE2E, []string{"LAV1"}, []string{"user1"}, "Staking Lava OK") // get balance of providers right after stake for payment check later providers, err := lt.getProvidersAddresses() diff --git a/testutil/e2e/protocolE2E.go b/testutil/e2e/protocolE2E.go index dc5d72d91a..d4cb80b0b9 100644 --- a/testutil/e2e/protocolE2E.go +++ b/testutil/e2e/protocolE2E.go @@ -644,7 +644,7 @@ func (lt *lavaTest) lavaOverLava(ctx context.Context) { // - produce 5 specs: ETH1, HOL1, SEP1, IBC, COSMOSSDK, LAV1 (via spec_add_{ethereum,cosmoshub,lava}) // - produce 2 plans: "DefaultPlan", "EmergencyModePlan" - lt.checkStakeLava(2, 6, 4, 5, checkedPlansE2E, checkedSpecsE2ELOL, checkedSubscriptionsLOL, "Lava Over Lava Test OK") + lt.checkStakeLava(2, 8, 4, 5, checkedPlansE2E, checkedSpecsE2ELOL, checkedSubscriptionsLOL, "Lava Over Lava Test OK") } func (lt *lavaTest) checkRESTConsumer(rpcURL string, timeout time.Duration) { @@ -1224,7 +1224,7 @@ func runProtocolE2E(timeout time.Duration) { // - produce 1 staked client (for each of ETH1, LAV1) // - produce 1 subscription (for both ETH1, LAV1) - lt.checkStakeLava(2, 6, 4, 5, checkedPlansE2E, checkedSpecsE2E, checkedSubscriptions, "Staking Lava OK") + lt.checkStakeLava(2, 8, 4, 5, checkedPlansE2E, checkedSpecsE2E, checkedSubscriptions, "Staking Lava OK") utils.LavaFormatInfo("RUNNING TESTS") diff --git a/testutil/e2e/sdkE2E.go b/testutil/e2e/sdkE2E.go index 9f16465902..e894750bf0 100644 --- a/testutil/e2e/sdkE2E.go +++ b/testutil/e2e/sdkE2E.go @@ -124,7 +124,7 @@ func runSDKE2E(timeout time.Duration) { utils.LavaFormatInfo("Staking Lava") lt.stakeLava(ctx) - lt.checkStakeLava(2, 6, 4, 5, checkedPlansE2E, checkedSpecsE2E, checkedSubscriptions, "Staking Lava OK") + lt.checkStakeLava(2, 8, 4, 5, checkedPlansE2E, checkedSpecsE2E, checkedSubscriptions, "Staking Lava OK") utils.LavaFormatInfo("RUNNING TESTS") diff --git a/x/spec/client/cli/tx.go b/x/spec/client/cli/tx.go index 49b6d9bef8..344cdb44c1 100644 --- a/x/spec/client/cli/tx.go +++ b/x/spec/client/cli/tx.go @@ -101,6 +101,16 @@ $ %s tx gov spec-proposal spec-add --from= Date: Mon, 15 Apr 2024 15:11:30 +0200 Subject: [PATCH 17/17] fix: PRT-fix context timeout when sending relay retries (#1376) * add metrics to rpcconsumer * fix lint * fix a bug where processing timeout was always provided to relay even though we had less time to return the response. 
* adding a protection for super low processing timeout left --- protocol/rpcconsumer/rpcconsumer_server.go | 26 ++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index cd5100e3dd..47f88d1c38 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -344,6 +344,11 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH } utils.LavaFormatWarning("Failed retryFirstRelayAttempt, will retry.", err, utils.LogAttr("attempt", retryFirstRelayAttempt)) } + + if err != nil { + return relayProcessor, err + } + // a channel to be notified processing was done, true means we have results and can return gotResults := make(chan bool) processingTimeout, relayTimeout := rpccs.getProcessingTimeout(chainMessage) @@ -388,13 +393,13 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH if success { return relayProcessor, nil } - err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) + err := rpccs.sendRelayToProvider(processingCtx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) go validateReturnCondition(err) go readResultsFromProcessor() case <-startNewBatchTicker.C: // only trigger another batch for non BestResult relays if relayProcessor.selection != BestResult { - err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) + err := rpccs.sendRelayToProvider(processingCtx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) go validateReturnCondition(err) // add ticker launch metrics go rpccs.rpcConsumerLogs.SetRelaySentByNewBatchTickerMetric(rpccs.getChainIdAndApiInterface()) @@ -597,7 +602,20 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( // unique per dappId and ip consumerToken := common.GetUniqueToken(dappID, consumerIp) - processingTimeout, relayTimeout := rpccs.getProcessingTimeout(chainMessage) + processingTimeout, expectedRelayTimeoutForQOS := rpccs.getProcessingTimeout(chainMessage) + deadline, ok := ctx.Deadline() + if ok { // we have ctx deadline. we cant go past it. + processingTimeout = time.Until(deadline) + if processingTimeout <= 0 { + // no need to send we are out of time + utils.LavaFormatWarning("Creating context deadline for relay attempt ran out of time, processingTimeout <= 0 ", nil, utils.LogAttr("processingTimeout", processingTimeout), utils.LogAttr("Request data", localRelayRequestData)) + return + } + // to prevent absurdly short context timeout set the shortest timeout to be the expected latency for qos time. 
+ if processingTimeout < expectedRelayTimeoutForQOS { + processingTimeout = expectedRelayTimeoutForQOS + } + } // send relay relayLatency, errResponse, backoff := rpccs.relayInner(goroutineCtx, singleConsumerSession, localRelayResult, processingTimeout, chainMessage, consumerToken) if errResponse != nil { @@ -645,7 +663,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( ) } - errResponse = rpccs.consumerSessionManager.OnSessionDone(singleConsumerSession, latestBlock, chainlib.GetComputeUnits(chainMessage), relayLatency, singleConsumerSession.CalculateExpectedLatency(relayTimeout), expectedBH, numOfProviders, pairingAddressesLen, chainMessage.GetApi().Category.HangingApi) // session done successfully + errResponse = rpccs.consumerSessionManager.OnSessionDone(singleConsumerSession, latestBlock, chainlib.GetComputeUnits(chainMessage), relayLatency, singleConsumerSession.CalculateExpectedLatency(expectedRelayTimeoutForQOS), expectedBH, numOfProviders, pairingAddressesLen, chainMessage.GetApi().Category.HangingApi) // session done successfully if rpccs.cache.CacheActive() && rpcclient.ValidateStatusCodes(localRelayResult.StatusCode, true) == nil { // copy reply data so if it changes it doesn't panic mid async send
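The last hunk above is the core of the timeout fix: before relaying, the remaining budget on the caller's context caps the processing timeout, the relay is skipped entirely when that budget is already exhausted, and the result is floored at the expected QoS latency so retries never run with an absurdly short deadline. A standalone sketch of that clamping logic, with hypothetical names and durations chosen purely for illustration:

package main

import (
    "context"
    "fmt"
    "time"
)

// clampProcessingTimeout mirrors the deadline handling added in the hunk above:
// never exceed the caller's remaining context budget, skip the relay when the
// budget is exhausted, and keep at least the expected QoS latency so the
// timeout cannot become absurdly short. Names are illustrative.
func clampProcessingTimeout(ctx context.Context, processingTimeout, expectedRelayTimeoutForQOS time.Duration) (time.Duration, bool) {
    if deadline, ok := ctx.Deadline(); ok {
        remaining := time.Until(deadline)
        if remaining <= 0 {
            return 0, false // out of time: do not send the relay at all
        }
        processingTimeout = remaining
        if processingTimeout < expectedRelayTimeoutForQOS {
            processingTimeout = expectedRelayTimeoutForQOS
        }
    }
    return processingTimeout, true
}

func main() {
    ctx, cancel := context.WithTimeout(context.Background(), 150*time.Millisecond)
    defer cancel()

    timeout, send := clampProcessingTimeout(ctx, 2*time.Second, 300*time.Millisecond)
    fmt.Println(timeout, send) // floored at the QoS latency; relay is still sent
}

In this sketch a caller with only 150ms of context budget left still gets the 300ms floor, matching the patch's choice to prefer a realistic provider timeout over one that would fail every retry.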