From 86379cd28987d7f25041a91d9640fa1f0fa2e305 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Fri, 23 Feb 2024 00:52:55 +0200 Subject: [PATCH 01/57] refactored the rpcconsumer send, WIP --- protocol/chainlib/common.go | 4 +- protocol/lavaprotocol/request_builder.go | 2 +- .../lavasession/consumer_session_manager.go | 180 +--------- .../consumer_session_manager_test.go | 18 +- protocol/lavasession/consumer_types.go | 155 +------- .../lavasession/single_consumer_session.go | 150 ++++++++ protocol/lavasession/used_providers.go | 118 ++++++ protocol/rpcconsumer/relay_processor.go | 183 ++++++++++ protocol/rpcconsumer/rpcconsumer_server.go | 336 ++++++++---------- 9 files changed, 636 insertions(+), 510 deletions(-) create mode 100644 protocol/lavasession/single_consumer_session.go create mode 100644 protocol/lavasession/used_providers.go create mode 100644 protocol/rpcconsumer/relay_processor.go diff --git a/protocol/chainlib/common.go b/protocol/chainlib/common.go index 9fa6f2d98c..d36e6920e1 100644 --- a/protocol/chainlib/common.go +++ b/protocol/chainlib/common.go @@ -297,7 +297,7 @@ func CompareRequestedBlockInBatch(firstRequestedBlock int64, second int64) (late return returnBigger(firstRequestedBlock, second) } -func GetRelayTimeout(chainMessage ChainMessage, chainParser ChainParser, timeouts int) time.Duration { +func GetRelayTimeout(chainMessage ChainMessage, chainParser ChainParser) time.Duration { if chainMessage.TimeoutOverride() != 0 { return chainMessage.TimeoutOverride() } @@ -311,7 +311,7 @@ func GetRelayTimeout(chainMessage ChainMessage, chainParser ChainParser, timeout relayTimeAddition = time.Millisecond * time.Duration(chainMessage.GetApi().TimeoutMs) } // Set relay timout, increase it every time we fail a relay on timeout - return extraRelayTimeout + time.Duration(timeouts+1)*relayTimeAddition + common.AverageWorldLatency + return extraRelayTimeout + relayTimeAddition + common.AverageWorldLatency } // setup a common preflight and cors configuration allowing wild cards and preflight caching. 
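The timeouts counter is removed from GetRelayTimeout, so the per-relay deadline no longer grows with each failed attempt; retry pacing is instead driven by the RelayProcessor batches introduced later in this patch. A minimal sketch of the new flat composition, assuming illustrative values for the pieces the parser and chain message normally supply (the constant below only stands in for common.AverageWorldLatency):

package main

import (
	"fmt"
	"time"
)

// averageWorldLatency stands in for common.AverageWorldLatency; the concrete value is illustrative only.
const averageWorldLatency = 300 * time.Millisecond

// relayTimeoutSketch mirrors the shape of the new GetRelayTimeout return value:
// a flat sum, no longer multiplied by the number of previous timeouts.
func relayTimeoutSketch(extraRelayTimeout, relayTimeAddition time.Duration) time.Duration {
	return extraRelayTimeout + relayTimeAddition + averageWorldLatency
}

func main() {
	// e.g. an API with a 500ms per-api addition and no extra timeout:
	// the result stays the same regardless of how many earlier relays timed out.
	fmt.Println(relayTimeoutSketch(0, 500*time.Millisecond))
}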
diff --git a/protocol/lavaprotocol/request_builder.go b/protocol/lavaprotocol/request_builder.go index 5929db80d2..0cc39cc95f 100644 --- a/protocol/lavaprotocol/request_builder.go +++ b/protocol/lavaprotocol/request_builder.go @@ -163,7 +163,7 @@ func compareRelaysFindConflict(ctx context.Context, reply1 pairingtypes.RelayRep secondAsString := string(reply2.Data) _, idxDiff := findFirstDifferentChar(firstAsString, secondAsString) if idxDiff > 0 && idxDiff+100 < len(firstAsString) && idxDiff+100 < len(secondAsString) { - utils.LavaFormatDebug("different in responses detected", utils.Attribute{Key: "index", Value: idxDiff}, utils.Attribute{Key: "first_diff", Value: firstAsString[idxDiff : idxDiff+100]}, utils.Attribute{Key: "second_diff", Value: secondAsString[idxDiff : idxDiff+100]}) + utils.LavaFormatDebug("difference in responses detected", utils.Attribute{Key: "index", Value: idxDiff}, utils.Attribute{Key: "first_diff", Value: firstAsString[idxDiff : idxDiff+100]}, utils.Attribute{Key: "second_diff", Value: secondAsString[idxDiff : idxDiff+100]}) } } return true, responseConflict diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index cc26c6b70c..e3f7d80e73 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -314,9 +314,13 @@ func (csm *ConsumerSessionManager) validatePairingListNotEmpty(addon string, ext // GetSessions will return a ConsumerSession, given cu needed for that session. // The user can also request specific providers to not be included in the search for a session. -func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForSession uint64, initUnwantedProviders map[string]struct{}, requestedBlock int64, addon string, extensions []*spectypes.Extension, stateful uint32, virtualEpoch uint64) ( +func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForSession uint64, relayProcessor RelayProcessorInf, requestedBlock int64, addon string, extensions []*spectypes.Extension, stateful uint32, virtualEpoch uint64) ( consumerSessionMap ConsumerSessionsMap, errRet error, ) { + // set usedProviders if they were chosen for this relay + initUnwantedProviders := relayProcessor.GetUsedProviders().GetUnwantedProvidersToSend() + defer func() { relayProcessor.GetUsedProviders().AddUsed(consumerSessionMap) }() + extensionNames := common.GetExtensionNames(extensions) // if pairing list is empty we reset the state. numberOfResets := csm.validatePairingListNotEmpty(addon, extensionNames) @@ -407,15 +411,14 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS if MaxComputeUnitsExceededError.Is(err) { tempIgnoredProviders.providers[providerAddress] = struct{}{} // We must unlock the consumer session before continuing. - consumerSession.lock.Unlock() + consumerSession.Free(nil) continue } else { utils.LavaFormatFatal("Unsupported Error", err) } } else { // consumer session is locked and valid, we need to set the relayNumber and the relay cu. before returning. - consumerSession.LatestRelayCu = cuNeededForSession // set latestRelayCu - consumerSession.RelayNum += RelayNumberIncrement // increase relayNum + // Successfully created/got a consumerSession. 
if debug { utils.LavaFormatDebug("Consumer get session", @@ -440,10 +443,7 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS sessionInfo.QoSSummeryResult = consumerSession.getQosComputedResultOrZero() sessions[providerAddress] = sessionInfo - if consumerSession.RelayNum > 1 { - // we only set excellence for sessions with more than one successful relays, this guarantees data within the epoch exists - consumerSession.QoSInfo.LastExcellenceQoSReport = csm.providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress) - } + consumerSession.SetUsageForSession(cuNeededForSession, csm.providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress), relayProcessor) // We successfully added provider, we should ignore it if we need to fetch new tempIgnoredProviders.providers[providerAddress] = struct{}{} @@ -640,41 +640,10 @@ func (csm *ConsumerSessionManager) blockProvider(address string, reportProvider return nil } -// Verify the consumerSession is locked when getting to this function, if its not locked throw an error -func (csm *ConsumerSessionManager) verifyLock(consumerSession *SingleConsumerSession) error { - if consumerSession.lock.TryLock() { // verify. - // if we managed to lock throw an error for misuse. - defer consumerSession.lock.Unlock() - // if failed to lock we should block session as it seems like a very rare case. - consumerSession.BlockListed = true // block this session from future usages - utils.LavaFormatError("Verify Lock failed on session Failure, blocking session", nil, utils.LogAttr("consumerSession", consumerSession)) - return LockMisUseDetectedError - } - return nil -} - -// A Session can be created but unused if consumer found the response in the cache. -// So we need to unlock the session and decrease the cu that were applied -func (csm *ConsumerSessionManager) OnSessionUnUsed(consumerSession *SingleConsumerSession) error { - if err := csm.verifyLock(consumerSession); err != nil { - return sdkerrors.Wrapf(err, "OnSessionUnUsed, consumerSession.lock must be locked before accessing this method, additional info:") - } - cuToDecrease := consumerSession.LatestRelayCu - consumerSession.LatestRelayCu = 0 // making sure no one uses it in a wrong way - parentConsumerSessionsWithProvider := consumerSession.Parent // must read this pointer before unlocking - // finished with consumerSession here can unlock. - consumerSession.lock.Unlock() // we unlock before we change anything in the parent ConsumerSessionsWithProvider - err := parentConsumerSessionsWithProvider.decreaseUsedComputeUnits(cuToDecrease) // change the cu in parent - if err != nil { - return err - } - return nil -} - // Report session failure, mark it as blocked from future usages, report if timeout happened. func (csm *ConsumerSessionManager) OnSessionFailure(consumerSession *SingleConsumerSession, errorReceived error) error { // consumerSession must be locked when getting here. - if err := csm.verifyLock(consumerSession); err != nil { + if err := consumerSession.VerifyLock(); err != nil { return sdkerrors.Wrapf(err, "OnSessionFailure, consumerSession.lock must be locked before accessing this method, additional info:") } @@ -718,7 +687,7 @@ func (csm *ConsumerSessionManager) OnSessionFailure(consumerSession *SingleConsu parentConsumerSessionsWithProvider := consumerSession.Parent // must read this pointer before unlocking csm.updateMetricsManager(consumerSession) // finished with consumerSession here can unlock. 
- consumerSession.lock.Unlock() // we unlock before we change anything in the parent ConsumerSessionsWithProvider + consumerSession.Free(errorReceived) // we unlock before we change anything in the parent ConsumerSessionsWithProvider err := parentConsumerSessionsWithProvider.decreaseUsedComputeUnits(cuToDecrease) // change the cu in parent if err != nil { @@ -738,35 +707,6 @@ func (csm *ConsumerSessionManager) OnSessionFailure(consumerSession *SingleConsu return nil } -// On a successful DataReliability session we don't need to increase and update any field, we just need to unlock the session. -func (csm *ConsumerSessionManager) OnDataReliabilitySessionDone(consumerSession *SingleConsumerSession, - latestServicedBlock int64, - specComputeUnits uint64, - currentLatency time.Duration, - expectedLatency time.Duration, - expectedBH int64, - numOfProviders int, - providersCount uint64, -) error { - if err := csm.verifyLock(consumerSession); err != nil { - return sdkerrors.Wrapf(err, "OnDataReliabilitySessionDone, consumerSession.lock must be locked before accessing this method") - } - - defer consumerSession.lock.Unlock() // we need to be locked here, if we didn't get it locked we try lock anyway - consumerSession.ConsecutiveErrors = []error{} - consumerSession.LatestBlock = latestServicedBlock // update latest serviced block - if expectedBH-latestServicedBlock > 1000 { - utils.LavaFormatWarning("identified block gap", nil, - utils.Attribute{Key: "expectedBH", Value: expectedBH}, - utils.Attribute{Key: "latestServicedBlock", Value: latestServicedBlock}, - utils.Attribute{Key: "session_id", Value: consumerSession.SessionId}, - utils.Attribute{Key: "provider_address", Value: consumerSession.Parent.PublicLavaAddress}, - ) - } - consumerSession.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, int64(providersCount)) - return nil -} - // On a successful session this function will update all necessary fields in the consumerSession. and unlock it when it finishes func (csm *ConsumerSessionManager) OnSessionDone( consumerSession *SingleConsumerSession, @@ -780,11 +720,11 @@ func (csm *ConsumerSessionManager) OnSessionDone( isHangingApi bool, ) error { // release locks, update CU, relaynum etc.. - if err := csm.verifyLock(consumerSession); err != nil { + if err := consumerSession.VerifyLock(); err != nil { return sdkerrors.Wrapf(err, "OnSessionDone, consumerSession.lock must be locked before accessing this method") } - defer consumerSession.lock.Unlock() // we need to be locked here, if we didn't get it locked we try lock anyway + defer consumerSession.Free(nil) // we need to be locked here, if we didn't get it locked we try lock anyway consumerSession.CuSum += consumerSession.LatestRelayCu // add CuSum to current cu usage. consumerSession.LatestRelayCu = 0 // reset cu just in case consumerSession.ConsecutiveErrors = []error{} @@ -835,110 +775,18 @@ func (csm *ConsumerSessionManager) GetReportedProviders(epoch uint64) []*pairing return csm.reportedProviders.GetReportedProviders() } -// Data Reliability Section: - // Atomically read csm.pairingAddressesLength for data reliability. 
func (csm *ConsumerSessionManager) GetAtomicPairingAddressesLength() uint64 { return atomic.LoadUint64(&csm.pairingAddressesLength) } -func (csm *ConsumerSessionManager) getDataReliabilityProviderIndex(unAllowedAddress string, index uint64) (cswp *ConsumerSessionsWithProvider, providerAddress string, epoch uint64, err error) { - csm.lock.RLock() - defer csm.lock.RUnlock() - currentEpoch := csm.atomicReadCurrentEpoch() - pairingAddressesLength := csm.GetAtomicPairingAddressesLength() - if index >= pairingAddressesLength { - utils.LavaFormatInfo(DataReliabilityIndexOutOfRangeError.Error(), utils.Attribute{Key: "index", Value: index}, utils.Attribute{Key: "pairingAddressesLength", Value: pairingAddressesLength}) - return nil, "", currentEpoch, DataReliabilityIndexOutOfRangeError - } - providerAddress = csm.pairingAddresses[index] - if providerAddress == unAllowedAddress { - return nil, "", currentEpoch, DataReliabilityIndexRequestedIsOriginalProviderError - } - // if address is valid return the ConsumerSessionsWithProvider - return csm.pairing[providerAddress], providerAddress, currentEpoch, nil -} - -func (csm *ConsumerSessionManager) fetchEndpointFromConsumerSessionsWithProviderWithRetry(ctx context.Context, consumerSessionsWithProvider *ConsumerSessionsWithProvider, sessionEpoch uint64) (endpoint *Endpoint, err error) { - var connected bool - var providerAddress string - for idx := 0; idx < MaxConsecutiveConnectionAttempts; idx++ { // try to connect to the endpoint 3 times - connected, endpoint, providerAddress, err = consumerSessionsWithProvider.fetchEndpointConnectionFromConsumerSessionWithProvider(ctx) - if err != nil { - // verify err is AllProviderEndpointsDisabled and report. - if AllProviderEndpointsDisabledError.Is(err) { - err = csm.blockProvider(providerAddress, true, sessionEpoch, MaxConsecutiveConnectionAttempts, 0, csm.GenerateReconnectCallback(consumerSessionsWithProvider)) // reporting and blocking provider this epoch - if err != nil { - if !EpochMismatchError.Is(err) { - // only acceptable error is EpochMismatchError so if different, throw fatal - utils.LavaFormatFatal("Unsupported Error", err) - } - } - break // all endpoints are disabled, no reason to continue with this provider. - } else { - utils.LavaFormatFatal("Unsupported Error", err) - } - } - if connected { - // if we are connected we can stop trying and return the endpoint - break - } else { - continue - } - } - if !connected { // if we are not connected at the end - // failed to get an endpoint connection from that provider. return an error. - return nil, utils.LavaFormatError("Not Connected", FailedToConnectToEndPointForDataReliabilityError, utils.Attribute{Key: "provider", Value: providerAddress}) - } - return endpoint, nil -} - -// Get a Data Reliability Session -func (csm *ConsumerSessionManager) GetDataReliabilitySession(ctx context.Context, originalProviderAddress string, index int64, sessionEpoch uint64) (singleConsumerSession *SingleConsumerSession, providerAddress string, epoch uint64, err error) { - consumerSessionWithProvider, providerAddress, currentEpoch, err := csm.getDataReliabilityProviderIndex(originalProviderAddress, uint64(index)) - if err != nil { - return nil, "", 0, err - } - if sessionEpoch != currentEpoch { // validate we are in the same epoch. - return nil, "", currentEpoch, DataReliabilityEpochMismatchError - } - - // after choosing a provider, try to see if it already has an existing data reliability session. 
- consumerSession, pairingEpoch, err := consumerSessionWithProvider.verifyDataReliabilitySessionWasNotAlreadyCreated() - if NoDataReliabilitySessionWasCreatedError.Is(err) { // need to create a new data reliability session - // We can get an endpoint now and create a data reliability session. - endpoint, err := csm.fetchEndpointFromConsumerSessionsWithProviderWithRetry(ctx, consumerSessionWithProvider, currentEpoch) - if err != nil { - return nil, "", currentEpoch, err - } - - // get data reliability session from endpoint - consumerSession, pairingEpoch, err = consumerSessionWithProvider.getDataReliabilitySingleConsumerSession(endpoint) - if err != nil { - return nil, "", currentEpoch, err - } - } else if err != nil { - return nil, "", currentEpoch, err - } - - if currentEpoch != pairingEpoch { // validate they are the same, if not print an error and set currentEpoch to pairingEpoch. - utils.LavaFormatError("currentEpoch and pairingEpoch mismatch", nil, utils.Attribute{Key: "sessionEpoch", Value: currentEpoch}, utils.Attribute{Key: "pairingEpoch", Value: pairingEpoch}) - currentEpoch = pairingEpoch - } - - // DR consumer session is locked, we can increment data reliability relay number. - consumerSession.RelayNum += 1 - - return consumerSession, providerAddress, currentEpoch, nil -} - // On a successful Subscribe relay func (csm *ConsumerSessionManager) OnSessionDoneIncreaseCUOnly(consumerSession *SingleConsumerSession) error { - if err := csm.verifyLock(consumerSession); err != nil { + if err := consumerSession.VerifyLock(); err != nil { return sdkerrors.Wrapf(err, "OnSessionDoneIncreaseRelayAndCu consumerSession.lock must be locked before accessing this method") } - defer consumerSession.lock.Unlock() // we need to be locked here, if we didn't get it locked we try lock anyway + defer consumerSession.Free(nil) // we need to be locked here, if we didn't get it locked we try lock anyway consumerSession.CuSum += consumerSession.LatestRelayCu // add CuSum to current cu usage. 
consumerSession.LatestRelayCu = 0 // reset cu just in case consumerSession.ConsecutiveErrors = []error{} diff --git a/protocol/lavasession/consumer_session_manager_test.go b/protocol/lavasession/consumer_session_manager_test.go index 3b58e84e18..17c1b76eee 100644 --- a/protocol/lavasession/consumer_session_manager_test.go +++ b/protocol/lavasession/consumer_session_manager_test.go @@ -767,6 +767,18 @@ func TestNoPairingsError(t *testing.T) { require.True(t, PairingListEmptyError.Is(err)) } +type mockProcessor struct { + usedProviders *UsedProviders +} + +func (mp mockProcessor) GetUsedProviders() *UsedProviders { + return mp.usedProviders +} + +func (mp mockProcessor) RemoveUsed(provider string, err error) { + mp.usedProviders.RemoveUsed(provider, err) +} + func TestPairingWithStateful(t *testing.T) { ctx := context.Background() t.Run("stateful", func(t *testing.T) { @@ -788,8 +800,10 @@ func TestPairingWithStateful(t *testing.T) { err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) require.NoError(t, err) } - unwantedProvider := map[string]struct{}{providerAddresses[0]: {}} - css, err = csm.GetSessions(ctx, cuForFirstRequest, unwantedProvider, servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session + usedProviders := NewUsedProviders(nil) + usedProviders.SetUnwanted(providerAddresses[0]) + processor := mockProcessor{usedProviders: usedProviders} + css, err = csm.GetSessions(ctx, cuForFirstRequest, processor, servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session require.NoError(t, err) require.Equal(t, allProviders-1, len(css)) }) diff --git a/protocol/lavasession/consumer_types.go b/protocol/lavasession/consumer_types.go index 3de0bafa62..71e0119d43 100644 --- a/protocol/lavasession/consumer_types.go +++ b/protocol/lavasession/consumer_types.go @@ -2,8 +2,6 @@ package lavasession import ( "context" - "math" - "sort" "strconv" "sync" "sync/atomic" @@ -23,6 +21,11 @@ const AllowInsecureConnectionToProvidersFlag = "allow-insecure-provider-dialing" var AllowInsecureConnectionToProviders = false +type RelayProcessorInf interface { + GetUsedProviders() *UsedProviders + RemoveUsed(providerAddress string, err error) +} + type SessionInfo struct { Session *SingleConsumerSession StakeSize sdk.Coin @@ -57,21 +60,6 @@ type QoSReport struct { AnsweredRelays uint64 } -type SingleConsumerSession struct { - CuSum uint64 - LatestRelayCu uint64 // set by GetSessions cuNeededForSession - QoSInfo QoSReport - SessionId int64 - Parent *ConsumerSessionsWithProvider - lock utils.LavaMutex - RelayNum uint64 - LatestBlock int64 - Endpoint *Endpoint - BlockListed bool // if session lost sync we blacklist it. 
- ConsecutiveErrors []error - errorsCount uint64 -} - type DataReliabilitySession struct { SingleConsumerSession *SingleConsumerSession Epoch uint64 @@ -202,46 +190,6 @@ func (cswp *ConsumerSessionsWithProvider) atomicReadUsedComputeUnits() uint64 { return atomic.LoadUint64(&cswp.UsedComputeUnits) } -// verify data reliability session exists or not -func (cswp *ConsumerSessionsWithProvider) verifyDataReliabilitySessionWasNotAlreadyCreated() (singleConsumerSession *SingleConsumerSession, pairingEpoch uint64, err error) { - cswp.Lock.RLock() - defer cswp.Lock.RUnlock() - if dataReliabilitySession, ok := cswp.Sessions[DataReliabilitySessionId]; ok { // check if we already have a data reliability session. - // validate our relay number reached the data reliability relay number limit - if dataReliabilitySession.RelayNum >= DataReliabilityRelayNumber { - return nil, cswp.PairingEpoch, DataReliabilityAlreadySentThisEpochError - } - dataReliabilitySession.lock.Lock() // lock before returning. - return dataReliabilitySession, cswp.PairingEpoch, nil - } - return nil, cswp.PairingEpoch, NoDataReliabilitySessionWasCreatedError -} - -// get a data reliability session from an endpoint -func (cswp *ConsumerSessionsWithProvider) getDataReliabilitySingleConsumerSession(endpoint *Endpoint) (singleConsumerSession *SingleConsumerSession, pairingEpoch uint64, err error) { - cswp.Lock.Lock() - defer cswp.Lock.Unlock() - // we re validate the data reliability session now that we are locked. - if dataReliabilitySession, ok := cswp.Sessions[DataReliabilitySessionId]; ok { // check if we already have a data reliability session. - if dataReliabilitySession.RelayNum >= DataReliabilityRelayNumber { - return nil, cswp.PairingEpoch, DataReliabilityAlreadySentThisEpochError - } - // we already have the dr session. so return it. - return dataReliabilitySession, cswp.PairingEpoch, nil - } - - singleDataReliabilitySession := &SingleConsumerSession{ - SessionId: DataReliabilitySessionId, - Parent: cswp, - Endpoint: endpoint, - RelayNum: 0, - } - singleDataReliabilitySession.lock.Lock() // we must lock the session so other requests wont get it. - - cswp.Sessions[singleDataReliabilitySession.SessionId] = singleDataReliabilitySession // applying the session to the pool of sessions. - return singleDataReliabilitySession, cswp.PairingEpoch, nil -} - func (cswp *ConsumerSessionsWithProvider) GetPairingEpoch() uint64 { return atomic.LoadUint64(&cswp.PairingEpoch) } @@ -345,16 +293,13 @@ func (cswp *ConsumerSessionsWithProvider) GetConsumerSessionInstanceFromEndpoint if numberOfBlockedSessions >= maximumBlockedSessionsAllowed { return nil, 0, MaximumNumberOfBlockListedSessionsError } - - if session.lock.TryLock() { - if session.BlockListed { // this session cannot be used. 
- numberOfBlockedSessions += 1 // increase the number of blocked sessions so we can block this provider is too many are blocklisted - session.lock.Unlock() - continue - } - // if we locked the session its available to use, otherwise someone else is already using it + blocked, ok := session.TryUseSession() + if ok { return session, cswp.PairingEpoch, nil } + if blocked { + numberOfBlockedSessions += 1 // increase the number of blocked sessions so we can block this provider is too many are blocklisted + } } // No Sessions available, create a new session or return an error upon maximum sessions allowed if len(cswp.Sessions) > MaxSessionsAllowedPerProvider { @@ -371,7 +316,7 @@ func (cswp *ConsumerSessionsWithProvider) GetConsumerSessionInstanceFromEndpoint Parent: cswp, Endpoint: endpoint, } - consumerSession.lock.Lock() // we must lock the session so other requests wont get it. + consumerSession.TryUseSession() // we must lock the session so other requests wont get it. cswp.Sessions[consumerSession.SessionId] = consumerSession // applying the session to the pool of sessions. return consumerSession, cswp.PairingEpoch, nil @@ -458,86 +403,8 @@ func (cswp *ConsumerSessionsWithProvider) fetchEndpointConnectionFromConsumerSes return connected, endpointPtr, cswp.PublicLavaAddress, nil } -// returns the expected latency to a threshold. -func (cs *SingleConsumerSession) CalculateExpectedLatency(timeoutGivenToRelay time.Duration) time.Duration { - expectedLatency := (timeoutGivenToRelay / 2) - return expectedLatency -} - -// cs should be locked here to use this method, returns the computed qos or zero if last qos is nil or failed to compute. -func (cs *SingleConsumerSession) getQosComputedResultOrZero() sdk.Dec { - if cs.QoSInfo.LastExcellenceQoSReport != nil { - qosComputed, errComputing := cs.QoSInfo.LastExcellenceQoSReport.ComputeQoSExcellence() - if errComputing == nil { // if we failed to compute the qos will be 0 so this provider wont be picked to return the error in case we get it - return qosComputed - } - utils.LavaFormatError("Failed computing QoS used for error parsing", errComputing, utils.LogAttr("Report", cs.QoSInfo.LastExcellenceQoSReport)) - } - return sdk.ZeroDec() -} - -func (cs *SingleConsumerSession) CalculateQoS(latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) { - // Add current Session QoS - cs.QoSInfo.TotalRelays++ // increase total relays - cs.QoSInfo.AnsweredRelays++ // increase answered relays - - if cs.QoSInfo.LastQoSReport == nil { - cs.QoSInfo.LastQoSReport = &pairingtypes.QualityOfServiceReport{} - } - - downtimePercentage, scaledAvailabilityScore := CalculateAvailabilityScore(&cs.QoSInfo) - cs.QoSInfo.LastQoSReport.Availability = scaledAvailabilityScore - if sdk.OneDec().GT(cs.QoSInfo.LastQoSReport.Availability) { - utils.LavaFormatInfo("QoS Availability report", utils.Attribute{Key: "Availability", Value: cs.QoSInfo.LastQoSReport.Availability}, utils.Attribute{Key: "down percent", Value: downtimePercentage}) - } - - latencyScore := sdk.MinDec(sdk.OneDec(), sdk.NewDecFromInt(sdk.NewInt(int64(expectedLatency))).Quo(sdk.NewDecFromInt(sdk.NewInt(int64(latency))))) - - insertSorted := func(list []sdk.Dec, value sdk.Dec) []sdk.Dec { - index := sort.Search(len(list), func(i int) bool { - return list[i].GTE(value) - }) - if len(list) == index { // nil or empty slice or after last element - return append(list, value) - } - list = append(list[:index+1], list[index:]...) 
// index < len(a) - list[index] = value - return list - } - cs.QoSInfo.LatencyScoreList = insertSorted(cs.QoSInfo.LatencyScoreList, latencyScore) - cs.QoSInfo.LastQoSReport.Latency = cs.QoSInfo.LatencyScoreList[int(float64(len(cs.QoSInfo.LatencyScoreList))*PercentileToCalculateLatency)] - - // checking if we have enough information to calculate the sync score for the providers, if we haven't talked - // with enough providers we don't have enough information and we will wait to have more information before setting the sync score - shouldCalculateSyncScore := int64(numOfProviders) > int64(math.Ceil(float64(servicersToCount)*MinProvidersForSync)) - if shouldCalculateSyncScore { // - if blockHeightDiff <= 0 { // if the diff is bigger than 0 than the block is too old (blockHeightDiff = expected - allowedLag - blockHeight) and we don't give him the score - cs.QoSInfo.SyncScoreSum++ - } - cs.QoSInfo.TotalSyncScore++ - cs.QoSInfo.LastQoSReport.Sync = sdk.NewDec(cs.QoSInfo.SyncScoreSum).QuoInt64(cs.QoSInfo.TotalSyncScore) - if sdk.OneDec().GT(cs.QoSInfo.LastQoSReport.Sync) { - utils.LavaFormatDebug("QoS Sync report", - utils.Attribute{Key: "Sync", Value: cs.QoSInfo.LastQoSReport.Sync}, - utils.Attribute{Key: "block diff", Value: blockHeightDiff}, - utils.Attribute{Key: "sync score", Value: strconv.FormatInt(cs.QoSInfo.SyncScoreSum, 10) + "/" + strconv.FormatInt(cs.QoSInfo.TotalSyncScore, 10)}, - utils.Attribute{Key: "session_id", Value: cs.SessionId}, - utils.Attribute{Key: "provider", Value: cs.Parent.PublicLavaAddress}, - ) - } - } else { - // we prefer to give them a score of 1 when there is no other data, since otherwise we damage their payments - cs.QoSInfo.LastQoSReport.Sync = sdk.NewDec(1) - } -} - func CalculateAvailabilityScore(qosReport *QoSReport) (downtimePercentageRet, scaledAvailabilityScoreRet sdk.Dec) { downtimePercentage := sdk.NewDecWithPrec(int64(qosReport.TotalRelays-qosReport.AnsweredRelays), 0).Quo(sdk.NewDecWithPrec(int64(qosReport.TotalRelays), 0)) scaledAvailabilityScore := sdk.MaxDec(sdk.ZeroDec(), AvailabilityPercentage.Sub(downtimePercentage).Quo(AvailabilityPercentage)) return downtimePercentage, scaledAvailabilityScore } - -// validate if this is a data reliability session -func (scs *SingleConsumerSession) IsDataReliabilitySession() bool { - return scs.SessionId <= DataReliabilitySessionId -} diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go new file mode 100644 index 0000000000..706059251e --- /dev/null +++ b/protocol/lavasession/single_consumer_session.go @@ -0,0 +1,150 @@ +package lavasession + +import ( + "math" + "sort" + "strconv" + "time" + + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/lavanet/lava/utils" + pairingtypes "github.com/lavanet/lava/x/pairing/types" +) + +type SingleConsumerSession struct { + CuSum uint64 + LatestRelayCu uint64 // set by GetSessions cuNeededForSession + QoSInfo QoSReport + SessionId int64 + Parent *ConsumerSessionsWithProvider + lock utils.LavaMutex + RelayNum uint64 + LatestBlock int64 + Endpoint *Endpoint + BlockListed bool // if session lost sync we blacklist it. + ConsecutiveErrors []error + errorsCount uint64 + relayProcessor RelayProcessorInf +} + +// returns the expected latency to a threshold. 
+func (cs *SingleConsumerSession) CalculateExpectedLatency(timeoutGivenToRelay time.Duration) time.Duration { + expectedLatency := (timeoutGivenToRelay / 2) + return expectedLatency +} + +// cs should be locked here to use this method, returns the computed qos or zero if last qos is nil or failed to compute. +func (cs *SingleConsumerSession) getQosComputedResultOrZero() sdk.Dec { + if cs.QoSInfo.LastExcellenceQoSReport != nil { + qosComputed, errComputing := cs.QoSInfo.LastExcellenceQoSReport.ComputeQoSExcellence() + if errComputing == nil { // if we failed to compute the qos will be 0 so this provider wont be picked to return the error in case we get it + return qosComputed + } + utils.LavaFormatError("Failed computing QoS used for error parsing", errComputing, utils.LogAttr("Report", cs.QoSInfo.LastExcellenceQoSReport)) + } + return sdk.ZeroDec() +} + +func (cs *SingleConsumerSession) CalculateQoS(latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) { + // Add current Session QoS + cs.QoSInfo.TotalRelays++ // increase total relays + cs.QoSInfo.AnsweredRelays++ // increase answered relays + + if cs.QoSInfo.LastQoSReport == nil { + cs.QoSInfo.LastQoSReport = &pairingtypes.QualityOfServiceReport{} + } + + downtimePercentage, scaledAvailabilityScore := CalculateAvailabilityScore(&cs.QoSInfo) + cs.QoSInfo.LastQoSReport.Availability = scaledAvailabilityScore + if sdk.OneDec().GT(cs.QoSInfo.LastQoSReport.Availability) { + utils.LavaFormatInfo("QoS Availability report", utils.Attribute{Key: "Availability", Value: cs.QoSInfo.LastQoSReport.Availability}, utils.Attribute{Key: "down percent", Value: downtimePercentage}) + } + + latencyScore := sdk.MinDec(sdk.OneDec(), sdk.NewDecFromInt(sdk.NewInt(int64(expectedLatency))).Quo(sdk.NewDecFromInt(sdk.NewInt(int64(latency))))) + + insertSorted := func(list []sdk.Dec, value sdk.Dec) []sdk.Dec { + index := sort.Search(len(list), func(i int) bool { + return list[i].GTE(value) + }) + if len(list) == index { // nil or empty slice or after last element + return append(list, value) + } + list = append(list[:index+1], list[index:]...) 
// index < len(a) + list[index] = value + return list + } + cs.QoSInfo.LatencyScoreList = insertSorted(cs.QoSInfo.LatencyScoreList, latencyScore) + cs.QoSInfo.LastQoSReport.Latency = cs.QoSInfo.LatencyScoreList[int(float64(len(cs.QoSInfo.LatencyScoreList))*PercentileToCalculateLatency)] + + // checking if we have enough information to calculate the sync score for the providers, if we haven't talked + // with enough providers we don't have enough information and we will wait to have more information before setting the sync score + shouldCalculateSyncScore := int64(numOfProviders) > int64(math.Ceil(float64(servicersToCount)*MinProvidersForSync)) + if shouldCalculateSyncScore { // + if blockHeightDiff <= 0 { // if the diff is bigger than 0 than the block is too old (blockHeightDiff = expected - allowedLag - blockHeight) and we don't give him the score + cs.QoSInfo.SyncScoreSum++ + } + cs.QoSInfo.TotalSyncScore++ + cs.QoSInfo.LastQoSReport.Sync = sdk.NewDec(cs.QoSInfo.SyncScoreSum).QuoInt64(cs.QoSInfo.TotalSyncScore) + if sdk.OneDec().GT(cs.QoSInfo.LastQoSReport.Sync) { + utils.LavaFormatDebug("QoS Sync report", + utils.Attribute{Key: "Sync", Value: cs.QoSInfo.LastQoSReport.Sync}, + utils.Attribute{Key: "block diff", Value: blockHeightDiff}, + utils.Attribute{Key: "sync score", Value: strconv.FormatInt(cs.QoSInfo.SyncScoreSum, 10) + "/" + strconv.FormatInt(cs.QoSInfo.TotalSyncScore, 10)}, + utils.Attribute{Key: "session_id", Value: cs.SessionId}, + utils.Attribute{Key: "provider", Value: cs.Parent.PublicLavaAddress}, + ) + } + } else { + // we prefer to give them a score of 1 when there is no other data, since otherwise we damage their payments + cs.QoSInfo.LastQoSReport.Sync = sdk.NewDec(1) + } +} + +func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, qoSExcellenceReport *pairingtypes.QualityOfServiceReport, relayProcessor RelayProcessorInf) error { + scs.LatestRelayCu = cuNeededForSession // set latestRelayCu + scs.RelayNum += RelayNumberIncrement // increase relayNum + if scs.RelayNum > 1 { + // we only set excellence for sessions with more than one successful relays, this guarantees data within the epoch exists + scs.QoSInfo.LastExcellenceQoSReport = qoSExcellenceReport + } + scs.relayProcessor = relayProcessor + return nil +} + +func (scs *SingleConsumerSession) Free(err error) { + if scs.relayProcessor != nil { + scs.relayProcessor.RemoveUsed(scs.Parent.PublicLavaAddress, err) + scs.relayProcessor = nil + } + scs.lock.Unlock() +} + +func (session *SingleConsumerSession) TryUseSession() (blocked bool, ok bool) { + if session.lock.TryLock() { + if session.BlockListed { // this session cannot be used. + session.lock.Unlock() + return true, false + } + if session.relayProcessor != nil { + utils.LavaFormatError("session misuse detected, usedProviders isn't nil, blocking", nil, utils.LogAttr("session", session.SessionId)) + session.BlockListed = true + session.lock.Unlock() + return true, false + } + return false, true + } + return false, false +} + +// Verify the consumerSession is locked when getting to this function, if its not locked throw an error +func (consumerSession *SingleConsumerSession) VerifyLock() error { + if consumerSession.lock.TryLock() { // verify. + // if we managed to lock throw an error for misuse. + defer consumerSession.Free(nil) + // if failed to lock we should block session as it seems like a very rare case. 
+ consumerSession.BlockListed = true // block this session from future usages + utils.LavaFormatError("Verify Lock failed on session Failure, blocking session", nil, utils.LogAttr("consumerSession", consumerSession)) + return LockMisUseDetectedError + } + return nil +} diff --git a/protocol/lavasession/used_providers.go b/protocol/lavasession/used_providers.go new file mode 100644 index 0000000000..3dacce65de --- /dev/null +++ b/protocol/lavasession/used_providers.go @@ -0,0 +1,118 @@ +package lavasession + +import ( + "strings" + "sync" + + "github.com/lavanet/lava/protocol/common" + "github.com/lavanet/lava/utils" +) + +func NewUsedProviders(directiveHeaders map[string]string) *UsedProviders { + unwantedProviders := map[string]struct{}{} + if len(directiveHeaders) > 0 { + blockedProviders, ok := directiveHeaders[common.BLOCK_PROVIDERS_ADDRESSES_HEADER_NAME] + if ok { + providerAddressesToBlock := strings.Split(blockedProviders, ",") + for _, providerAddress := range providerAddressesToBlock { + unwantedProviders[providerAddress] = struct{}{} + } + } + } + return &UsedProviders{providers: map[string]struct{}{}, unwantedProviders: unwantedProviders, blockOnSyncLoss: map[string]struct{}{}} +} + +type UsedProviders struct { + lock sync.RWMutex + providers map[string]struct{} + selecting bool + unwantedProviders map[string]struct{} + blockOnSyncLoss map[string]struct{} +} + +func (up *UsedProviders) CurrentlyUsed() int { + up.lock.RLock() + defer up.lock.RUnlock() + return len(up.providers) +} + +func (up *UsedProviders) RemoveUsed(provider string, err error) { + if up == nil { + return + } + up.lock.Lock() + defer up.lock.Unlock() + if err != nil { + _, ok := up.blockOnSyncLoss[provider] + if !ok && IsSessionSyncLoss(err) { + up.blockOnSyncLoss[provider] = struct{}{} + utils.LavaFormatWarning("Identified SyncLoss in provider, not removing it from list for another attempt", err, utils.Attribute{Key: "address", Value: provider}) + } else { + up.SetUnwanted(provider) + } + } else { + // we got a valid response from this provider, no reason to keep using it + up.SetUnwanted(provider) + } + delete(up.providers, provider) +} + +func (up *UsedProviders) AddUsed(sessions ConsumerSessionsMap) { + if up == nil { + return + } + up.lock.Lock() + defer up.lock.Unlock() + // this is argument nil safe + for provider := range sessions { // the key for ConsumerSessionsMap is the provider public address + up.providers[provider] = struct{}{} + } + up.selecting = false +} + +func (up *UsedProviders) SetUnwanted(provider string) { + if up == nil { + return + } + up.lock.Lock() + defer up.lock.Unlock() + up.unwantedProviders[provider] = struct{}{} +} + +func (up *UsedProviders) TryLockSelection() bool { + if up == nil { + return true + } + up.lock.Lock() + defer up.lock.Unlock() + if !up.selecting { + up.selecting = true + return true + } + return false +} + +func (up *UsedProviders) GetSelecting() bool { + if up == nil { + return false + } + up.lock.RLock() + defer up.lock.RUnlock() + return up.selecting +} + +func (up *UsedProviders) GetUnwantedProvidersToSend() map[string]struct{} { + if up == nil { + return map[string]struct{}{} + } + up.lock.RLock() + defer up.lock.RUnlock() + unwantedProvidersToSend := map[string]struct{}{} + for provider := range up.providers { + unwantedProvidersToSend[provider] = struct{}{} + } + for provider := range up.unwantedProviders { + unwantedProvidersToSend[provider] = struct{}{} + } + return unwantedProvidersToSend +} diff --git a/protocol/rpcconsumer/relay_processor.go 
b/protocol/rpcconsumer/relay_processor.go new file mode 100644 index 0000000000..a3015aaaa2 --- /dev/null +++ b/protocol/rpcconsumer/relay_processor.go @@ -0,0 +1,183 @@ +package rpcconsumer + +import ( + "context" + "fmt" + "sync" + + "github.com/lavanet/lava/protocol/chainlib" + "github.com/lavanet/lava/protocol/common" + "github.com/lavanet/lava/protocol/lavasession" + "github.com/lavanet/lava/utils" + spectypes "github.com/lavanet/lava/x/spec/types" +) + +const ( + MaxCallsPerRelay = 50 +) + +func NewRelayProcessor(ctx context.Context, usedProviders *lavasession.UsedProviders, requiredSuccesses int, chainMessage chainlib.ChainMessage) *RelayProcessor { + guid, _ := utils.GetUniqueIdentifier(ctx) + return &RelayProcessor{ + usedProviders: usedProviders, + requiredResults: requiredSuccesses, + responses: make(chan *relayResponse, MaxCallsPerRelay), // we set it as buffered so it is not blocking + nodeResponseErrors: &RelayErrors{relayErrors: []RelayError{}}, + protocolResponseErrors: &RelayErrors{relayErrors: []RelayError{}, onFailureMergeAll: true}, + chainMessage: chainMessage, + guid: guid, + // TODO: handle required errors + requiredErrors: requiredSuccesses, + } +} + +type RelayProcessor struct { + usedProviders *lavasession.UsedProviders + responses chan *relayResponse + requiredResults int + nodeResponseErrors *RelayErrors + protocolResponseErrors *RelayErrors + results []common.RelayResult + lock sync.RWMutex + chainMessage chainlib.ChainMessage + errorRelayResult common.RelayResult + guid uint64 + requiredErrors int +} + +func (rp *RelayProcessor) String() string { + // TODO: + return "" +} + +func (rp *RelayProcessor) RemoveUsed(providerAddress string, err error) { + // TODO: +} + +func (rp *RelayProcessor) GetUsedProviders() *lavasession.UsedProviders { + return rp.usedProviders +} + +func (rp *RelayProcessor) ComparableResults() []common.RelayResult { + rp.lock.RLock() + defer rp.lock.RUnlock() + // TODO: add nodeResponseErrors + return rp.results +} + +func (rp *RelayProcessor) ProtocolErrors() uint64 { + rp.lock.RLock() + defer rp.lock.RUnlock() + return uint64(len(rp.protocolResponseErrors.relayErrors)) +} + +func (rp *RelayProcessor) SetResponse(response *relayResponse) { + if response == nil { + return + } + rp.responses <- response +} + +func (rp *RelayProcessor) setValidResponse(response *relayResponse) { + rp.lock.Lock() + defer rp.lock.Unlock() + foundError, errorMessage := rp.chainMessage.CheckResponseError(response.relayResult.Reply.Data, response.relayResult.StatusCode) + if foundError { + // this is a node error, meaning we still didn't get a good response. 
+ // we may choose to wait until there will be a response or timeout happens + // if we decide to wait and timeout happens we will take the majority of response messages + err := fmt.Errorf(errorMessage) + rp.nodeResponseErrors.relayErrors = append(rp.nodeResponseErrors.relayErrors, RelayError{err: err, ProviderInfo: response.relayResult.ProviderInfo, response: response}) + return + } + // future relay requests and data reliability requests need to ask for the same specific block height to get consensus on the reply + // we do not modify the chain message data on the consumer, only it's requested block, so we let the provider know it can't put any block height it wants by setting a specific block height + reqBlock, _ := rp.chainMessage.RequestedBlock() + if reqBlock == spectypes.LATEST_BLOCK { + modifiedOnLatestReq := rp.chainMessage.UpdateLatestBlockInMessage(response.relayResult.Request.RelayData.RequestBlock, false) + if !modifiedOnLatestReq { + response.relayResult.Finalized = false // shut down data reliability + } + } + rp.results = append(rp.results, response.relayResult) + return +} + +func (rp *RelayProcessor) setErrorResponse(response *relayResponse) { + rp.lock.Lock() + defer rp.lock.Unlock() + utils.LavaFormatDebug("could not send relay to provider", utils.Attribute{Key: "GUID", Value: rp.guid}, utils.Attribute{Key: "provider", Value: response.relayResult.ProviderInfo.ProviderAddress}, utils.Attribute{Key: "error", Value: response.err.Error()}) + rp.protocolResponseErrors.relayErrors = append(rp.protocolResponseErrors.relayErrors, RelayError{err: response.err, ProviderInfo: response.relayResult.ProviderInfo, response: response}) +} + +func (rp *RelayProcessor) CheckEndProcessing() bool { + rp.lock.RLock() + defer rp.lock.RUnlock() + resultsCount := len(rp.results) + if resultsCount >= rp.requiredResults { + return true + } + nodeErrors := len(rp.nodeResponseErrors.relayErrors) + protocolErrors := len(rp.protocolResponseErrors.relayErrors) + if resultsCount+nodeErrors+protocolErrors >= rp.requiredErrors { + return true + } + return false +} + +func (rp *RelayProcessor) HasResponses() bool { + if rp == nil { + return false + } + rp.lock.RLock() + defer rp.lock.RUnlock() + resultsCount := len(rp.results) + nodeErrors := len(rp.nodeResponseErrors.relayErrors) + protocolErrors := len(rp.protocolResponseErrors.relayErrors) + return resultsCount+nodeErrors+protocolErrors > 0 +} + +func (rp *RelayProcessor) GetRelayResult(ctx context.Context) ([]common.RelayResult, error) { + responsesCount := 0 + for { + select { + case response := <-rp.responses: + responsesCount++ + if response.err != nil { + rp.setErrorResponse(response) + } else { + rp.setValidResponse(response) + } + if rp.CheckEndProcessing() { + // we can finish processing + return rp.ProcessingResult() + } + case <-ctx.Done(): + utils.LavaFormatWarning("cancelled relay processor", nil, utils.LogAttr("total responses", responsesCount)) + return rp.ProcessingResult() + } + } +} + +func (rp *RelayProcessor) ProcessingResult() ([]common.RelayResult, error) { + // when getting an error from all the results + // rp.errorRelayResult.ProviderInfo.ProviderAddress += relayResult.ProviderInfo.ProviderAddress + // if relayResult.GetStatusCode() != 0 { + // // keep the error status code + // rp.errorRelayResult.StatusCode = relayResult.GetStatusCode() + // } + + // if len(relayResults) == 0 { + // rpccs.appendHeadersToRelayResult(ctx, errorRelayResult, retries) + // // suggest the user to add the timeout flag + // if uint64(timeouts) == 
retries && retries > 0 { + // utils.LavaFormatDebug("all relays timeout", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "errors", Value: relayErrors.relayErrors}) + // return errorRelayResult, utils.LavaFormatError("Failed all relay retries due to timeout consider adding 'lava-relay-timeout' header to extend the allowed timeout duration", nil, utils.Attribute{Key: "GUID", Value: ctx}) + // } + // bestRelayError := relayErrors.GetBestErrorMessageForUser() + // return errorRelayResult, utils.LavaFormatError("Failed all retries", nil, utils.Attribute{Key: "GUID", Value: ctx}, utils.LogAttr("error", bestRelayError.err)) + // } else if len(relayErrors.relayErrors) > 0 { + // utils.LavaFormatDebug("relay succeeded but had some errors", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "errors", Value: relayErrors}) + // } + return nil, fmt.Errorf("TODO") +} diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 5ee1181926..9e7ff85c71 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -12,6 +12,7 @@ import ( "github.com/btcsuite/btcd/btcec" sdk "github.com/cosmos/cosmos-sdk/types" "github.com/lavanet/lava/protocol/chainlib" + "github.com/lavanet/lava/protocol/chainlib/chainproxy/rpcclient" "github.com/lavanet/lava/protocol/chainlib/extensionslib" "github.com/lavanet/lava/protocol/common" "github.com/lavanet/lava/protocol/lavaprotocol" @@ -55,7 +56,7 @@ type RPCConsumerServer struct { } type relayResponse struct { - relayResult *common.RelayResult + relayResult common.RelayResult err error } @@ -186,33 +187,27 @@ func (rpccs *RPCConsumerServer) craftRelay(ctx context.Context) (ok bool, relay } func (rpccs *RPCConsumerServer) sendRelayWithRetries(ctx context.Context, retries int, initialRelays bool, relay *pairingtypes.RelayPrivateData, chainMessage chainlib.ChainMessage) (bool, error) { - unwantedProviders := map[string]struct{}{} - timeouts := 0 success := false var err error - + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMessage) for i := 0; i < retries; i++ { - var relayResult *common.RelayResult - relayResult, err = rpccs.sendRelayToProvider(ctx, chainMessage, relay, "-init-", "", &unwantedProviders, timeouts) + err = rpccs.sendRelayToProvider(ctx, chainMessage, relay, "-init-", "", relayProcessor) if err != nil { - utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "unwantedProviders", Value: unwantedProviders}}...) - if relayResult != nil && relayResult.ProviderInfo.ProviderAddress != "" { - unwantedProviders[relayResult.ProviderInfo.ProviderAddress] = struct{}{} - } - if common.IsTimeout(err) { - timeouts++ - } + utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "relayProcessor", Value: relayProcessor}}...) } else { - unwantedProviders = map[string]struct{}{} - utils.LavaFormatInfo("[+] init relay succeeded", []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "latestBlock", Value: relayResult.Reply.LatestBlock}, {Key: "provider address", Value: relayResult.ProviderInfo.ProviderAddress}}...) 
- - rpccs.relaysMonitor.LogRelay() - success = true - - // If this is the first time we send relays, we want to send all of them, instead of break on first successful relay - // That way, we populate the providers with the latest blocks with successful relays - if !initialRelays { - break + relayResults, err := relayProcessor.GetRelayResult(ctx) + if err != nil || len(relayResults) == 0 { + utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "relayProcessor", Value: relayProcessor}}...) + } else { + relayResult := relayResults[0] // will return only 1 since we have set the processor with 1 + utils.LavaFormatInfo("[+] init relay succeeded", []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "latestBlock", Value: relayResult.Reply.LatestBlock}, {Key: "provider address", Value: relayResult.ProviderInfo.ProviderAddress}}...) + rpccs.relaysMonitor.LogRelay() + success = true + // If this is the first time we send relays, we want to send all of them, instead of break on first successful relay + // That way, we populate the providers with the latest blocks with successful relays + if !initialRelays { + break + } } } time.Sleep(2 * time.Millisecond) @@ -285,124 +280,84 @@ func (rpccs *RPCConsumerServer) SendRelay( seenBlock = 0 } relayRequestData := lavaprotocol.NewRelayData(ctx, connectionType, url, []byte(req), seenBlock, reqBlock, rpccs.listenEndpoint.ApiInterface, chainMessage.GetRPCMessage().GetHeaders(), chainlib.GetAddon(chainMessage), common.GetExtensionNames(chainMessage.GetExtensions())) - relayResults := []*common.RelayResult{} - relayErrors := &RelayErrors{onFailureMergeAll: true} - blockOnSyncLoss := map[string]struct{}{} - modifiedOnLatestReq := false - errorRelayResult := &common.RelayResult{} // returned on error - retries := uint64(0) - timeouts := 0 - unwantedProviders := rpccs.GetInitialUnwantedProviders(directiveHeaders) - - for ; retries < MaxRelayRetries; retries++ { - // TODO: make this async between different providers - relayResult, err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, &unwantedProviders, timeouts) - if relayResult.ProviderInfo.ProviderAddress != "" { - if err != nil { - // add this provider to the erroring providers - if errorRelayResult.ProviderInfo.ProviderAddress != "" { - errorRelayResult.ProviderInfo.ProviderAddress += "," - } - errorRelayResult.ProviderInfo.ProviderAddress += relayResult.ProviderInfo.ProviderAddress - _, ok := blockOnSyncLoss[relayResult.ProviderInfo.ProviderAddress] - if !ok && lavasession.IsSessionSyncLoss(err) { - // allow this provider to be wantedProvider on a retry, if it didn't fail once on syncLoss - blockOnSyncLoss[relayResult.ProviderInfo.ProviderAddress] = struct{}{} - utils.LavaFormatWarning("Identified SyncLoss in provider, not removing it from list for another attempt", err, utils.Attribute{Key: "address", Value: relayResult.ProviderInfo.ProviderAddress}) - } else { - unwantedProviders[relayResult.ProviderInfo.ProviderAddress] = struct{}{} - } - if common.IsTimeout(err) { - timeouts++ - } - } - } - if err != nil { - if relayResult.GetStatusCode() != 0 { - // keep the error status code - errorRelayResult.StatusCode = relayResult.GetStatusCode() - } - relayErrors.relayErrors = append(relayErrors.relayErrors, RelayError{err: err, ProviderInfo: 
relayResult.ProviderInfo}) - if lavasession.PairingListEmptyError.Is(err) { - // if we ran out of pairings because unwantedProviders is too long or validProviders is too short, continue to reply handling code - break - } - // decide if we should break here if its something retry won't solve - utils.LavaFormatDebug("could not send relay to provider", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "error", Value: err.Error()}, utils.Attribute{Key: "endpoint", Value: rpccs.listenEndpoint}) - continue - } - relayResults = append(relayResults, relayResult) - unwantedProviders[relayResult.ProviderInfo.ProviderAddress] = struct{}{} - // future relay requests and data reliability requests need to ask for the same specific block height to get consensus on the reply - // we do not modify the chain message data on the consumer, only it's requested block, so we let the provider know it can't put any block height it wants by setting a specific block height - reqBlock, _ := chainMessage.RequestedBlock() - if reqBlock == spectypes.LATEST_BLOCK { - modifiedOnLatestReq = chainMessage.UpdateLatestBlockInMessage(relayResult.Request.RelayData.RequestBlock, false) - if !modifiedOnLatestReq { - relayResult.Finalized = false // shut down data reliability - } - } - if len(relayResults) >= rpccs.requiredResponses { - break - } - } + relayProcessor, err := rpccs.SendRelayToProvidersWithRetry(ctx, directiveHeaders, chainMessage, relayRequestData, dappID, consumerIp) + if err != nil && !relayProcessor.HasResponses() { + return nil, err + } + // Handle Data Reliability enabled, dataReliabilityThreshold := rpccs.chainParser.DataReliabilityParams() if enabled { - for _, relayResult := range relayResults { - // new context is needed for data reliability as some clients cancel the context they provide when the relay returns - // as data reliability happens in a go routine it will continue while the response returns. - guid, found := utils.GetUniqueIdentifier(ctx) - dataReliabilityContext := context.Background() - if found { - dataReliabilityContext = utils.WithUniqueIdentifier(dataReliabilityContext, guid) - } - go rpccs.sendDataReliabilityRelayIfApplicable(dataReliabilityContext, dappID, consumerIp, relayResult, chainMessage, dataReliabilityThreshold, unwantedProviders) // runs asynchronously + // new context is needed for data reliability as some clients cancel the context they provide when the relay returns + // as data reliability happens in a go routine it will continue while the response returns. 
+ guid, found := utils.GetUniqueIdentifier(ctx) + dataReliabilityContext, _ := context.WithTimeout(context.Background(), 30*time.Second) + if found { + dataReliabilityContext = utils.WithUniqueIdentifier(dataReliabilityContext, guid) } + go rpccs.sendDataReliabilityRelayIfApplicable(dataReliabilityContext, dappID, consumerIp, chainMessage, dataReliabilityThreshold, relayProcessor) // runs asynchronously } - if len(relayResults) == 0 { - rpccs.appendHeadersToRelayResult(ctx, errorRelayResult, retries) - // suggest the user to add the timeout flag - if uint64(timeouts) == retries && retries > 0 { - utils.LavaFormatDebug("all relays timeout", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "errors", Value: relayErrors.relayErrors}) - return errorRelayResult, utils.LavaFormatError("Failed all relay retries due to timeout consider adding 'lava-relay-timeout' header to extend the allowed timeout duration", nil, utils.Attribute{Key: "GUID", Value: ctx}) - } - bestRelayError := relayErrors.GetBestErrorMessageForUser() - return errorRelayResult, utils.LavaFormatError("Failed all retries", nil, utils.Attribute{Key: "GUID", Value: ctx}, utils.LogAttr("error", bestRelayError.err)) - } else if len(relayErrors.relayErrors) > 0 { - utils.LavaFormatDebug("relay succeeded but had some errors", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "errors", Value: relayErrors}) + // TODO: implement majority selection option + results, err := relayProcessor.ProcessingResult() + // even on error we are going to have returnedResult + if len(results) == 0 { + return nil, utils.LavaFormatError("invalid relayProcessor result, results are empty", err, utils.LogAttr("relayProcessor", relayProcessor)) } - var returnedResult *common.RelayResult - for _, iteratedResult := range relayResults { - // TODO: go over rpccs.requiredResponses and get majority - returnedResult = iteratedResult + returnedResult := &results[0] + if err != nil { + return returnedResult, err } - if analytics != nil { currentLatency := time.Since(relaySentTime) analytics.Latency = currentLatency.Milliseconds() analytics.ComputeUnits = chainMessage.GetApi().ComputeUnits } - if retries > 0 { - utils.LavaFormatDebug("relay succeeded after retries", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "retries", Value: retries}) - } - rpccs.appendHeadersToRelayResult(ctx, returnedResult, retries) + rpccs.appendHeadersToRelayResult(ctx, returnedResult, relayProcessor.ProtocolErrors()) rpccs.relaysMonitor.LogRelay() return returnedResult, nil } +func (rpccs *RPCConsumerServer) SendRelayToProvidersWithRetry(ctx context.Context, directiveHeaders map[string]string, chainMessage chainlib.ChainMessage, relayRequestData *pairingtypes.RelayPrivateData, dappID string, consumerIp string) (*RelayProcessor, error) { + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(directiveHeaders), rpccs.requiredResponses, chainMessage) + err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) + if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { + // we failed to send a batch of relays, if there are no active sends we can terminate + return relayProcessor, err + } + gotResults := make(chan bool) + go func() { + // ProcessResults is reading responses while blocking until the conditions are met + relayProcessor.GetRelayResult(ctx) + gotResults <- true + }() + relayTimeout := chainlib.GetRelayTimeout(chainMessage, rpccs.chainParser) + // every relay timeout we 
send a new batch + startNewBatchTicker := time.NewTicker(relayTimeout) + for { + select { + case <-gotResults: + return relayProcessor, nil + case <-startNewBatchTicker.C: + err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) + if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { + // we failed to send a batch of relays, if there are no active sends we can terminate + return relayProcessor, err + } + } + } +} + func (rpccs *RPCConsumerServer) sendRelayToProvider( ctx context.Context, chainMessage chainlib.ChainMessage, relayRequestData *pairingtypes.RelayPrivateData, dappID string, consumerIp string, - unwantedProviders *map[string]struct{}, - timeouts int, -) (relayResult *common.RelayResult, errRet error) { + relayProcessor *RelayProcessor, +) (errRet error) { // get a session for the relay from the ConsumerSessionManager // construct a relay message with lavaprotocol package, include QoS and jail providers // sign the relay message with the lavaprotocol package @@ -414,12 +369,11 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( // if necessary send detection tx for hashes consensus mismatch // handle QoS updates // in case connection totally fails, update unresponsive providers in ConsumerSessionManager - isSubscription := chainlib.IsSubscription(chainMessage) if isSubscription { // temporarily disable subscriptions // TODO: fix subscription and disable this case. - return &common.RelayResult{ProviderInfo: common.ProviderInfo{ProviderAddress: ""}}, utils.LavaFormatError("Subscriptions are disabled currently", nil) + return utils.LavaFormatError("Subscriptions are disabled currently", nil) } var sharedStateId string // defaults to "", if shared state is disabled then no shared state will be used. 
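The batching flow added above follows a simple pattern: send one batch of relays, then add another batch on every relay-timeout tick until the processor reports that enough responses arrived. A minimal standalone sketch of that pattern is shown below; sendBatch and waitForResults are hypothetical stand-ins for sendRelayToProvider and the processor's result waiting, not the actual Lava APIs.

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// sendBatch and waitForResults are hypothetical stand-ins for
// sendRelayToProvider and the relay processor's blocking wait.
func sendBatch(ctx context.Context) (inFlight int, err error) {
	return 1, nil
}

func waitForResults(ctx context.Context) error {
	time.Sleep(50 * time.Millisecond)
	return nil
}

// processWithRetry sends an initial batch, then adds another batch on every
// tick until the waiting goroutine reports completion, mirroring the
// ticker/select structure of the retry loop above.
func processWithRetry(ctx context.Context, relayTimeout time.Duration) error {
	inFlight, err := sendBatch(ctx)
	if err != nil && inFlight == 0 {
		// nothing was sent and nothing is pending, so there is nothing to wait for
		return err
	}
	gotResults := make(chan bool, 1)
	go func() {
		// blocks until enough responses arrived or the context expired
		waitErr := waitForResults(ctx)
		gotResults <- (waitErr == nil)
	}()
	ticker := time.NewTicker(relayTimeout)
	defer ticker.Stop()
	for {
		select {
		case ok := <-gotResults:
			if !ok {
				return errors.New("timed out waiting for results")
			}
			return nil
		case <-ticker.C:
			// the previous batch is taking too long, add an extra batch in parallel
			if inFlight, err = sendBatch(ctx); err != nil && inFlight == 0 {
				return err
			}
		}
	}
}

func main() {
	fmt.Println(processWithRetry(context.Background(), 100*time.Millisecond))
}

The ticker keeps the original batch in flight while extra batches are added in parallel, which is why the loop only terminates on an error when no sends remain active.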
@@ -455,14 +409,19 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( if cacheError == nil && reply != nil { // Info was fetched from cache, so we don't need to change the state // so we can return here, no need to update anything and calculate as this info was fetched from the cache - relayResult = &common.RelayResult{ + relayResult := common.RelayResult{ Reply: reply, Request: &pairingtypes.RelayRequest{ RelayData: relayRequestData, }, - Finalized: false, // set false to skip data reliability + Finalized: false, // set false to skip data reliability + StatusCode: 200, } - return relayResult, nil + relayProcessor.SetResponse(&relayResponse{ + relayResult: relayResult, + err: nil, + }) + return nil } // cache failed, move on to regular relay if performance.NotConnectedError.Is(cacheError) { @@ -480,20 +439,16 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( virtualEpoch := rpccs.consumerTxSender.GetLatestVirtualEpoch() addon := chainlib.GetAddon(chainMessage) extensions := chainMessage.GetExtensions() - - sessions, err := rpccs.consumerSessionManager.GetSessions(ctx, chainlib.GetComputeUnits(chainMessage), *unwantedProviders, reqBlock, addon, extensions, chainlib.GetStateful(chainMessage), virtualEpoch) + sessions, err := rpccs.consumerSessionManager.GetSessions(ctx, chainlib.GetComputeUnits(chainMessage), relayProcessor, reqBlock, addon, extensions, chainlib.GetStateful(chainMessage), virtualEpoch) if err != nil { if lavasession.PairingListEmptyError.Is(err) && (addon != "" || len(extensions) > 0) { // if we have no providers for a specific addon or extension, return an indicative error err = utils.LavaFormatError("No Providers For Addon Or Extension", err, utils.LogAttr("addon", addon), utils.LogAttr("extensions", extensions)) } - return &common.RelayResult{ProviderInfo: common.ProviderInfo{ProviderAddress: ""}}, err + return err } - // Make a channel for all providers to send responses - responses := make(chan *relayResponse, len(sessions)) - - relayTimeout := chainlib.GetRelayTimeout(chainMessage, rpccs.chainParser, timeouts) + relayTimeout := chainlib.GetRelayTimeout(chainMessage, rpccs.chainParser) // Iterate over the sessions map for providerPublicAddress, sessionInfo := range sessions { // Launch a separate goroutine for each session @@ -506,13 +461,13 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( goroutineCtx = utils.WithUniqueIdentifier(goroutineCtx, guid) } defer func() { - // Return response - responses <- &relayResponse{ - relayResult: localRelayResult, - err: errResponse, - } // Close context goroutineCtxCancel() + // Return response + relayProcessor.SetResponse(&relayResponse{ + relayResult: *localRelayResult, + err: errResponse, + }) }() localRelayResult = &common.RelayResult{ ProviderInfo: common.ProviderInfo{ProviderAddress: providerPublicAddress, ProviderStake: sessionInfo.StakeSize, ProviderQoSExcellenceSummery: sessionInfo.QoSSummeryResult}, @@ -594,7 +549,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( } errResponse = rpccs.consumerSessionManager.OnSessionDone(singleConsumerSession, latestBlock, chainlib.GetComputeUnits(chainMessage), relayLatency, singleConsumerSession.CalculateExpectedLatency(relayTimeout), expectedBH, numOfProviders, pairingAddressesLen, chainMessage.GetApi().Category.HangingApi) // session done successfully - if rpccs.cache.CacheActive() { + if rpccs.cache.CacheActive() && rpcclient.ValidateStatusCodes(localRelayResult.StatusCode, true) == nil { // copy private data so if it changes it doesn't panic mid async send 
copyPrivateData := &pairingtypes.RelayPrivateData{} copyRequestErr := protocopy.DeepCopyProtoObject(localRelayResult.Request.RelayData, copyPrivateData) @@ -620,26 +575,11 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( } }() } + // localRelayResult is being sent on the relayProcessor by a deferred function }(providerPublicAddress, sessionInfo) } - - // Getting the best result from the providers, - // if there was an error we wait for the next result util timeout or a valid response - // priority order {valid response -> error response -> relay error} - // if there were multiple error responses picking the majority - response := rpccs.getBestResult(relayTimeout, responses, len(sessions), chainMessage) - - if response == nil { - return nil, utils.LavaFormatError("Received unexpected nil response from getBestResult", nil, utils.LogAttr("sessions", sessions), utils.LogAttr("chainMessage", chainMessage)) - } - - if response.err == nil && response.relayResult != nil && response.relayResult.Reply != nil { - // no error, update the seen block - blockSeen := response.relayResult.Reply.LatestBlock - rpccs.consumerConsistency.SetSeenBlock(blockSeen, dappID, consumerIp) - } - - return response.relayResult, response.err + // finished setting up go routines, can return and wait for responses + return nil } func (rpccs *RPCConsumerServer) getBestResult(timeout time.Duration, responses chan *relayResponse, numberOfSessions int, chainMessage chainlib.ChainMessage) *relayResponse { @@ -685,6 +625,7 @@ func (rpccs *RPCConsumerServer) getBestResult(timeout time.Duration, responses c // in case we got only errors and we want to return the best one protocolResponseErrors.relayErrors = append(protocolResponseErrors.relayErrors, RelayError{err: response.err, ProviderInfo: response.relayResult.ProviderInfo, response: response}) } + // check if this is the last response we are going to receive // we get here only if all other responses including this one are not valid responses // (whether its a node error or protocol errors) @@ -709,7 +650,7 @@ func (rpccs *RPCConsumerServer) getBestResult(timeout time.Duration, responses c return bestRelayResponse } // failed fetching any error, getting here indicates a real context timeout happened. 
- return &relayResponse{nil, NoResponseTimeout} + return &relayResponse{common.RelayResult{}, NoResponseTimeout} } } } @@ -819,14 +760,10 @@ func (rpccs *RPCConsumerServer) relaySubscriptionInner(ctx context.Context, endp return relayResult, err } -func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context.Context, dappID string, consumerIp string, relayResult *common.RelayResult, chainMessage chainlib.ChainMessage, dataReliabilityThreshold uint32, unwantedProviders map[string]struct{}) error { - // validate relayResult is not nil - if relayResult == nil || relayResult.Reply == nil || relayResult.Request == nil { - return utils.LavaFormatError("sendDataReliabilityRelayIfApplicable relayResult nil check", nil, utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "relayResult", Value: relayResult}) - } +func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context.Context, dappID string, consumerIp string, chainMessage chainlib.ChainMessage, dataReliabilityThreshold uint32, relayProcessor *RelayProcessor) error { specCategory := chainMessage.GetApi().Category - if !specCategory.Deterministic || !relayResult.Finalized { + if !specCategory.Deterministic { return nil // disabled for this spec and requested block so no data reliability messages } @@ -843,37 +780,58 @@ func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context // decided not to do data reliability return nil } - relayRequestData := lavaprotocol.NewRelayData(ctx, relayResult.Request.RelayData.ConnectionType, relayResult.Request.RelayData.ApiUrl, relayResult.Request.RelayData.Data, relayResult.Request.RelayData.SeenBlock, reqBlock, relayResult.Request.RelayData.ApiInterface, chainMessage.GetRPCMessage().GetHeaders(), relayResult.Request.RelayData.Addon, relayResult.Request.RelayData.Extensions) - // TODO: give the same timeout the original provider got by setting the same retry - relayResultDataReliability, err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, &unwantedProviders, 0) - if err != nil { - errAttributes := []utils.Attribute{} - // failed to send to a provider - if relayResultDataReliability.ProviderInfo.ProviderAddress != "" { - errAttributes = append(errAttributes, utils.Attribute{Key: "address", Value: relayResultDataReliability.ProviderInfo.ProviderAddress}) + // only need to send another relay if we don't have enough replies + results := []common.RelayResult{} + for _, result := range relayProcessor.ComparableResults() { + if result.Finalized { + results = append(results, result) } - errAttributes = append(errAttributes, utils.Attribute{Key: "relayRequestData", Value: relayRequestData}) - return utils.LavaFormatWarning("failed data reliability relay to provider", err, errAttributes...) 
 	}
-	if !relayResultDataReliability.Finalized {
-		utils.LavaFormatInfo("skipping data reliability check since response from second provider was not finalized", utils.Attribute{Key: "providerAddress", Value: relayResultDataReliability.ProviderInfo.ProviderAddress})
+	if len(results) == 0 {
+		// nothing to check
 		return nil
 	}
-	conflict := lavaprotocol.VerifyReliabilityResults(ctx, relayResult, relayResultDataReliability, chainMessage.GetApiCollection(), rpccs.chainParser)
-	if conflict != nil {
-		// TODO: remove this check when we fix the missing extensions information on conflict detection transaction
-		if relayRequestData.Extensions == nil || len(relayRequestData.Extensions) == 0 {
-			err := rpccs.consumerTxSender.TxConflictDetection(ctx, nil, conflict, nil, relayResultDataReliability.ConflictHandler)
-			if err != nil {
-				utils.LavaFormatError("could not send detection Transaction", err, utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "conflict", Value: conflict})
+
+	relayResult := results[0]
+	if len(results) < 2 {
+		relayRequestData := lavaprotocol.NewRelayData(ctx, relayResult.Request.RelayData.ConnectionType, relayResult.Request.RelayData.ApiUrl, relayResult.Request.RelayData.Data, relayResult.Request.RelayData.SeenBlock, reqBlock, relayResult.Request.RelayData.ApiInterface, chainMessage.GetRPCMessage().GetHeaders(), relayResult.Request.RelayData.Addon, relayResult.Request.RelayData.Extensions)
+		relayProcessorDataReliability := NewRelayProcessor(ctx, relayProcessor.usedProviders, 1, chainMessage)
+		err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessorDataReliability)
+		if err != nil {
+			return utils.LavaFormatWarning("failed data reliability relay to provider", err, utils.LogAttr("relayProcessorDataReliability", relayProcessorDataReliability))
+		}
+		relayResultsDataReliability, err := relayProcessorDataReliability.GetRelayResult(ctx)
+		resultsDataReliability := []common.RelayResult{}
+		for _, result := range relayResultsDataReliability {
+			if result.Finalized {
+				resultsDataReliability = append(resultsDataReliability, result)
 			}
-			if rpccs.reporter != nil {
-				utils.LavaFormatDebug("sending conflict report to BE", utils.LogAttr("conflicting api", chainMessage.GetApi().Name))
-				rpccs.reporter.AppendConflict(metrics.NewConflictRequest(relayResult.Request, relayResult.Reply, relayResultDataReliability.Request, relayResultDataReliability.Reply))
+		}
+		if len(resultsDataReliability) == 0 {
+			utils.LavaFormatDebug("skipping data reliability check since responses from the second batch were not finalized", utils.Attribute{Key: "results", Value: relayResultsDataReliability})
+			return nil
+		}
+		results = append(results, resultsDataReliability...)
+ } + for i := 0; i < len(results)-1; i++ { + relayResult := results[i] + relayResultDataReliability := results[i+1] + conflict := lavaprotocol.VerifyReliabilityResults(ctx, &relayResult, &relayResultDataReliability, chainMessage.GetApiCollection(), rpccs.chainParser) + if conflict != nil { + // TODO: remove this check when we fix the missing extensions information on conflict detection transaction + if len(chainMessage.GetExtensions()) == 0 { + err := rpccs.consumerTxSender.TxConflictDetection(ctx, nil, conflict, nil, relayResultDataReliability.ConflictHandler) + if err != nil { + utils.LavaFormatError("could not send detection Transaction", err, utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "conflict", Value: conflict}) + } + if rpccs.reporter != nil { + utils.LavaFormatDebug("sending conflict report to BE", utils.LogAttr("conflicting api", chainMessage.GetApi().Name)) + rpccs.reporter.AppendConflict(metrics.NewConflictRequest(relayResult.Request, relayResult.Reply, relayResultDataReliability.Request, relayResultDataReliability.Reply)) + } } + } else { + utils.LavaFormatDebug("[+] verified relay successfully with data reliability", utils.LogAttr("api", chainMessage.GetApi().Name)) } - } else { - utils.LavaFormatDebug("[+] verified relay successfully with data reliability", utils.LogAttr("api", chainMessage.GetApi().Name)) } return nil } @@ -899,18 +857,6 @@ func (rpccs *RPCConsumerServer) LavaDirectiveHeaders(metadata []pairingtypes.Met return metadataRet, headerDirectives } -func (rpccs *RPCConsumerServer) GetInitialUnwantedProviders(directiveHeaders map[string]string) map[string]struct{} { - unwantedProviders := map[string]struct{}{} - blockedProviders, ok := directiveHeaders[common.BLOCK_PROVIDERS_ADDRESSES_HEADER_NAME] - if ok { - providerAddressesToBlock := strings.Split(blockedProviders, ",") - for _, providerAddress := range providerAddressesToBlock { - unwantedProviders[providerAddress] = struct{}{} - } - } - return unwantedProviders -} - func (rpccs *RPCConsumerServer) getExtensionsFromDirectiveHeaders(latestBlock uint64, directiveHeaders map[string]string) extensionslib.ExtensionInfo { extensionsStr, ok := directiveHeaders[common.EXTENSION_OVERRIDE_HEADER_NAME] if ok { From c0bcf0ec58fddcf55a8ad0826054e0778cce08a0 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Fri, 23 Feb 2024 00:57:29 +0200 Subject: [PATCH 02/57] added TODO --- protocol/rpcconsumer/relay_processor.go | 2 +- protocol/rpcconsumer/rpcconsumer_server.go | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index a3015aaaa2..5daa2a105f 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -137,7 +137,7 @@ func (rp *RelayProcessor) HasResponses() bool { return resultsCount+nodeErrors+protocolErrors > 0 } -func (rp *RelayProcessor) GetRelayResult(ctx context.Context) ([]common.RelayResult, error) { +func (rp *RelayProcessor) ProcessResults(ctx context.Context) ([]common.RelayResult, error) { responsesCount := 0 for { select { diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 9e7ff85c71..0012570e61 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -195,7 +195,7 @@ func (rpccs *RPCConsumerServer) sendRelayWithRetries(ctx context.Context, retrie if err != nil { utils.LavaFormatError("[-] failed sending init relay", err, 
[]utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "relayProcessor", Value: relayProcessor}}...) } else { - relayResults, err := relayProcessor.GetRelayResult(ctx) + relayResults, err := relayProcessor.ProcessResults(ctx) if err != nil || len(relayResults) == 0 { utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "relayProcessor", Value: relayProcessor}}...) } else { @@ -281,7 +281,7 @@ func (rpccs *RPCConsumerServer) SendRelay( } relayRequestData := lavaprotocol.NewRelayData(ctx, connectionType, url, []byte(req), seenBlock, reqBlock, rpccs.listenEndpoint.ApiInterface, chainMessage.GetRPCMessage().GetHeaders(), chainlib.GetAddon(chainMessage), common.GetExtensionNames(chainMessage.GetExtensions())) - relayProcessor, err := rpccs.SendRelayToProvidersWithRetry(ctx, directiveHeaders, chainMessage, relayRequestData, dappID, consumerIp) + relayProcessor, err := rpccs.ProcessRelaySend(ctx, directiveHeaders, chainMessage, relayRequestData, dappID, consumerIp) if err != nil && !relayProcessor.HasResponses() { return nil, err } @@ -320,7 +320,7 @@ func (rpccs *RPCConsumerServer) SendRelay( return returnedResult, nil } -func (rpccs *RPCConsumerServer) SendRelayToProvidersWithRetry(ctx context.Context, directiveHeaders map[string]string, chainMessage chainlib.ChainMessage, relayRequestData *pairingtypes.RelayPrivateData, dappID string, consumerIp string) (*RelayProcessor, error) { +func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveHeaders map[string]string, chainMessage chainlib.ChainMessage, relayRequestData *pairingtypes.RelayPrivateData, dappID string, consumerIp string) (*RelayProcessor, error) { relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(directiveHeaders), rpccs.requiredResponses, chainMessage) err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { @@ -329,8 +329,9 @@ func (rpccs *RPCConsumerServer) SendRelayToProvidersWithRetry(ctx context.Contex } gotResults := make(chan bool) go func() { + // TODO: set timeout for ProcessResults via ctx // ProcessResults is reading responses while blocking until the conditions are met - relayProcessor.GetRelayResult(ctx) + relayProcessor.ProcessResults(ctx) gotResults <- true }() relayTimeout := chainlib.GetRelayTimeout(chainMessage, rpccs.chainParser) @@ -800,7 +801,7 @@ func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context if err != nil { return utils.LavaFormatWarning("failed data reliability relay to provider", err, utils.LogAttr("relayProcessorDataReliability", relayProcessorDataReliability)) } - relayResultsDataReliability, err := relayProcessorDataReliability.GetRelayResult(ctx) + relayResultsDataReliability, err := relayProcessorDataReliability.ProcessResults(ctx) resultsDataReliability := []common.RelayResult{} for _, result := range relayResultsDataReliability { if result.Finalized { From 130eeebb7ec9fe57f46ba2f3545c16cf50aa4932 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 26 Feb 2024 12:20:56 +0200 Subject: [PATCH 03/57] progress, WIP --- .../lavasession/consumer_session_manager.go | 14 +- .../lavasession/single_consumer_session.go | 2 +- protocol/lavasession/used_providers.go | 59 ++++++- 
protocol/rpcconsumer/relay_errors.go | 1 + protocol/rpcconsumer/relay_processor.go | 144 +++++++++++++++--- protocol/rpcconsumer/rpcconsumer_server.go | 120 ++++----------- 6 files changed, 217 insertions(+), 123 deletions(-) diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index e3f7d80e73..ddf5303ae5 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -318,18 +318,20 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS consumerSessionMap ConsumerSessionsMap, errRet error, ) { // set usedProviders if they were chosen for this relay - initUnwantedProviders := relayProcessor.GetUsedProviders().GetUnwantedProvidersToSend() + usedProviders := relayProcessor.GetUsedProviders() + timeoutCtx, cancel := context.WithTimeout(ctx, time.Second) + defer cancel() + canSelect := usedProviders.TryLockSelection(timeoutCtx) + if !canSelect { + return nil, utils.LavaFormatError("failed getting sessions from used Providers", nil, utils.LogAttr("usedProviders", usedProviders), utils.LogAttr("endpoint", csm.rpcEndpoint)) + } defer func() { relayProcessor.GetUsedProviders().AddUsed(consumerSessionMap) }() + initUnwantedProviders := usedProviders.GetUnwantedProvidersToSend() extensionNames := common.GetExtensionNames(extensions) // if pairing list is empty we reset the state. numberOfResets := csm.validatePairingListNotEmpty(addon, extensionNames) - // verify initUnwantedProviders is not nil - if initUnwantedProviders == nil { - initUnwantedProviders = make(map[string]struct{}) - } - // providers that we don't try to connect this iteration. tempIgnoredProviders := &ignoredProviders{ providers: initUnwantedProviders, diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go index 706059251e..41ec6b967c 100644 --- a/protocol/lavasession/single_consumer_session.go +++ b/protocol/lavasession/single_consumer_session.go @@ -126,7 +126,7 @@ func (session *SingleConsumerSession) TryUseSession() (blocked bool, ok bool) { return true, false } if session.relayProcessor != nil { - utils.LavaFormatError("session misuse detected, usedProviders isn't nil, blocking", nil, utils.LogAttr("session", session.SessionId)) + utils.LavaFormatError("session misuse detected, usedProviders isn't nil, missing Free call, blocking", nil, utils.LogAttr("session", session.SessionId)) session.BlockListed = true session.lock.Unlock() return true, false diff --git a/protocol/lavasession/used_providers.go b/protocol/lavasession/used_providers.go index 3dacce65de..e7f705de04 100644 --- a/protocol/lavasession/used_providers.go +++ b/protocol/lavasession/used_providers.go @@ -1,8 +1,10 @@ package lavasession import ( + "context" "strings" "sync" + "time" "github.com/lavanet/lava/protocol/common" "github.com/lavanet/lava/utils" @@ -36,6 +38,26 @@ func (up *UsedProviders) CurrentlyUsed() int { return len(up.providers) } +func (up *UsedProviders) CurrentlyUsedAddresses() []string { + up.lock.RLock() + defer up.lock.RUnlock() + addresses := []string{} + for addr := range up.providers { + addresses = append(addresses, addr) + } + return addresses +} + +func (up *UsedProviders) UnwantedAddresses() []string { + up.lock.RLock() + defer up.lock.RUnlock() + addresses := []string{} + for addr := range up.unwantedProviders { + addresses = append(addresses, addr) + } + return addresses +} + func (up *UsedProviders) RemoveUsed(provider string, err 
error) { if up == nil { return @@ -43,10 +65,14 @@ func (up *UsedProviders) RemoveUsed(provider string, err error) { up.lock.Lock() defer up.lock.Unlock() if err != nil { - _, ok := up.blockOnSyncLoss[provider] - if !ok && IsSessionSyncLoss(err) { - up.blockOnSyncLoss[provider] = struct{}{} - utils.LavaFormatWarning("Identified SyncLoss in provider, not removing it from list for another attempt", err, utils.Attribute{Key: "address", Value: provider}) + if ShouldRetryWithThisError(err) { + _, ok := up.blockOnSyncLoss[provider] + if !ok && IsSessionSyncLoss(err) { + up.blockOnSyncLoss[provider] = struct{}{} + utils.LavaFormatWarning("Identified SyncLoss in provider, allowing retry", err, utils.Attribute{Key: "address", Value: provider}) + } else { + up.SetUnwanted(provider) + } } else { up.SetUnwanted(provider) } @@ -63,7 +89,7 @@ func (up *UsedProviders) AddUsed(sessions ConsumerSessionsMap) { } up.lock.Lock() defer up.lock.Unlock() - // this is argument nil safe + // this is nil safe for provider := range sessions { // the key for ConsumerSessionsMap is the provider public address up.providers[provider] = struct{}{} } @@ -79,10 +105,25 @@ func (up *UsedProviders) SetUnwanted(provider string) { up.unwantedProviders[provider] = struct{}{} } -func (up *UsedProviders) TryLockSelection() bool { +func (up *UsedProviders) TryLockSelection(ctx context.Context) bool { if up == nil { return true } + for { + select { + case <-ctx.Done(): + return false + default: + canSelect := up.tryLockSelection() + if canSelect { + return true + } + time.Sleep(10 * time.Millisecond) + } + } +} + +func (up *UsedProviders) tryLockSelection() bool { up.lock.Lock() defer up.lock.Unlock() if !up.selecting { @@ -108,11 +149,17 @@ func (up *UsedProviders) GetUnwantedProvidersToSend() map[string]struct{} { up.lock.RLock() defer up.lock.RUnlock() unwantedProvidersToSend := map[string]struct{}{} + // block the currently used providers for provider := range up.providers { unwantedProvidersToSend[provider] = struct{}{} } + // block providers that we have a response for for provider := range up.unwantedProviders { unwantedProvidersToSend[provider] = struct{}{} } return unwantedProvidersToSend } + +func ShouldRetryWithThisError(err error) bool { + return IsSessionSyncLoss(err) +} diff --git a/protocol/rpcconsumer/relay_errors.go b/protocol/rpcconsumer/relay_errors.go index 88db666326..03d9669cb7 100644 --- a/protocol/rpcconsumer/relay_errors.go +++ b/protocol/rpcconsumer/relay_errors.go @@ -97,6 +97,7 @@ func (r *RelayErrors) mergeAllErrors() error { return fmt.Errorf(mergedMessage) } +// TODO: there's no need to save error twice and provider info twice, this can just be a relayResponse type RelayError struct { err error ProviderInfo common.ProviderInfo diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 5daa2a105f..a284633b53 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -3,7 +3,9 @@ package rpcconsumer import ( "context" "fmt" + "strings" "sync" + "time" "github.com/lavanet/lava/protocol/chainlib" "github.com/lavanet/lava/protocol/common" @@ -13,7 +15,9 @@ import ( ) const ( - MaxCallsPerRelay = 50 + MaxCallsPerRelay = 50 + DefaultTimeout = 20 * time.Second + DefaultTimeoutLong = 3 * time.Minute ) func NewRelayProcessor(ctx context.Context, usedProviders *lavasession.UsedProviders, requiredSuccesses int, chainMessage chainlib.ChainMessage) *RelayProcessor { @@ -22,8 +26,8 @@ func NewRelayProcessor(ctx context.Context, 
usedProviders *lavasession.UsedProvi usedProviders: usedProviders, requiredResults: requiredSuccesses, responses: make(chan *relayResponse, MaxCallsPerRelay), // we set it as buffered so it is not blocking - nodeResponseErrors: &RelayErrors{relayErrors: []RelayError{}}, - protocolResponseErrors: &RelayErrors{relayErrors: []RelayError{}, onFailureMergeAll: true}, + nodeResponseErrors: RelayErrors{relayErrors: []RelayError{}}, + protocolResponseErrors: RelayErrors{relayErrors: []RelayError{}, onFailureMergeAll: true}, chainMessage: chainMessage, guid: guid, // TODO: handle required errors @@ -35,34 +39,49 @@ type RelayProcessor struct { usedProviders *lavasession.UsedProviders responses chan *relayResponse requiredResults int - nodeResponseErrors *RelayErrors - protocolResponseErrors *RelayErrors + nodeResponseErrors RelayErrors + protocolResponseErrors RelayErrors results []common.RelayResult lock sync.RWMutex chainMessage chainlib.ChainMessage - errorRelayResult common.RelayResult guid uint64 requiredErrors int } func (rp *RelayProcessor) String() string { - // TODO: - return "" + rp.lock.RLock() + nodeErrors := len(rp.nodeResponseErrors.relayErrors) + protocolErrors := len(rp.protocolResponseErrors.relayErrors) + results := len(rp.results) + usedProviders := rp.usedProviders + rp.lock.RUnlock() + + currentlyUsedAddresses := usedProviders.CurrentlyUsedAddresses() + unwantedAddresses := usedProviders.UnwantedAddresses() + return fmt.Sprintf("relayProcessor {results:%d, nodeErrors:%d, protocolErrors:%d,unwantedAddresses: %s,currentlyUsedAddresses:%s}", + results, nodeErrors, protocolErrors, strings.Join(unwantedAddresses, ";"), strings.Join(currentlyUsedAddresses, ";")) } +// RemoveUsed will set the provider as being currently used, if the error is one that allows a retry with the same provider, it will only be removed from currently used +// if it's not, then it will be added to unwanted providers since the same relay shouldn't send to it again func (rp *RelayProcessor) RemoveUsed(providerAddress string, err error) { - // TODO: + rp.usedProviders.RemoveUsed(providerAddress, err) } func (rp *RelayProcessor) GetUsedProviders() *lavasession.UsedProviders { return rp.usedProviders } -func (rp *RelayProcessor) ComparableResults() []common.RelayResult { +// this function returns all results that came from a node, meaning success, and node errors +func (rp *RelayProcessor) NodeResults() []common.RelayResult { rp.lock.RLock() defer rp.lock.RUnlock() - // TODO: add nodeResponseErrors - return rp.results + // start with results and add to them node results + nodeResults := rp.results + for _, relayError := range rp.nodeResponseErrors.relayErrors { + nodeResults = append(nodeResults, relayError.response.relayResult) + } + return nodeResults } func (rp *RelayProcessor) ProtocolErrors() uint64 { @@ -100,7 +119,6 @@ func (rp *RelayProcessor) setValidResponse(response *relayResponse) { } } rp.results = append(rp.results, response.relayResult) - return } func (rp *RelayProcessor) setErrorResponse(response *relayResponse) { @@ -119,10 +137,7 @@ func (rp *RelayProcessor) CheckEndProcessing() bool { } nodeErrors := len(rp.nodeResponseErrors.relayErrors) protocolErrors := len(rp.protocolResponseErrors.relayErrors) - if resultsCount+nodeErrors+protocolErrors >= rp.requiredErrors { - return true - } - return false + return resultsCount+nodeErrors+protocolErrors >= rp.requiredErrors } func (rp *RelayProcessor) HasResponses() bool { @@ -137,7 +152,9 @@ func (rp *RelayProcessor) HasResponses() bool { return 
resultsCount+nodeErrors+protocolErrors > 0 } -func (rp *RelayProcessor) ProcessResults(ctx context.Context) ([]common.RelayResult, error) { +// this function waits for the processing results, they are written by multiple go routines and read by this go routine +// it then updates the responses in their respective place, node errors, protocol errors or success results +func (rp *RelayProcessor) WaitForResults(ctx context.Context) error { responsesCount := 0 for { select { @@ -150,15 +167,16 @@ func (rp *RelayProcessor) ProcessResults(ctx context.Context) ([]common.RelayRes } if rp.CheckEndProcessing() { // we can finish processing - return rp.ProcessingResult() + return nil } case <-ctx.Done(): - utils.LavaFormatWarning("cancelled relay processor", nil, utils.LogAttr("total responses", responsesCount)) - return rp.ProcessingResult() + return utils.LavaFormatWarning("cancelled relay processor", nil, utils.LogAttr("total responses", responsesCount)) } } } +// this function returns the results according to the defined strategy +// results were stored in WaitForResults and now there's logic to select which results are returned to the user func (rp *RelayProcessor) ProcessingResult() ([]common.RelayResult, error) { // when getting an error from all the results // rp.errorRelayResult.ProviderInfo.ProviderAddress += relayResult.ProviderInfo.ProviderAddress @@ -181,3 +199,87 @@ func (rp *RelayProcessor) ProcessingResult() ([]common.RelayResult, error) { // } return nil, fmt.Errorf("TODO") } + +// func (rpccs *RPCConsumerServer) getBestResult(timeout time.Duration, responses chan *relayResponse, numberOfSessions int, chainMessage chainlib.ChainMessage) *relayResponse { +// responsesReceived := 0 +// nodeResponseErrors := &RelayErrors{relayErrors: []RelayError{}} +// protocolResponseErrors := &RelayErrors{relayErrors: []RelayError{}, onFailureMergeAll: true} +// // a helper function to fetch the best response (prioritize node over protocol) +// getBestResponseBetweenNodeAndProtocolErrors := func() (*relayResponse, error) { +// if len(nodeResponseErrors.relayErrors) > 0 { // if we have node errors, we prefer returning them over protocol errors. +// bestErrorMessage := nodeResponseErrors.GetBestErrorMessageForUser() +// return bestErrorMessage.response, nil +// } +// if len(protocolResponseErrors.relayErrors) > 0 { // if we have protocol errors at this point return the best one +// protocolsBestErrorMessage := protocolResponseErrors.GetBestErrorMessageForUser() +// return protocolsBestErrorMessage.response, nil +// } +// return nil, fmt.Errorf("failed getting best response") +// } +// startTime := time.Now() +// for { +// select { +// case response := <-responses: +// // increase responses received +// responsesReceived++ +// if response.err == nil { +// // validate if its a error response (from the node not the provider) +// foundError, errorMessage := chainMessage.CheckResponseError(response.relayResult.Reply.Data, response.relayResult.StatusCode) +// // print debug only when we have multiple responses +// if numberOfSessions > 1 { +// utils.LavaFormatDebug("Got Response", utils.LogAttr("responsesReceived", responsesReceived), utils.LogAttr("out_of", numberOfSessions), utils.LogAttr("foundError", foundError), utils.LogAttr("errorMessage", errorMessage), utils.LogAttr("Status code", response.relayResult.StatusCode)) +// } +// if foundError { +// // this is a node error, meaning we still didn't get a good response. 
+// // we will choose to wait until there will be a response or timeout happens +// // if timeout happens we will take the majority of response messages +// nodeResponseErrors.relayErrors = append(nodeResponseErrors.relayErrors, RelayError{err: fmt.Errorf(errorMessage), ProviderInfo: response.relayResult.ProviderInfo, response: response}) +// } else { +// // Return the first successful response +// return response // returning response +// } +// } else { +// // we want to keep the error message in a separate response error structure +// // in case we got only errors and we want to return the best one +// protocolResponseErrors.relayErrors = append(protocolResponseErrors.relayErrors, RelayError{err: response.err, ProviderInfo: response.relayResult.ProviderInfo, response: response}) +// } + +// // check if this is the last response we are going to receive +// // we get here only if all other responses including this one are not valid responses +// // (whether its a node error or protocol errors) +// if responsesReceived == numberOfSessions { +// bestRelayResult, err := getBestResponseBetweenNodeAndProtocolErrors() +// if err == nil { // successfully sent the channel response +// return bestRelayResult +// } +// // if we got here, we for some reason failed to fetch both the best node error and the protocol error +// // it indicates mostly an unwanted behavior. +// utils.LavaFormatWarning("failed getting best error message for both node and protocol", nil, +// utils.LogAttr("nodeResponseErrors", nodeResponseErrors), +// utils.LogAttr("protocolsBestErrorMessage", protocolResponseErrors), +// utils.LogAttr("numberOfSessions", numberOfSessions), +// ) +// return response +// } +// case <-time.After(timeout + 3*time.Second - time.Since(startTime)): +// // Timeout occurred, try fetching the best result we have, prefer node errors over protocol errors +// bestRelayResponse, err := getBestResponseBetweenNodeAndProtocolErrors() +// if err == nil { // successfully sent the channel response +// return bestRelayResponse +// } +// // failed fetching any error, getting here indicates a real context timeout happened. +// return &relayResponse{common.RelayResult{}, NoResponseTimeout} +// } +// } +// } + +func GetTimeoutForProcessing(relayTimeout time.Duration, chainMessage chainlib.ChainMessage) time.Duration { + ctxTimeout := DefaultTimeout + if chainlib.IsHangingApi(chainMessage) || chainMessage.GetApi().ComputeUnits > 100 || chainlib.GetStateful(chainMessage) == common.CONSISTENCY_SELECT_ALLPROVIDERS { + ctxTimeout = DefaultTimeoutLong + } + if relayTimeout > ctxTimeout { + ctxTimeout = relayTimeout + } + return ctxTimeout +} diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 0012570e61..4682cba22e 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -3,7 +3,6 @@ package rpcconsumer import ( "context" "errors" - "fmt" "strconv" "strings" "time" @@ -195,18 +194,23 @@ func (rpccs *RPCConsumerServer) sendRelayWithRetries(ctx context.Context, retrie if err != nil { utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "relayProcessor", Value: relayProcessor}}...) 
} else { - relayResults, err := relayProcessor.ProcessResults(ctx) - if err != nil || len(relayResults) == 0 { + err := relayProcessor.WaitForResults(ctx) + if err != nil { utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "relayProcessor", Value: relayProcessor}}...) } else { - relayResult := relayResults[0] // will return only 1 since we have set the processor with 1 - utils.LavaFormatInfo("[+] init relay succeeded", []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "latestBlock", Value: relayResult.Reply.LatestBlock}, {Key: "provider address", Value: relayResult.ProviderInfo.ProviderAddress}}...) - rpccs.relaysMonitor.LogRelay() - success = true - // If this is the first time we send relays, we want to send all of them, instead of break on first successful relay - // That way, we populate the providers with the latest blocks with successful relays - if !initialRelays { - break + relayResults, err := relayProcessor.ProcessingResult() + relayResult := relayResults[0] + if err == nil { + utils.LavaFormatInfo("[+] init relay succeeded", []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "latestBlock", Value: relayResult.Reply.LatestBlock}, {Key: "provider address", Value: relayResult.ProviderInfo.ProviderAddress}}...) + rpccs.relaysMonitor.LogRelay() + success = true + // If this is the first time we send relays, we want to send all of them, instead of break on first successful relay + // That way, we populate the providers with the latest blocks with successful relays + if !initialRelays { + break + } + } else { + utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "relayProcessor", Value: relayProcessor}}...) 
} } } @@ -327,14 +331,17 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH // we failed to send a batch of relays, if there are no active sends we can terminate return relayProcessor, err } + relayTimeout := chainlib.GetRelayTimeout(chainMessage, rpccs.chainParser) gotResults := make(chan bool) go func() { - // TODO: set timeout for ProcessResults via ctx + processingTimeout := GetTimeoutForProcessing(relayTimeout, chainMessage) + processingCtx, cancel := context.WithTimeout(ctx, processingTimeout) + defer cancel() // ProcessResults is reading responses while blocking until the conditions are met - relayProcessor.ProcessResults(ctx) + relayProcessor.WaitForResults(processingCtx) gotResults <- true }() - relayTimeout := chainlib.GetRelayTimeout(chainMessage, rpccs.chainParser) + // every relay timeout we send a new batch startNewBatchTicker := time.NewTicker(relayTimeout) for { @@ -583,79 +590,6 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( return nil } -func (rpccs *RPCConsumerServer) getBestResult(timeout time.Duration, responses chan *relayResponse, numberOfSessions int, chainMessage chainlib.ChainMessage) *relayResponse { - responsesReceived := 0 - nodeResponseErrors := &RelayErrors{relayErrors: []RelayError{}} - protocolResponseErrors := &RelayErrors{relayErrors: []RelayError{}, onFailureMergeAll: true} - // a helper function to fetch the best response (prioritize node over protocol) - getBestResponseBetweenNodeAndProtocolErrors := func() (*relayResponse, error) { - if len(nodeResponseErrors.relayErrors) > 0 { // if we have node errors, we prefer returning them over protocol errors. - bestErrorMessage := nodeResponseErrors.GetBestErrorMessageForUser() - return bestErrorMessage.response, nil - } - if len(protocolResponseErrors.relayErrors) > 0 { // if we have protocol errors at this point return the best one - protocolsBestErrorMessage := protocolResponseErrors.GetBestErrorMessageForUser() - return protocolsBestErrorMessage.response, nil - } - return nil, fmt.Errorf("failed getting best response") - } - startTime := time.Now() - for { - select { - case response := <-responses: - // increase responses received - responsesReceived++ - if response.err == nil { - // validate if its a error response (from the node not the provider) - foundError, errorMessage := chainMessage.CheckResponseError(response.relayResult.Reply.Data, response.relayResult.StatusCode) - // print debug only when we have multiple responses - if numberOfSessions > 1 { - utils.LavaFormatDebug("Got Response", utils.LogAttr("responsesReceived", responsesReceived), utils.LogAttr("out_of", numberOfSessions), utils.LogAttr("foundError", foundError), utils.LogAttr("errorMessage", errorMessage), utils.LogAttr("Status code", response.relayResult.StatusCode)) - } - if foundError { - // this is a node error, meaning we still didn't get a good response. 
- // we will choose to wait until there will be a response or timeout happens - // if timeout happens we will take the majority of response messages - nodeResponseErrors.relayErrors = append(nodeResponseErrors.relayErrors, RelayError{err: fmt.Errorf(errorMessage), ProviderInfo: response.relayResult.ProviderInfo, response: response}) - } else { - // Return the first successful response - return response // returning response - } - } else { - // we want to keep the error message in a separate response error structure - // in case we got only errors and we want to return the best one - protocolResponseErrors.relayErrors = append(protocolResponseErrors.relayErrors, RelayError{err: response.err, ProviderInfo: response.relayResult.ProviderInfo, response: response}) - } - - // check if this is the last response we are going to receive - // we get here only if all other responses including this one are not valid responses - // (whether its a node error or protocol errors) - if responsesReceived == numberOfSessions { - bestRelayResult, err := getBestResponseBetweenNodeAndProtocolErrors() - if err == nil { // successfully sent the channel response - return bestRelayResult - } - // if we got here, we for some reason failed to fetch both the best node error and the protocol error - // it indicates mostly an unwanted behavior. - utils.LavaFormatWarning("failed getting best error message for both node and protocol", nil, - utils.LogAttr("nodeResponseErrors", nodeResponseErrors), - utils.LogAttr("protocolsBestErrorMessage", protocolResponseErrors), - utils.LogAttr("numberOfSessions", numberOfSessions), - ) - return response - } - case <-time.After(timeout + 3*time.Second - time.Since(startTime)): - // Timeout occurred, try fetching the best result we have, prefer node errors over protocol errors - bestRelayResponse, err := getBestResponseBetweenNodeAndProtocolErrors() - if err == nil { // successfully sent the channel response - return bestRelayResponse - } - // failed fetching any error, getting here indicates a real context timeout happened. 
- return &relayResponse{common.RelayResult{}, NoResponseTimeout} - } - } -} - func (rpccs *RPCConsumerServer) relayInner(ctx context.Context, singleConsumerSession *lavasession.SingleConsumerSession, relayResult *common.RelayResult, relayTimeout time.Duration, chainMessage chainlib.ChainMessage, consumerToken string) (relayResultRet *common.RelayResult, relayLatency time.Duration, err error, needsBackoff bool) { existingSessionLatestBlock := singleConsumerSession.LatestBlock // we read it now because singleConsumerSession is locked, and later it's not endpointClient := *singleConsumerSession.Endpoint.Client @@ -783,7 +717,7 @@ func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context } // only need to send another relay if we don't have enough replies results := []common.RelayResult{} - for _, result := range relayProcessor.ComparableResults() { + for _, result := range relayProcessor.NodeResults() { if result.Finalized { results = append(results, result) } @@ -801,7 +735,15 @@ func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context if err != nil { return utils.LavaFormatWarning("failed data reliability relay to provider", err, utils.LogAttr("relayProcessorDataReliability", relayProcessorDataReliability)) } - relayResultsDataReliability, err := relayProcessorDataReliability.ProcessResults(ctx) + relayTimeout := chainlib.GetRelayTimeout(chainMessage, rpccs.chainParser) + processingTimeout := GetTimeoutForProcessing(relayTimeout, chainMessage) + processingCtx, cancel := context.WithTimeout(ctx, processingTimeout) + defer cancel() + err = relayProcessorDataReliability.WaitForResults(processingCtx) + if err != nil { + return utils.LavaFormatWarning("failed sending data reliability relays", err, utils.Attribute{Key: "relayProcessorDataReliability", Value: relayProcessorDataReliability}) + } + relayResultsDataReliability := relayProcessorDataReliability.NodeResults() resultsDataReliability := []common.RelayResult{} for _, result := range relayResultsDataReliability { if result.Finalized { From 348e5feeec782c8539fdf65cdbf2da24411561f1 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 26 Feb 2024 18:30:47 +0200 Subject: [PATCH 04/57] wip --- protocol/rpcconsumer/relay_processor.go | 123 +++++++++++++++++---- protocol/rpcconsumer/rpcconsumer_server.go | 43 +++---- 2 files changed, 128 insertions(+), 38 deletions(-) diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index a284633b53..0a70d12929 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -20,39 +20,49 @@ const ( DefaultTimeoutLong = 3 * time.Minute ) +type Selection int + +const ( + Quorum Selection = iota // get the majority out of requiredSuccesses + BestResult // get the best result, even if it means waiting +) + func NewRelayProcessor(ctx context.Context, usedProviders *lavasession.UsedProviders, requiredSuccesses int, chainMessage chainlib.ChainMessage) *RelayProcessor { guid, _ := utils.GetUniqueIdentifier(ctx) + selection := Quorum // select the majority of node responses + if chainlib.GetStateful(chainMessage) == common.CONSISTENCY_SELECT_ALLPROVIDERS { + selection = BestResult // select the majority of node successes + } return &RelayProcessor{ usedProviders: usedProviders, - requiredResults: requiredSuccesses, + requiredSuccesses: requiredSuccesses, responses: make(chan *relayResponse, MaxCallsPerRelay), // we set it as buffered so it is not blocking nodeResponseErrors: 
RelayErrors{relayErrors: []RelayError{}}, protocolResponseErrors: RelayErrors{relayErrors: []RelayError{}, onFailureMergeAll: true}, chainMessage: chainMessage, guid: guid, - // TODO: handle required errors - requiredErrors: requiredSuccesses, + selection: selection, } } type RelayProcessor struct { usedProviders *lavasession.UsedProviders responses chan *relayResponse - requiredResults int + requiredSuccesses int nodeResponseErrors RelayErrors protocolResponseErrors RelayErrors - results []common.RelayResult + successResults []common.RelayResult lock sync.RWMutex chainMessage chainlib.ChainMessage guid uint64 - requiredErrors int + selection Selection } func (rp *RelayProcessor) String() string { rp.lock.RLock() nodeErrors := len(rp.nodeResponseErrors.relayErrors) protocolErrors := len(rp.protocolResponseErrors.relayErrors) - results := len(rp.results) + results := len(rp.successResults) usedProviders := rp.usedProviders rp.lock.RUnlock() @@ -76,8 +86,12 @@ func (rp *RelayProcessor) GetUsedProviders() *lavasession.UsedProviders { func (rp *RelayProcessor) NodeResults() []common.RelayResult { rp.lock.RLock() defer rp.lock.RUnlock() + return rp.nodeResultsInner() +} + +func (rp *RelayProcessor) nodeResultsInner() []common.RelayResult { // start with results and add to them node results - nodeResults := rp.results + nodeResults := rp.successResults for _, relayError := range rp.nodeResponseErrors.relayErrors { nodeResults = append(nodeResults, relayError.response.relayResult) } @@ -118,7 +132,7 @@ func (rp *RelayProcessor) setValidResponse(response *relayResponse) { response.relayResult.Finalized = false // shut down data reliability } } - rp.results = append(rp.results, response.relayResult) + rp.successResults = append(rp.successResults, response.relayResult) } func (rp *RelayProcessor) setErrorResponse(response *relayResponse) { @@ -128,30 +142,65 @@ func (rp *RelayProcessor) setErrorResponse(response *relayResponse) { rp.protocolResponseErrors.relayErrors = append(rp.protocolResponseErrors.relayErrors, RelayError{err: response.err, ProviderInfo: response.relayResult.ProviderInfo, response: response}) } -func (rp *RelayProcessor) CheckEndProcessing() bool { +func (rp *RelayProcessor) checkEndProcessing() bool { rp.lock.RLock() defer rp.lock.RUnlock() - resultsCount := len(rp.results) - if resultsCount >= rp.requiredResults { + resultsCount := len(rp.successResults) + if resultsCount >= rp.requiredSuccesses { + // we have enough successes, we can return return true } - nodeErrors := len(rp.nodeResponseErrors.relayErrors) - protocolErrors := len(rp.protocolResponseErrors.relayErrors) - return resultsCount+nodeErrors+protocolErrors >= rp.requiredErrors + if rp.selection == Quorum { + // we need a quorum of all node results + nodeErrors := len(rp.nodeResponseErrors.relayErrors) + if nodeErrors+resultsCount >= rp.requiredSuccesses { + // we have enough node results for our quorum + return true + } + } + if rp.usedProviders.CurrentlyUsed() == 0 { + // no active sessions, we can return + return true + } + + return false } -func (rp *RelayProcessor) HasResponses() bool { +// this function defines if we should use the processor to return the result (meaning it has some insight and responses) or just return to the user +func (rp *RelayProcessor) HasResults() bool { if rp == nil { return false } rp.lock.RLock() defer rp.lock.RUnlock() - resultsCount := len(rp.results) + resultsCount := len(rp.successResults) nodeErrors := len(rp.nodeResponseErrors.relayErrors) protocolErrors := 
len(rp.protocolResponseErrors.relayErrors) return resultsCount+nodeErrors+protocolErrors > 0 } +func (rp *RelayProcessor) HasRequiredNodeResults() bool { + if rp == nil { + return false + } + rp.lock.RLock() + defer rp.lock.RUnlock() + resultsCount := len(rp.successResults) + if resultsCount >= rp.requiredSuccesses { + return true + } + if rp.selection == Quorum { + // we need a quorum of all node results + nodeErrors := len(rp.nodeResponseErrors.relayErrors) + if nodeErrors+resultsCount >= rp.requiredSuccesses { + // we have enough node results for our quorum + return true + } + } + // on BestResult we want to retry if there is no success + return false +} + // this function waits for the processing results, they are written by multiple go routines and read by this go routine // it then updates the responses in their respective place, node errors, protocol errors or success results func (rp *RelayProcessor) WaitForResults(ctx context.Context) error { @@ -165,7 +214,7 @@ func (rp *RelayProcessor) WaitForResults(ctx context.Context) error { } else { rp.setValidResponse(response) } - if rp.CheckEndProcessing() { + if rp.checkEndProcessing() { // we can finish processing return nil } @@ -175,9 +224,45 @@ func (rp *RelayProcessor) WaitForResults(ctx context.Context) error { } } +func (rp *RelayProcessor) processingError() (returnedResult *common.RelayResult, processingError error) { + // TODO: + return nil, fmt.Errorf("not implmented") +} + // this function returns the results according to the defined strategy // results were stored in WaitForResults and now there's logic to select which results are returned to the user -func (rp *RelayProcessor) ProcessingResult() ([]common.RelayResult, error) { +// will return an error if we did not meet quota of replies, if we did we follow the strategies: +// if return strategy == get_first: return the first success, if none: get best node error +// if strategy == quorum get majority of node responses +// on error: we will return a placeholder relayResult, with a provider address and a status code +func (rp *RelayProcessor) ProcessingResult() (returnedResult *common.RelayResult, processingError error) { + rp.lock.RLock() + defer rp.lock.RUnlock() + // there are enough successes + if len(rp.successResults) > rp.requiredSuccesses { + return rp.responsesQuorum(rp.successResults, rp.requiredSuccesses) + } + nodeResults := rp.nodeResultsInner() + // there are not enough successes, let's check if there are enough node errors + + if len(nodeResults) > rp.requiredSuccesses { + if rp.selection == Quorum { + return rp.responsesQuorum(nodeResults, rp.requiredSuccesses) + } else if rp.selection == BestResult && len(rp.successResults) > len(rp.nodeResponseErrors.relayErrors) { + // we have more than half succeeded, quorum will be + return rp.responsesQuorum(rp.successResults, (rp.requiredSuccesses+1)/2) + } + } + var bestErrorMessage RelayError + // we don't have enough for a quorum, prefer a node error on protocol errors + if len(rp.nodeResponseErrors.relayErrors) > 0 { // if we have node errors, we prefer returning them over protocol errors. 
+ bestErrorMessage = rp.nodeResponseErrors.GetBestErrorMessageForUser() + } else if len(rp.protocolResponseErrors.relayErrors) > 0 { // if we have protocol errors at this point return the best one + bestErrorMessage = rp.protocolResponseErrors.GetBestErrorMessageForUser() + } + + returnedResult = &common.RelayResult{} + // when getting an error from all the results // rp.errorRelayResult.ProviderInfo.ProviderAddress += relayResult.ProviderInfo.ProviderAddress // if relayResult.GetStatusCode() != 0 { diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 4682cba22e..0a3aaaf910 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -198,8 +198,7 @@ func (rpccs *RPCConsumerServer) sendRelayWithRetries(ctx context.Context, retrie if err != nil { utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "relayProcessor", Value: relayProcessor}}...) } else { - relayResults, err := relayProcessor.ProcessingResult() - relayResult := relayResults[0] + relayResult, err := relayProcessor.ProcessingResult() if err == nil { utils.LavaFormatInfo("[+] init relay succeeded", []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "latestBlock", Value: relayResult.Reply.LatestBlock}, {Key: "provider address", Value: relayResult.ProviderInfo.ProviderAddress}}...) rpccs.relaysMonitor.LogRelay() @@ -286,7 +285,8 @@ func (rpccs *RPCConsumerServer) SendRelay( relayRequestData := lavaprotocol.NewRelayData(ctx, connectionType, url, []byte(req), seenBlock, reqBlock, rpccs.listenEndpoint.ApiInterface, chainMessage.GetRPCMessage().GetHeaders(), chainlib.GetAddon(chainMessage), common.GetExtensionNames(chainMessage.GetExtensions())) relayProcessor, err := rpccs.ProcessRelaySend(ctx, directiveHeaders, chainMessage, relayRequestData, dappID, consumerIp) - if err != nil && !relayProcessor.HasResponses() { + if err != nil && !relayProcessor.HasResults() { + // we can't send anymore, and we don't have any responses return nil, err } // Handle Data Reliability @@ -302,13 +302,8 @@ func (rpccs *RPCConsumerServer) SendRelay( go rpccs.sendDataReliabilityRelayIfApplicable(dataReliabilityContext, dappID, consumerIp, chainMessage, dataReliabilityThreshold, relayProcessor) // runs asynchronously } - // TODO: implement majority selection option - results, err := relayProcessor.ProcessingResult() - // even on error we are going to have returnedResult - if len(results) == 0 { - return nil, utils.LavaFormatError("invalid relayProcessor result, results are empty", err, utils.LogAttr("relayProcessor", relayProcessor)) - } - returnedResult := &results[0] + returnedResult, err := relayProcessor.ProcessingResult() + rpccs.appendHeadersToRelayResult(ctx, returnedResult, relayProcessor.ProtocolErrors()) if err != nil { return returnedResult, err } @@ -317,10 +312,7 @@ func (rpccs *RPCConsumerServer) SendRelay( analytics.Latency = currentLatency.Milliseconds() analytics.ComputeUnits = chainMessage.GetApi().ComputeUnits } - rpccs.appendHeadersToRelayResult(ctx, returnedResult, relayProcessor.ProtocolErrors()) - rpccs.relaysMonitor.LogRelay() - return returnedResult, nil } @@ -332,6 +324,7 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH return relayProcessor, err } 
relayTimeout := chainlib.GetRelayTimeout(chainMessage, rpccs.chainParser) + // a channel to be notified processing was done, true means we have results and can return gotResults := make(chan bool) go func() { processingTimeout := GetTimeoutForProcessing(relayTimeout, chainMessage) @@ -339,15 +332,27 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH defer cancel() // ProcessResults is reading responses while blocking until the conditions are met relayProcessor.WaitForResults(processingCtx) - gotResults <- true + // decide if we need to resend or not + if relayProcessor.HasRequiredNodeResults() { + gotResults <- true + } else { + gotResults <- false + } }() // every relay timeout we send a new batch startNewBatchTicker := time.NewTicker(relayTimeout) for { select { - case <-gotResults: - return relayProcessor, nil + case success := <-gotResults: + if success { + return relayProcessor, nil + } + err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) + if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { + // we failed to send a batch of relays, if there are no active sends we can terminate + return relayProcessor, err + } case <-startNewBatchTicker.C: err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { @@ -830,7 +835,7 @@ func (rpccs *RPCConsumerServer) HandleDirectiveHeadersForMessage(chainMessage ch chainMessage.SetForceCacheRefresh(ok) } -func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, relayResult *common.RelayResult, retries uint64) { +func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, relayResult *common.RelayResult, protocolErrors uint64) { if relayResult == nil { return } @@ -844,11 +849,11 @@ func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, }) } // add the relay retried count - if retries > 0 { + if protocolErrors > 0 { metadataReply = append(metadataReply, pairingtypes.Metadata{ Name: common.RETRY_COUNT_HEADER_NAME, - Value: strconv.FormatUint(retries, 10), + Value: strconv.FormatUint(protocolErrors, 10), }) } guid, found := utils.GetUniqueIdentifier(ctx) From 201649730d9fe3c4c77e4f05a3d6fa7b0cc8068f Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 26 Feb 2024 20:30:04 +0200 Subject: [PATCH 05/57] wip --- protocol/common/endpoints.go | 1 + protocol/rpcconsumer/relay_errors.go | 3 + protocol/rpcconsumer/relay_processor.go | 161 ++++++++---------------- 3 files changed, 59 insertions(+), 106 deletions(-) diff --git a/protocol/common/endpoints.go b/protocol/common/endpoints.go index 02f965ed51..9f15377e84 100644 --- a/protocol/common/endpoints.go +++ b/protocol/common/endpoints.go @@ -221,6 +221,7 @@ type RelayResult struct { Finalized bool ConflictHandler ConflictHandlerInterface StatusCode int + Quorum int } func (rr *RelayResult) GetReplyServer() *pairingtypes.Relayer_RelaySubscribeClient { diff --git a/protocol/rpcconsumer/relay_errors.go b/protocol/rpcconsumer/relay_errors.go index 03d9669cb7..d5e04485c8 100644 --- a/protocol/rpcconsumer/relay_errors.go +++ b/protocol/rpcconsumer/relay_errors.go @@ -51,6 +51,9 @@ func (r *RelayErrors) GetBestErrorMessageForUser() RelayError { errorCount, index := r.findMaxAppearances(errorMap) if index >= 0 && errorCount >= (len(r.relayErrors)/2) { // we have majority of errors we can return this error. 
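The batching loop above (one goroutine waiting on results and reporting success or failure, while a ticker fires a fresh batch whenever nothing conclusive has arrived) is easier to see stripped of the relay machinery. Below is a minimal control-flow sketch with placeholder callbacks (`sendBatch`, `waitForResults`, `haveEnoughResults`) standing in for the real relay calls; the 3x processing-timeout factor and the buffered channel are simplifications, not the protocol's actual values.

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// waitOrRetry illustrates the loop structure: one goroutine waits for results and
// reports on a channel, while a ticker periodically triggers another batch if
// nothing conclusive arrived yet.
func waitOrRetry(ctx context.Context, relayTimeout time.Duration,
	sendBatch func() error, waitForResults func(context.Context), haveEnoughResults func() bool,
) error {
	gotResults := make(chan bool, 1) // buffered so the waiter never blocks after we return
	go func() {
		processingCtx, cancel := context.WithTimeout(ctx, 3*relayTimeout) // stand-in for the processing timeout
		defer cancel()
		waitForResults(processingCtx)
		gotResults <- haveEnoughResults()
	}()

	ticker := time.NewTicker(relayTimeout)
	defer ticker.Stop()
	for {
		select {
		case ok := <-gotResults:
			if ok {
				return nil // we have enough results to answer the user
			}
			if err := sendBatch(); err != nil {
				return err
			}
		case <-ticker.C:
			// no conclusive result within one relay timeout, widen the net
			if err := sendBatch(); err != nil {
				return err
			}
		case <-ctx.Done():
			return errors.New("processing context expired before any usable result")
		}
	}
}

func main() {
	attempts := 0
	err := waitOrRetry(context.Background(), 50*time.Millisecond,
		func() error { attempts++; return nil },
		func(ctx context.Context) { <-ctx.Done() },
		func() bool { return true },
	)
	fmt.Println(err, attempts > 0) // <nil> true: extra batches were sent while waiting
}
```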
+ if r.relayErrors[index].response != nil { + r.relayErrors[index].response.relayResult.Quorum = errorCount + } return r.relayErrors[index] } diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 0a70d12929..a72898445f 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -3,6 +3,7 @@ package rpcconsumer import ( "context" "fmt" + "net/http" "strings" "sync" "time" @@ -59,6 +60,9 @@ type RelayProcessor struct { } func (rp *RelayProcessor) String() string { + if rp == nil { + return "" + } rp.lock.RLock() nodeErrors := len(rp.nodeResponseErrors.relayErrors) protocolErrors := len(rp.protocolResponseErrors.relayErrors) @@ -75,10 +79,18 @@ func (rp *RelayProcessor) String() string { // RemoveUsed will set the provider as being currently used, if the error is one that allows a retry with the same provider, it will only be removed from currently used // if it's not, then it will be added to unwanted providers since the same relay shouldn't send to it again func (rp *RelayProcessor) RemoveUsed(providerAddress string, err error) { - rp.usedProviders.RemoveUsed(providerAddress, err) + if rp == nil { + return + } + rp.lock.RLock() + usedProviders := rp.usedProviders + rp.lock.RUnlock() + usedProviders.RemoveUsed(providerAddress, err) } func (rp *RelayProcessor) GetUsedProviders() *lavasession.UsedProviders { + rp.lock.RLock() + defer rp.lock.RUnlock() return rp.usedProviders } @@ -89,22 +101,35 @@ func (rp *RelayProcessor) NodeResults() []common.RelayResult { return rp.nodeResultsInner() } +// only when locked func (rp *RelayProcessor) nodeResultsInner() []common.RelayResult { // start with results and add to them node results nodeResults := rp.successResults + nodeResults = append(nodeResults, rp.nodeErrors()...) 
+ return nodeResults +} + +// only when locked +func (rp *RelayProcessor) nodeErrors() (ret []common.RelayResult) { for _, relayError := range rp.nodeResponseErrors.relayErrors { - nodeResults = append(nodeResults, relayError.response.relayResult) + ret = append(ret, relayError.response.relayResult) } - return nodeResults + return ret } func (rp *RelayProcessor) ProtocolErrors() uint64 { + if rp == nil { + return 0 + } rp.lock.RLock() defer rp.lock.RUnlock() return uint64(len(rp.protocolResponseErrors.relayErrors)) } func (rp *RelayProcessor) SetResponse(response *relayResponse) { + if rp == nil { + return + } if response == nil { return } @@ -236,128 +261,52 @@ func (rp *RelayProcessor) processingError() (returnedResult *common.RelayResult, // if strategy == quorum get majority of node responses // on error: we will return a placeholder relayResult, with a provider address and a status code func (rp *RelayProcessor) ProcessingResult() (returnedResult *common.RelayResult, processingError error) { + allProvidersAddresses := rp.GetUsedProviders().UnwantedAddresses() rp.lock.RLock() defer rp.lock.RUnlock() // there are enough successes - if len(rp.successResults) > rp.requiredSuccesses { + if len(rp.successResults) >= rp.requiredSuccesses { return rp.responsesQuorum(rp.successResults, rp.requiredSuccesses) } nodeResults := rp.nodeResultsInner() // there are not enough successes, let's check if there are enough node errors - if len(nodeResults) > rp.requiredSuccesses { + if len(nodeResults) >= rp.requiredSuccesses { if rp.selection == Quorum { return rp.responsesQuorum(nodeResults, rp.requiredSuccesses) } else if rp.selection == BestResult && len(rp.successResults) > len(rp.nodeResponseErrors.relayErrors) { - // we have more than half succeeded, quorum will be + // we have more than half succeeded, and we are success oriented return rp.responsesQuorum(rp.successResults, (rp.requiredSuccesses+1)/2) } } - var bestErrorMessage RelayError // we don't have enough for a quorum, prefer a node error on protocol errors - if len(rp.nodeResponseErrors.relayErrors) > 0 { // if we have node errors, we prefer returning them over protocol errors. - bestErrorMessage = rp.nodeResponseErrors.GetBestErrorMessageForUser() - } else if len(rp.protocolResponseErrors.relayErrors) > 0 { // if we have protocol errors at this point return the best one - bestErrorMessage = rp.protocolResponseErrors.GetBestErrorMessageForUser() + if len(rp.nodeResponseErrors.relayErrors) >= rp.requiredSuccesses { // if we have node errors, we prefer returning them over protocol errors. 
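The `if rp == nil` guards added to these accessors lean on the fact that Go permits calling a method on a nil pointer receiver; the guard converts a would-be panic on field access into a safe zero value. A tiny self-contained illustration with a hypothetical `counter` type:

```go
package main

import "fmt"

// counter demonstrates the nil-guard pattern applied to RelayProcessor: the
// method checks its own receiver and returns a zero value instead of
// dereferencing fields on nil.
type counter struct{ errs []error }

func (c *counter) ProtocolErrors() uint64 {
	if c == nil {
		return 0
	}
	return uint64(len(c.errs))
}

func main() {
	var c *counter               // nil, e.g. a processor that was never constructed
	fmt.Println(c.ProtocolErrors()) // prints 0, no panic
}
```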
+ nodeErr := rp.nodeResponseErrors.GetBestErrorMessageForUser() + return rp.responsesQuorum(rp.nodeErrors(), rp.requiredSuccesses) } - returnedResult = &common.RelayResult{} - - // when getting an error from all the results - // rp.errorRelayResult.ProviderInfo.ProviderAddress += relayResult.ProviderInfo.ProviderAddress - // if relayResult.GetStatusCode() != 0 { - // // keep the error status code - // rp.errorRelayResult.StatusCode = relayResult.GetStatusCode() - // } - - // if len(relayResults) == 0 { - // rpccs.appendHeadersToRelayResult(ctx, errorRelayResult, retries) - // // suggest the user to add the timeout flag - // if uint64(timeouts) == retries && retries > 0 { - // utils.LavaFormatDebug("all relays timeout", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "errors", Value: relayErrors.relayErrors}) - // return errorRelayResult, utils.LavaFormatError("Failed all relay retries due to timeout consider adding 'lava-relay-timeout' header to extend the allowed timeout duration", nil, utils.Attribute{Key: "GUID", Value: ctx}) - // } - // bestRelayError := relayErrors.GetBestErrorMessageForUser() - // return errorRelayResult, utils.LavaFormatError("Failed all retries", nil, utils.Attribute{Key: "GUID", Value: ctx}, utils.LogAttr("error", bestRelayError.err)) - // } else if len(relayErrors.relayErrors) > 0 { - // utils.LavaFormatDebug("relay succeeded but had some errors", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "errors", Value: relayErrors}) - // } - return nil, fmt.Errorf("TODO") + // if we got here we trigger a protocol error + if len(rp.nodeResponseErrors.relayErrors) > 0 { // if we have node errors, we prefer returning them over protocol errors, even if it's just the one + nodeErr := rp.nodeResponseErrors.GetBestErrorMessageForUser() + processingError = nodeErr.err + errorResponse := nodeErr.response + if errorResponse != nil { + returnedResult = &errorResponse.relayResult + } + } else if len(rp.protocolResponseErrors.relayErrors) > 0 { + protocolErr := rp.protocolResponseErrors.GetBestErrorMessageForUser() + processingError = protocolErr.err + errorResponse := protocolErr.response + if errorResponse != nil { + returnedResult = &errorResponse.relayResult + } + } else { + returnedResult = &common.RelayResult{StatusCode: http.StatusInternalServerError} + } + returnedResult.ProviderInfo.ProviderAddress = strings.Join(allProvidersAddresses, ",") + return returnedResult, utils.LavaFormatError("failed relay, insufficient results", processingError) } -// func (rpccs *RPCConsumerServer) getBestResult(timeout time.Duration, responses chan *relayResponse, numberOfSessions int, chainMessage chainlib.ChainMessage) *relayResponse { -// responsesReceived := 0 -// nodeResponseErrors := &RelayErrors{relayErrors: []RelayError{}} -// protocolResponseErrors := &RelayErrors{relayErrors: []RelayError{}, onFailureMergeAll: true} -// // a helper function to fetch the best response (prioritize node over protocol) -// getBestResponseBetweenNodeAndProtocolErrors := func() (*relayResponse, error) { -// if len(nodeResponseErrors.relayErrors) > 0 { // if we have node errors, we prefer returning them over protocol errors. 
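The error path at the end of ProcessingResult comes down to a preference order plus provider attribution. A minimal sketch with a simplified `relayOutcome` stand-in for common.RelayResult (fields trimmed to what the example needs):

```go
package main

import (
	"fmt"
	"net/http"
	"strings"
)

// relayOutcome is a simplified stand-in for common.RelayResult.
type relayOutcome struct {
	StatusCode      int
	ProviderAddress string
}

// buildErrorResult mirrors the fallback order above: a node error beats a
// protocol error, and with neither we return a bare 500 placeholder. All
// attempted provider addresses are joined so the caller can attribute the failure.
func buildErrorResult(nodeErrResult, protocolErrResult *relayOutcome, triedProviders []string) *relayOutcome {
	var result *relayOutcome
	switch {
	case nodeErrResult != nil:
		result = nodeErrResult
	case protocolErrResult != nil:
		result = protocolErrResult
	default:
		result = &relayOutcome{StatusCode: http.StatusInternalServerError}
	}
	result.ProviderAddress = strings.Join(triedProviders, ",")
	return result
}

func main() {
	res := buildErrorResult(nil, &relayOutcome{StatusCode: 502}, []string{"lava@prov1", "lava@prov2"})
	fmt.Println(res.StatusCode, res.ProviderAddress) // 502 lava@prov1,lava@prov2
}
```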
-// bestErrorMessage := nodeResponseErrors.GetBestErrorMessageForUser() -// return bestErrorMessage.response, nil -// } -// if len(protocolResponseErrors.relayErrors) > 0 { // if we have protocol errors at this point return the best one -// protocolsBestErrorMessage := protocolResponseErrors.GetBestErrorMessageForUser() -// return protocolsBestErrorMessage.response, nil -// } -// return nil, fmt.Errorf("failed getting best response") -// } -// startTime := time.Now() -// for { -// select { -// case response := <-responses: -// // increase responses received -// responsesReceived++ -// if response.err == nil { -// // validate if its a error response (from the node not the provider) -// foundError, errorMessage := chainMessage.CheckResponseError(response.relayResult.Reply.Data, response.relayResult.StatusCode) -// // print debug only when we have multiple responses -// if numberOfSessions > 1 { -// utils.LavaFormatDebug("Got Response", utils.LogAttr("responsesReceived", responsesReceived), utils.LogAttr("out_of", numberOfSessions), utils.LogAttr("foundError", foundError), utils.LogAttr("errorMessage", errorMessage), utils.LogAttr("Status code", response.relayResult.StatusCode)) -// } -// if foundError { -// // this is a node error, meaning we still didn't get a good response. -// // we will choose to wait until there will be a response or timeout happens -// // if timeout happens we will take the majority of response messages -// nodeResponseErrors.relayErrors = append(nodeResponseErrors.relayErrors, RelayError{err: fmt.Errorf(errorMessage), ProviderInfo: response.relayResult.ProviderInfo, response: response}) -// } else { -// // Return the first successful response -// return response // returning response -// } -// } else { -// // we want to keep the error message in a separate response error structure -// // in case we got only errors and we want to return the best one -// protocolResponseErrors.relayErrors = append(protocolResponseErrors.relayErrors, RelayError{err: response.err, ProviderInfo: response.relayResult.ProviderInfo, response: response}) -// } - -// // check if this is the last response we are going to receive -// // we get here only if all other responses including this one are not valid responses -// // (whether its a node error or protocol errors) -// if responsesReceived == numberOfSessions { -// bestRelayResult, err := getBestResponseBetweenNodeAndProtocolErrors() -// if err == nil { // successfully sent the channel response -// return bestRelayResult -// } -// // if we got here, we for some reason failed to fetch both the best node error and the protocol error -// // it indicates mostly an unwanted behavior. -// utils.LavaFormatWarning("failed getting best error message for both node and protocol", nil, -// utils.LogAttr("nodeResponseErrors", nodeResponseErrors), -// utils.LogAttr("protocolsBestErrorMessage", protocolResponseErrors), -// utils.LogAttr("numberOfSessions", numberOfSessions), -// ) -// return response -// } -// case <-time.After(timeout + 3*time.Second - time.Since(startTime)): -// // Timeout occurred, try fetching the best result we have, prefer node errors over protocol errors -// bestRelayResponse, err := getBestResponseBetweenNodeAndProtocolErrors() -// if err == nil { // successfully sent the channel response -// return bestRelayResponse -// } -// // failed fetching any error, getting here indicates a real context timeout happened. 
-// return &relayResponse{common.RelayResult{}, NoResponseTimeout} -// } -// } -// } - func GetTimeoutForProcessing(relayTimeout time.Duration, chainMessage chainlib.ChainMessage) time.Duration { ctxTimeout := DefaultTimeout if chainlib.IsHangingApi(chainMessage) || chainMessage.GetApi().ComputeUnits > 100 || chainlib.GetStateful(chainMessage) == common.CONSISTENCY_SELECT_ALLPROVIDERS { From 0342af06424b44b6edc5bc3bddeb79455171cd71 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 28 Feb 2024 09:58:28 +0200 Subject: [PATCH 06/57] relay_processor done --- protocol/lavasession/used_providers.go | 19 ++++++--- protocol/rpcconsumer/relay_processor.go | 56 +++++++++++++++++++------ 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/protocol/lavasession/used_providers.go b/protocol/lavasession/used_providers.go index e7f705de04..61fc64bf24 100644 --- a/protocol/lavasession/used_providers.go +++ b/protocol/lavasession/used_providers.go @@ -25,11 +25,12 @@ func NewUsedProviders(directiveHeaders map[string]string) *UsedProviders { } type UsedProviders struct { - lock sync.RWMutex - providers map[string]struct{} - selecting bool - unwantedProviders map[string]struct{} - blockOnSyncLoss map[string]struct{} + lock sync.RWMutex + providers map[string]struct{} + selecting bool + unwantedProviders map[string]struct{} + blockOnSyncLoss map[string]struct{} + sessionsLatestBatch int } func (up *UsedProviders) CurrentlyUsed() int { @@ -38,6 +39,12 @@ func (up *UsedProviders) CurrentlyUsed() int { return len(up.providers) } +func (up *UsedProviders) TotalSessions() int { + up.lock.RLock() + defer up.lock.RUnlock() + return up.sessionsLatestBatch +} + func (up *UsedProviders) CurrentlyUsedAddresses() []string { up.lock.RLock() defer up.lock.RUnlock() @@ -90,8 +97,10 @@ func (up *UsedProviders) AddUsed(sessions ConsumerSessionsMap) { up.lock.Lock() defer up.lock.Unlock() // this is nil safe + up.sessionsLatestBatch = 0 for provider := range sessions { // the key for ConsumerSessionsMap is the provider public address up.providers[provider] = struct{}{} + up.sessionsLatestBatch++ } up.selecting = false } diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index a72898445f..6b1466905b 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -2,6 +2,7 @@ package rpcconsumer import ( "context" + "errors" "fmt" "net/http" "strings" @@ -34,6 +35,9 @@ func NewRelayProcessor(ctx context.Context, usedProviders *lavasession.UsedProvi if chainlib.GetStateful(chainMessage) == common.CONSISTENCY_SELECT_ALLPROVIDERS { selection = BestResult // select the majority of node successes } + if requiredSuccesses <= 0 { + utils.LavaFormatFatal("invalid requirement, successes count must be greater than 0", nil, utils.LogAttr("requiredSuccesses", requiredSuccesses)) + } return &RelayProcessor{ usedProviders: usedProviders, requiredSuccesses: requiredSuccesses, @@ -167,7 +171,7 @@ func (rp *RelayProcessor) setErrorResponse(response *relayResponse) { rp.protocolResponseErrors.relayErrors = append(rp.protocolResponseErrors.relayErrors, RelayError{err: response.err, ProviderInfo: response.relayResult.ProviderInfo, response: response}) } -func (rp *RelayProcessor) checkEndProcessing() bool { +func (rp *RelayProcessor) checkEndProcessing(responsesCount int) bool { rp.lock.RLock() defer rp.lock.RUnlock() resultsCount := len(rp.successResults) @@ -183,8 +187,9 @@ func (rp *RelayProcessor) checkEndProcessing() bool { return true } } - if 
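The new `sessionsLatestBatch` counter in UsedProviders lets the processor compare responses received against the size of the batch that was actually sent. A trimmed-down sketch of that bookkeeping (the real AddUsed takes a ConsumerSessionsMap; this stand-in takes a plain slice of provider addresses):

```go
package main

import (
	"fmt"
	"sync"
)

// usedTracker keeps only the pieces this commit touches: the set of providers
// currently in flight and the size of the latest batch of sessions handed out.
type usedTracker struct {
	lock                sync.RWMutex
	providers           map[string]struct{}
	sessionsLatestBatch int
}

func newUsedTracker() *usedTracker {
	return &usedTracker{providers: map[string]struct{}{}}
}

// addUsed resets the batch counter and records every provider in the new batch.
func (ut *usedTracker) addUsed(batch []string) {
	ut.lock.Lock()
	defer ut.lock.Unlock()
	ut.sessionsLatestBatch = 0
	for _, provider := range batch {
		ut.providers[provider] = struct{}{}
		ut.sessionsLatestBatch++
	}
}

func (ut *usedTracker) currentlyUsed() int {
	ut.lock.RLock()
	defer ut.lock.RUnlock()
	return len(ut.providers)
}

func main() {
	ut := newUsedTracker()
	ut.addUsed([]string{"lava@prov1", "lava@prov2"})
	fmt.Println(ut.currentlyUsed(), ut.sessionsLatestBatch) // 2 2
}
```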
rp.usedProviders.CurrentlyUsed() == 0 { - // no active sessions, we can return + // check if we got all of the responses + if rp.usedProviders.CurrentlyUsed() == 0 && responsesCount >= rp.usedProviders.TotalSessions() { + // no active sessions, and we read all the responses, we can return return true } @@ -239,7 +244,7 @@ func (rp *RelayProcessor) WaitForResults(ctx context.Context) error { } else { rp.setValidResponse(response) } - if rp.checkEndProcessing() { + if rp.checkEndProcessing(responsesCount) { // we can finish processing return nil } @@ -249,9 +254,34 @@ func (rp *RelayProcessor) WaitForResults(ctx context.Context) error { } } -func (rp *RelayProcessor) processingError() (returnedResult *common.RelayResult, processingError error) { - // TODO: - return nil, fmt.Errorf("not implmented") +func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSize int) (returnedResult *common.RelayResult, processingError error) { + if quorumSize <= 0 { + return nil, errors.New("quorumSize must be greater than zero") + } + countMap := make(map[string]int) // Map to store the count of each unique result.Reply.Data + for _, result := range results { + if result.Reply != nil && result.Reply.Data != nil { + countMap[string(result.Reply.Data)]++ + } + } + var mostCommonResult *common.RelayResult + var maxCount int + for _, result := range results { + if result.Reply != nil && result.Reply.Data != nil { + count := countMap[string(result.Reply.Data)] + if count > maxCount { + maxCount = count + mostCommonResult = &result + } + } + } + + // Check if the majority count is less than quorumSize + if mostCommonResult == nil || maxCount < quorumSize { + return nil, errors.New("majority count is less than quorumSize") + } + mostCommonResult.Quorum = maxCount + return mostCommonResult, nil } // this function returns the results according to the defined strategy @@ -261,11 +291,14 @@ func (rp *RelayProcessor) processingError() (returnedResult *common.RelayResult, // if strategy == quorum get majority of node responses // on error: we will return a placeholder relayResult, with a provider address and a status code func (rp *RelayProcessor) ProcessingResult() (returnedResult *common.RelayResult, processingError error) { + // this must be here before the lock because this function locks allProvidersAddresses := rp.GetUsedProviders().UnwantedAddresses() + rp.lock.RLock() defer rp.lock.RUnlock() // there are enough successes - if len(rp.successResults) >= rp.requiredSuccesses { + successResultsCount := len(rp.successResults) + if successResultsCount >= rp.requiredSuccesses { return rp.responsesQuorum(rp.successResults, rp.requiredSuccesses) } nodeResults := rp.nodeResultsInner() @@ -274,7 +307,7 @@ func (rp *RelayProcessor) ProcessingResult() (returnedResult *common.RelayResult if len(nodeResults) >= rp.requiredSuccesses { if rp.selection == Quorum { return rp.responsesQuorum(nodeResults, rp.requiredSuccesses) - } else if rp.selection == BestResult && len(rp.successResults) > len(rp.nodeResponseErrors.relayErrors) { + } else if rp.selection == BestResult && successResultsCount > len(rp.nodeResponseErrors.relayErrors) { // we have more than half succeeded, and we are success oriented return rp.responsesQuorum(rp.successResults, (rp.requiredSuccesses+1)/2) } @@ -282,10 +315,11 @@ func (rp *RelayProcessor) ProcessingResult() (returnedResult *common.RelayResult // we don't have enough for a quorum, prefer a node error on protocol errors if len(rp.nodeResponseErrors.relayErrors) >= 
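responsesQuorum's core is a tally of identical reply payloads checked against the quorum size. The sketch below reproduces just that counting step over raw byte slices; the QoS fallback and the RelayResult wrapping are left out here:

```go
package main

import (
	"errors"
	"fmt"
)

// majorityReply mirrors the counting step of responsesQuorum: tally identical
// reply payloads and accept the most common one only if it reaches quorumSize.
func majorityReply(replies [][]byte, quorumSize int) ([]byte, int, error) {
	if quorumSize <= 0 {
		return nil, 0, errors.New("quorumSize must be greater than zero")
	}
	counts := map[string]int{}
	var best []byte
	maxCount := 0
	for _, data := range replies {
		if data == nil {
			continue
		}
		counts[string(data)]++
		if counts[string(data)] > maxCount {
			maxCount = counts[string(data)]
			best = data
		}
	}
	if best == nil || maxCount < quorumSize {
		return nil, 0, errors.New("majority count is less than quorumSize")
	}
	return best, maxCount, nil
}

func main() {
	replies := [][]byte{[]byte(`{"block":100}`), []byte(`{"block":100}`), []byte(`{"block":99}`)}
	data, quorum, err := majorityReply(replies, 2)
	fmt.Println(string(data), quorum, err) // {"block":100} 2 <nil>
}
```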
rp.requiredSuccesses { // if we have node errors, we prefer returning them over protocol errors. nodeErr := rp.nodeResponseErrors.GetBestErrorMessageForUser() - return rp.responsesQuorum(rp.nodeErrors(), rp.requiredSuccesses) + return &nodeErr.response.relayResult, nil } // if we got here we trigger a protocol error + returnedResult = &common.RelayResult{StatusCode: http.StatusInternalServerError} if len(rp.nodeResponseErrors.relayErrors) > 0 { // if we have node errors, we prefer returning them over protocol errors, even if it's just the one nodeErr := rp.nodeResponseErrors.GetBestErrorMessageForUser() processingError = nodeErr.err @@ -300,8 +334,6 @@ func (rp *RelayProcessor) ProcessingResult() (returnedResult *common.RelayResult if errorResponse != nil { returnedResult = &errorResponse.relayResult } - } else { - returnedResult = &common.RelayResult{StatusCode: http.StatusInternalServerError} } returnedResult.ProviderInfo.ProviderAddress = strings.Join(allProvidersAddresses, ",") return returnedResult, utils.LavaFormatError("failed relay, insufficient results", processingError) From 516c327e2eafe146386e3cc1739da26560909203 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 28 Feb 2024 12:23:08 +0200 Subject: [PATCH 07/57] change timeouts across the protocol --- protocol/chainlib/chain_message_queries.go | 10 +++--- protocol/chainlib/chainlib.go | 1 + protocol/chainlib/common.go | 34 +++++++++++++++++-- protocol/chainlib/grpc.go | 2 +- protocol/chainlib/jsonRPC.go | 4 +-- protocol/chainlib/rest.go | 2 +- protocol/chainlib/tendermintRPC.go | 4 +-- protocol/common/endpoints.go | 20 +++-------- protocol/common/timeout.go | 19 +++++++++++ protocol/lavasession/used_providers.go | 15 ++------ protocol/rpcconsumer/relay_processor.go | 18 ++-------- protocol/rpcconsumer/rpcconsumer_server.go | 18 ++++++---- protocol/rpcprovider/rpcprovider_server.go | 23 ++++++++----- .../rpcprovider/rpcprovider_server_test.go | 2 +- 14 files changed, 99 insertions(+), 73 deletions(-) diff --git a/protocol/chainlib/chain_message_queries.go b/protocol/chainlib/chain_message_queries.go index 0f964152d4..7fce2df76f 100644 --- a/protocol/chainlib/chain_message_queries.go +++ b/protocol/chainlib/chain_message_queries.go @@ -6,22 +6,22 @@ func ShouldSendToAllProviders(chainMessage ChainMessage) bool { return chainMessage.GetApi().Category.Stateful == common.CONSISTENCY_SELECT_ALLPROVIDERS } -func GetAddon(chainMessage ChainMessage) string { +func GetAddon(chainMessage ChainMessageForSend) string { return chainMessage.GetApiCollection().CollectionData.AddOn } -func IsSubscription(chainMessage ChainMessage) bool { +func IsSubscription(chainMessage ChainMessageForSend) bool { return chainMessage.GetApi().Category.Subscription } -func IsHangingApi(chainMessage ChainMessage) bool { +func IsHangingApi(chainMessage ChainMessageForSend) bool { return chainMessage.GetApi().Category.HangingApi } -func GetComputeUnits(chainMessage ChainMessage) uint64 { +func GetComputeUnits(chainMessage ChainMessageForSend) uint64 { return chainMessage.GetApi().ComputeUnits } -func GetStateful(chainMessage ChainMessage) uint32 { +func GetStateful(chainMessage ChainMessageForSend) uint32 { return chainMessage.GetApi().Category.Stateful } diff --git a/protocol/chainlib/chainlib.go b/protocol/chainlib/chainlib.go index f0f0d395a1..5be294391c 100644 --- a/protocol/chainlib/chainlib.go +++ b/protocol/chainlib/chainlib.go @@ -85,6 +85,7 @@ type ChainMessage interface { } type ChainMessageForSend interface { + TimeoutOverride(...time.Duration) 
time.Duration GetApi() *spectypes.Api GetRPCMessage() rpcInterfaceMessages.GenericMessage GetApiCollection() *spectypes.ApiCollection diff --git a/protocol/chainlib/common.go b/protocol/chainlib/common.go index d36e6920e1..78495c20aa 100644 --- a/protocol/chainlib/common.go +++ b/protocol/chainlib/common.go @@ -1,6 +1,7 @@ package chainlib import ( + "context" "encoding/json" "fmt" "net" @@ -84,6 +85,13 @@ func (bcp *BaseChainProxy) GetChainProxyInformation() (common.NodeUrl, string) { return bcp.NodeUrl, bcp.ChainID } +func (bcp *BaseChainProxy) CapTimeoutForSend(ctx context.Context, chainMessage ChainMessageForSend) (context.Context, context.CancelFunc) { + relayTimeout := GetRelayTimeout(chainMessage, bcp.averageBlockTime) + processingTimeout := common.GetTimeoutForProcessing(relayTimeout, GetTimeoutInfo(chainMessage)) + connectCtx, cancel := bcp.NodeUrl.LowerContextTimeout(ctx, processingTimeout) + return connectCtx, cancel +} + func extractDappIDFromFiberContext(c *fiber.Ctx) (dappID string) { // Read the dappID from the headers dappID = c.Get("dapp-id") @@ -297,14 +305,28 @@ func CompareRequestedBlockInBatch(firstRequestedBlock int64, second int64) (late return returnBigger(firstRequestedBlock, second) } -func GetRelayTimeout(chainMessage ChainMessage, chainParser ChainParser) time.Duration { +func GetRelayTimeout(chainMessage ChainMessageForSend, averageBlockTime time.Duration) time.Duration { if chainMessage.TimeoutOverride() != 0 { return chainMessage.TimeoutOverride() } // Calculate extra RelayTimeout extraRelayTimeout := time.Duration(0) if IsHangingApi(chainMessage) { - _, extraRelayTimeout, _, _ = chainParser.ChainBlockStats() + extraRelayTimeout = averageBlockTime + } + relayTimeAddition := common.GetTimePerCu(GetComputeUnits(chainMessage)) + if chainMessage.GetApi().TimeoutMs > 0 { + relayTimeAddition = time.Millisecond * time.Duration(chainMessage.GetApi().TimeoutMs) + } + // Set relay timout, increase it every time we fail a relay on timeout + return extraRelayTimeout + relayTimeAddition + common.AverageWorldLatency +} + +func GetRelayTimeoutForSend(chainMessage ChainMessageForSend, averageBlockTime time.Duration) time.Duration { + // Calculate extra RelayTimeout + extraRelayTimeout := time.Duration(0) + if IsHangingApi(chainMessage) { + extraRelayTimeout = averageBlockTime } relayTimeAddition := common.GetTimePerCu(GetComputeUnits(chainMessage)) if chainMessage.GetApi().TimeoutMs > 0 { @@ -390,3 +412,11 @@ func (rd *RefererData) SendReferer(refererMatchString string) error { rd.ReferrerClient.AppendReferrer(metrics.NewReferrerRequest(refererMatchString)) return nil } + +func GetTimeoutInfo(chainMessage ChainMessageForSend) common.TimeoutInfo { + return common.TimeoutInfo{ + CU: chainMessage.GetApi().ComputeUnits, + Hanging: IsHangingApi(chainMessage), + Stateful: GetStateful(chainMessage), + } +} diff --git a/protocol/chainlib/grpc.go b/protocol/chainlib/grpc.go index f90ee0f61a..a7e6c6d07a 100644 --- a/protocol/chainlib/grpc.go +++ b/protocol/chainlib/grpc.go @@ -518,7 +518,7 @@ func (cp *GrpcChainProxy) SendNodeMsg(ctx context.Context, ch chan interface{}, } var respHeaders metadata.MD response := msgFactory.NewMessage(methodDescriptor.GetOutputType()) - connectCtx, cancel := cp.NodeUrl.LowerContextTimeout(ctx, chainMessage, cp.averageBlockTime) + connectCtx, cancel := cp.CapTimeoutForSend(ctx, chainMessage) defer cancel() err = conn.Invoke(connectCtx, "/"+nodeMessage.Path, msg, response, grpc.Header(&respHeaders)) if err != nil { diff --git 
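With the chain parser dropped from its signature, GetRelayTimeout now composes the timeout from the per-API override, a compute-unit based addition, and one extra average block time for hanging APIs. A sketch of that composition; `timePerCU` here is a made-up 100ms-per-CU placeholder, not the real common.GetTimePerCu:

```go
package main

import (
	"fmt"
	"time"
)

const averageWorldLatency = 300 * time.Millisecond // same constant the protocol adds on top

// relayTimeout reproduces the shape of GetRelayTimeout after this commit: an
// explicit per-API timeout (milliseconds) wins over the compute-unit estimate,
// and hanging APIs get one extra average block time.
func relayTimeout(timeoutMs, computeUnits uint64, hanging bool, averageBlockTime time.Duration) time.Duration {
	timePerCU := func(cu uint64) time.Duration { return time.Duration(cu) * 100 * time.Millisecond } // illustrative rate only

	extra := time.Duration(0)
	if hanging {
		extra = averageBlockTime
	}
	addition := timePerCU(computeUnits)
	if timeoutMs > 0 {
		addition = time.Duration(timeoutMs) * time.Millisecond
	}
	return extra + addition + averageWorldLatency
}

func main() {
	// a hanging API worth 10 CU on a chain with 6s blocks
	fmt.Println(relayTimeout(0, 10, true, 6*time.Second)) // 7.3s under the assumed per-CU rate
}
```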
a/protocol/chainlib/jsonRPC.go b/protocol/chainlib/jsonRPC.go index 4a76d9233d..03d7ce33a2 100644 --- a/protocol/chainlib/jsonRPC.go +++ b/protocol/chainlib/jsonRPC.go @@ -573,7 +573,7 @@ func (cp *JrpcChainProxy) sendBatchMessage(ctx context.Context, nodeMessage *rpc } } // set context with timeout - connectCtx, cancel := cp.NodeUrl.LowerContextTimeout(ctx, chainMessage, cp.averageBlockTime) + connectCtx, cancel := cp.CapTimeoutForSend(ctx, chainMessage) defer cancel() cp.NodeUrl.SetIpForwardingIfNecessary(ctx, rpc.SetHeader) @@ -645,7 +645,7 @@ func (cp *JrpcChainProxy) SendNodeMsg(ctx context.Context, ch chan interface{}, } else { // we use the minimum timeout between the two, spec or context. to prevent the provider from hanging // we don't use the context alone so the provider won't be hanging forever by an attack - connectCtx, cancel := cp.NodeUrl.LowerContextTimeout(ctx, chainMessage, cp.averageBlockTime) + connectCtx, cancel := cp.CapTimeoutForSend(ctx, chainMessage) defer cancel() cp.NodeUrl.SetIpForwardingIfNecessary(ctx, rpc.SetHeader) diff --git a/protocol/chainlib/rest.go b/protocol/chainlib/rest.go index 011f2ddba6..9ab96a92f1 100644 --- a/protocol/chainlib/rest.go +++ b/protocol/chainlib/rest.go @@ -468,7 +468,7 @@ func (rcp *RestChainProxy) SendNodeMsg(ctx context.Context, ch chan interface{}, urlPath := rcp.NodeUrl.Url + nodeMessage.Path // set context with timeout - connectCtx, cancel := rcp.NodeUrl.LowerContextTimeout(ctx, chainMessage, rcp.averageBlockTime) + connectCtx, cancel := rcp.CapTimeoutForSend(ctx, chainMessage) defer cancel() req, err := http.NewRequestWithContext(connectCtx, connectionTypeSlected, rcp.NodeUrl.AuthConfig.AddAuthPath(urlPath), msgBuffer) diff --git a/protocol/chainlib/tendermintRPC.go b/protocol/chainlib/tendermintRPC.go index e543c1fd85..eab8127053 100644 --- a/protocol/chainlib/tendermintRPC.go +++ b/protocol/chainlib/tendermintRPC.go @@ -651,7 +651,7 @@ func (cp *tendermintRpcChainProxy) SendURI(ctx context.Context, nodeMessage *rpc url := cp.httpNodeUrl.Url + "/" + nodeMessage.Path // set context with timeout - connectCtx, cancel := cp.NodeUrl.LowerContextTimeout(ctx, chainMessage, cp.averageBlockTime) + connectCtx, cancel := cp.CapTimeoutForSend(ctx, chainMessage) defer cancel() // create a new http request @@ -753,7 +753,7 @@ func (cp *tendermintRpcChainProxy) SendRPC(ctx context.Context, nodeMessage *rpc sub, rpcMessage, err = rpc.Subscribe(context.Background(), nodeMessage.ID, nodeMessage.Method, ch, nodeMessage.Params) } else { // set context with timeout - connectCtx, cancel := cp.NodeUrl.LowerContextTimeout(ctx, chainMessage, cp.averageBlockTime) + connectCtx, cancel := cp.CapTimeoutForSend(ctx, chainMessage) defer cancel() cp.NodeUrl.SetIpForwardingIfNecessary(ctx, rpc.SetHeader) diff --git a/protocol/common/endpoints.go b/protocol/common/endpoints.go index 9f15377e84..3b5dc8d09a 100644 --- a/protocol/common/endpoints.go +++ b/protocol/common/endpoints.go @@ -89,27 +89,15 @@ func (url *NodeUrl) LowerContextTimeoutWithDuration(ctx context.Context, timeout return CapContextTimeout(ctx, timeout+url.Timeout) } -func (url *NodeUrl) LowerContextTimeout(ctx context.Context, chainMessage ChainMessageGetApiInterface, averageBlockTime time.Duration) (context.Context, context.CancelFunc) { - var timeout time.Duration - specOverwriteTimeout := chainMessage.GetApi().TimeoutMs - if specOverwriteTimeout > 0 { - timeout = time.Millisecond * time.Duration(specOverwriteTimeout) - } else { - timeout = 
LocalNodeTimePerCu(chainMessage.GetApi().ComputeUnits) - } - - // check if this API is hanging (waiting for block confirmation) - if chainMessage.GetApi().Category.HangingApi { - timeout += averageBlockTime - } +func (url *NodeUrl) LowerContextTimeout(ctx context.Context, processingTimeout time.Duration) (context.Context, context.CancelFunc) { // allowing the consumer's context to increase the timeout by up to x2 // this allows the consumer to get extra timeout than the spec up to a threshold so // the provider wont be attacked by infinite context timeout - timeout *= MAXIMUM_ALLOWED_TIMEOUT_EXTEND_MULTIPLIER_BY_THE_CONSUMER + processingTimeout *= MAXIMUM_ALLOWED_TIMEOUT_EXTEND_MULTIPLIER_BY_THE_CONSUMER if url == nil || url.Timeout <= 0 { - return CapContextTimeout(ctx, timeout) + return CapContextTimeout(ctx, processingTimeout) } - return CapContextTimeout(ctx, timeout+url.Timeout) + return CapContextTimeout(ctx, processingTimeout+url.Timeout) } type AuthConfig struct { diff --git a/protocol/common/timeout.go b/protocol/common/timeout.go index cece8d31b6..c979372ae0 100644 --- a/protocol/common/timeout.go +++ b/protocol/common/timeout.go @@ -16,6 +16,8 @@ const ( DataReliabilityTimeoutIncrease = 5 * time.Second AverageWorldLatency = 300 * time.Millisecond CommunicateWithLocalLavaNodeTimeout = (3 * time.Second) + AverageWorldLatency + DefaultTimeout = 20 * time.Second + DefaultTimeoutLong = 3 * time.Minute ) func LocalNodeTimePerCu(cu uint64) time.Duration { @@ -60,3 +62,20 @@ func IsTimeout(errArg error) bool { } return false } + +type TimeoutInfo struct { + CU uint64 + Hanging bool + Stateful uint32 +} + +func GetTimeoutForProcessing(relayTimeout time.Duration, timeoutInfo TimeoutInfo) time.Duration { + ctxTimeout := DefaultTimeout + if timeoutInfo.Hanging || timeoutInfo.CU > 100 || timeoutInfo.Stateful == CONSISTENCY_SELECT_ALLPROVIDERS { + ctxTimeout = DefaultTimeoutLong + } + if relayTimeout > ctxTimeout { + ctxTimeout = relayTimeout + } + return ctxTimeout +} diff --git a/protocol/lavasession/used_providers.go b/protocol/lavasession/used_providers.go index 61fc64bf24..2298aa9358 100644 --- a/protocol/lavasession/used_providers.go +++ b/protocol/lavasession/used_providers.go @@ -39,7 +39,7 @@ func (up *UsedProviders) CurrentlyUsed() int { return len(up.providers) } -func (up *UsedProviders) TotalSessions() int { +func (up *UsedProviders) SessionsLatestBatch() int { up.lock.RLock() defer up.lock.RUnlock() return up.sessionsLatestBatch @@ -72,7 +72,7 @@ func (up *UsedProviders) RemoveUsed(provider string, err error) { up.lock.Lock() defer up.lock.Unlock() if err != nil { - if ShouldRetryWithThisError(err) { + if shouldRetryWithThisError(err) { _, ok := up.blockOnSyncLoss[provider] if !ok && IsSessionSyncLoss(err) { up.blockOnSyncLoss[provider] = struct{}{} @@ -142,15 +142,6 @@ func (up *UsedProviders) tryLockSelection() bool { return false } -func (up *UsedProviders) GetSelecting() bool { - if up == nil { - return false - } - up.lock.RLock() - defer up.lock.RUnlock() - return up.selecting -} - func (up *UsedProviders) GetUnwantedProvidersToSend() map[string]struct{} { if up == nil { return map[string]struct{}{} @@ -169,6 +160,6 @@ func (up *UsedProviders) GetUnwantedProvidersToSend() map[string]struct{} { return unwantedProvidersToSend } -func ShouldRetryWithThisError(err error) bool { +func shouldRetryWithThisError(err error) bool { return IsSessionSyncLoss(err) } diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 
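GetTimeoutForProcessing picks between the two new defaults (20 seconds, or 3 minutes for hanging, heavy, or stateful calls) and never undercuts the relay timeout itself. A small sketch, with `stateful` simplified to a boolean meaning "send to all providers":

```go
package main

import (
	"fmt"
	"time"
)

const (
	defaultTimeout     = 20 * time.Second
	defaultTimeoutLong = 3 * time.Minute
)

// timeoutInfo mirrors common.TimeoutInfo from this commit.
type timeoutInfo struct {
	cu       uint64
	hanging  bool
	stateful bool // simplified: true stands for CONSISTENCY_SELECT_ALLPROVIDERS
}

// processingTimeout follows the same rule as GetTimeoutForProcessing: start from
// a default, switch to the long default for hanging/heavy/stateful calls, and
// never go below the relay timeout itself.
func processingTimeout(relayTimeout time.Duration, info timeoutInfo) time.Duration {
	ctxTimeout := defaultTimeout
	if info.hanging || info.cu > 100 || info.stateful {
		ctxTimeout = defaultTimeoutLong
	}
	if relayTimeout > ctxTimeout {
		ctxTimeout = relayTimeout
	}
	return ctxTimeout
}

func main() {
	fmt.Println(processingTimeout(5*time.Second, timeoutInfo{cu: 10}))                // 20s
	fmt.Println(processingTimeout(5*time.Second, timeoutInfo{cu: 10, hanging: true})) // 3m
	fmt.Println(processingTimeout(10*time.Minute, timeoutInfo{cu: 10}))               // 10m
}
```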
6b1466905b..52a1488d35 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -7,7 +7,6 @@ import ( "net/http" "strings" "sync" - "time" "github.com/lavanet/lava/protocol/chainlib" "github.com/lavanet/lava/protocol/common" @@ -17,9 +16,7 @@ import ( ) const ( - MaxCallsPerRelay = 50 - DefaultTimeout = 20 * time.Second - DefaultTimeoutLong = 3 * time.Minute + MaxCallsPerRelay = 50 ) type Selection int @@ -188,7 +185,7 @@ func (rp *RelayProcessor) checkEndProcessing(responsesCount int) bool { } } // check if we got all of the responses - if rp.usedProviders.CurrentlyUsed() == 0 && responsesCount >= rp.usedProviders.TotalSessions() { + if rp.usedProviders.CurrentlyUsed() == 0 && responsesCount >= rp.usedProviders.SessionsLatestBatch() { // no active sessions, and we read all the responses, we can return return true } @@ -338,14 +335,3 @@ func (rp *RelayProcessor) ProcessingResult() (returnedResult *common.RelayResult returnedResult.ProviderInfo.ProviderAddress = strings.Join(allProvidersAddresses, ",") return returnedResult, utils.LavaFormatError("failed relay, insufficient results", processingError) } - -func GetTimeoutForProcessing(relayTimeout time.Duration, chainMessage chainlib.ChainMessage) time.Duration { - ctxTimeout := DefaultTimeout - if chainlib.IsHangingApi(chainMessage) || chainMessage.GetApi().ComputeUnits > 100 || chainlib.GetStateful(chainMessage) == common.CONSISTENCY_SELECT_ALLPROVIDERS { - ctxTimeout = DefaultTimeoutLong - } - if relayTimeout > ctxTimeout { - ctxTimeout = relayTimeout - } - return ctxTimeout -} diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 0a3aaaf910..3f5516d4f3 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -323,11 +323,10 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH // we failed to send a batch of relays, if there are no active sends we can terminate return relayProcessor, err } - relayTimeout := chainlib.GetRelayTimeout(chainMessage, rpccs.chainParser) // a channel to be notified processing was done, true means we have results and can return gotResults := make(chan bool) + processingTimeout, relayTimeout := rpccs.getProcessingTimeout(chainMessage) go func() { - processingTimeout := GetTimeoutForProcessing(relayTimeout, chainMessage) processingCtx, cancel := context.WithTimeout(ctx, processingTimeout) defer cancel() // ProcessResults is reading responses while blocking until the conditions are met @@ -461,7 +460,6 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( return err } - relayTimeout := chainlib.GetRelayTimeout(chainMessage, rpccs.chainParser) // Iterate over the sessions map for providerPublicAddress, sessionInfo := range sessions { // Launch a separate goroutine for each session @@ -514,7 +512,8 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( // unique per dappId and ip consumerToken := common.GetUniqueToken(dappID, consumerIp) - localRelayResult, relayLatency, errResponse, backoff := rpccs.relayInner(goroutineCtx, singleConsumerSession, localRelayResult, relayTimeout, chainMessage, consumerToken) + processingTimeout, relayTimeout := rpccs.getProcessingTimeout(chainMessage) + localRelayResult, relayLatency, errResponse, backoff := rpccs.relayInner(goroutineCtx, singleConsumerSession, localRelayResult, processingTimeout, chainMessage, consumerToken) if errResponse != nil { failRelaySession := func(origErr error, backoff_ bool) { 
backOffDuration := 0 * time.Second @@ -560,6 +559,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( utils.Attribute{Key: "finalizationConsensus", Value: rpccs.finalizationConsensus.String()}, ) } + errResponse = rpccs.consumerSessionManager.OnSessionDone(singleConsumerSession, latestBlock, chainlib.GetComputeUnits(chainMessage), relayLatency, singleConsumerSession.CalculateExpectedLatency(relayTimeout), expectedBH, numOfProviders, pairingAddressesLen, chainMessage.GetApi().Category.HangingApi) // session done successfully if rpccs.cache.CacheActive() && rpcclient.ValidateStatusCodes(localRelayResult.StatusCode, true) == nil { @@ -740,8 +740,7 @@ func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context if err != nil { return utils.LavaFormatWarning("failed data reliability relay to provider", err, utils.LogAttr("relayProcessorDataReliability", relayProcessorDataReliability)) } - relayTimeout := chainlib.GetRelayTimeout(chainMessage, rpccs.chainParser) - processingTimeout := GetTimeoutForProcessing(relayTimeout, chainMessage) + processingTimeout, _ := rpccs.getProcessingTimeout(chainMessage) processingCtx, cancel := context.WithTimeout(ctx, processingTimeout) defer cancel() err = relayProcessorDataReliability.WaitForResults(processingCtx) @@ -784,6 +783,13 @@ func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context return nil } +func (rpccs *RPCConsumerServer) getProcessingTimeout(chainMessage chainlib.ChainMessage) (processingTimeout time.Duration, relayTimeout time.Duration) { + _, averageBlockTime, _, _ := rpccs.chainParser.ChainBlockStats() + relayTimeout = chainlib.GetRelayTimeout(chainMessage, averageBlockTime) + processingTimeout = common.GetTimeoutForProcessing(relayTimeout, chainlib.GetTimeoutInfo(chainMessage)) + return processingTimeout, relayTimeout +} + func (rpccs *RPCConsumerServer) LavaDirectiveHeaders(metadata []pairingtypes.Metadata) ([]pairingtypes.Metadata, map[string]string) { metadataRet := []pairingtypes.Metadata{} headerDirectives := map[string]string{} diff --git a/protocol/rpcprovider/rpcprovider_server.go b/protocol/rpcprovider/rpcprovider_server.go index aa253cbb2e..cc82cc1ccf 100644 --- a/protocol/rpcprovider/rpcprovider_server.go +++ b/protocol/rpcprovider/rpcprovider_server.go @@ -677,9 +677,10 @@ func (rpcps *RPCProviderServer) TryRelay(ctx context.Context, request *pairingty var blockDistanceToFinalization uint32 var averageBlockTime time.Duration updatedChainMessage := false + var blockLagForQosSync int64 + blockLagForQosSync, averageBlockTime, blockDistanceToFinalization, blocksInFinalizationData = rpcps.chainParser.ChainBlockStats() + relayTimeout := chainlib.GetRelayTimeout(chainMsg, averageBlockTime) if dataReliabilityEnabled { - var blockLagForQosSync int64 - blockLagForQosSync, averageBlockTime, blockDistanceToFinalization, blocksInFinalizationData = rpcps.chainParser.ChainBlockStats() var err error specificBlock := request.RelayData.RequestBlock if specificBlock < spectypes.LATEST_BLOCK { @@ -690,7 +691,7 @@ func (rpcps *RPCProviderServer) TryRelay(ctx context.Context, request *pairingty // handle consistency, if the consumer requested information we do not have in the state tracker - latestBlock, requestedHashes, _, err = rpcps.handleConsistency(ctx, request.RelayData.GetSeenBlock(), request.RelayData.GetRequestBlock(), averageBlockTime, blockLagForQosSync, blockDistanceToFinalization, blocksInFinalizationData) + latestBlock, requestedHashes, _, err = rpcps.handleConsistency(ctx, relayTimeout, 
request.RelayData.GetSeenBlock(), request.RelayData.GetRequestBlock(), averageBlockTime, blockLagForQosSync, blockDistanceToFinalization, blocksInFinalizationData) if err != nil { return nil, err } @@ -792,7 +793,8 @@ func (rpcps *RPCProviderServer) TryRelay(ctx context.Context, request *pairingty if proofBlock < modifiedReqBlock && proofBlock < request.RelayData.SeenBlock { // we requested with a newer block, but don't necessarily have the finaliziation proof, chaintracker might be behind proofBlock = slices.Min([]int64{modifiedReqBlock, request.RelayData.SeenBlock}) - proofBlock, requestedHashes, err = rpcps.GetBlockDataForOptimisticFetch(ctx, proofBlock, blockDistanceToFinalization, blocksInFinalizationData, averageBlockTime) + + proofBlock, requestedHashes, err = rpcps.GetBlockDataForOptimisticFetch(ctx, relayTimeout, proofBlock, blockDistanceToFinalization, blocksInFinalizationData, averageBlockTime) if err != nil { return nil, utils.LavaFormatError("error getting block range for finalization proof", err) } @@ -817,7 +819,7 @@ func (rpcps *RPCProviderServer) TryRelay(ctx context.Context, request *pairingty return reply, nil } -func (rpcps *RPCProviderServer) GetBlockDataForOptimisticFetch(ctx context.Context, requiredProofBlock int64, blockDistanceToFinalization uint32, blocksInFinalizationData uint32, averageBlockTime time.Duration) (latestBlock int64, requestedHashes []*chaintracker.BlockStore, err error) { +func (rpcps *RPCProviderServer) GetBlockDataForOptimisticFetch(ctx context.Context, relayBaseTimeout time.Duration, requiredProofBlock int64, blockDistanceToFinalization uint32, blocksInFinalizationData uint32, averageBlockTime time.Duration) (latestBlock int64, requestedHashes []*chaintracker.BlockStore, err error) { utils.LavaFormatDebug("getting new blockData for optimistic fetch", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "requiredProofBlock", Value: requiredProofBlock}) proofBlock := requiredProofBlock toBlock := proofBlock - int64(blockDistanceToFinalization) @@ -830,7 +832,7 @@ func (rpcps *RPCProviderServer) GetBlockDataForOptimisticFetch(ctx context.Conte } timeSlept := 0 * time.Millisecond refreshTime := (averageBlockTime / chaintracker.MostFrequentPollingMultiplier) / 2 - sleepTime := slices.Min([]time.Duration{10 * refreshTime, timeCanWait}) + sleepTime := slices.Min([]time.Duration{10 * refreshTime, timeCanWait, relayBaseTimeout / 2}) sleepContext, cancel := context.WithTimeout(context.Background(), sleepTime) fetchedWithoutError := func() bool { timeSlept += refreshTime @@ -853,7 +855,7 @@ func (rpcps *RPCProviderServer) GetBlockDataForOptimisticFetch(ctx context.Conte return proofBlock, requestedHashes, err } -func (rpcps *RPCProviderServer) handleConsistency(ctx context.Context, seenBlock int64, requestBlock int64, averageBlockTime time.Duration, blockLagForQosSync int64, blockDistanceToFinalization uint32, blocksInFinalizationData uint32) (latestBlock int64, requestedHashes []*chaintracker.BlockStore, timeSlept time.Duration, err error) { +func (rpcps *RPCProviderServer) handleConsistency(ctx context.Context, baseRelayTimeout time.Duration, seenBlock int64, requestBlock int64, averageBlockTime time.Duration, blockLagForQosSync int64, blockDistanceToFinalization uint32, blocksInFinalizationData uint32) (latestBlock int64, requestedHashes []*chaintracker.BlockStore, timeSlept time.Duration, err error) { latestBlock, requestedHashes, changeTime, err := rpcps.GetLatestBlockData(ctx, blockDistanceToFinalization, blocksInFinalizationData) if err 
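The added `relayBaseTimeout/2` bound on the optimistic-fetch sleep means a generous consumer timeout no longer lets the provider stall for most of the request. The clamping itself, in isolation:

```go
package main

import (
	"fmt"
	"time"
)

// cappedSleep reproduces the waiting rule this commit adds on the provider side:
// the optimistic-fetch sleep is bounded by the polling refresh interval, by how
// long the request can still wait, and now also by half the base relay timeout.
func cappedSleep(refreshTime, timeCanWait, relayBaseTimeout time.Duration) time.Duration {
	sleep := 10 * refreshTime
	if timeCanWait < sleep {
		sleep = timeCanWait
	}
	if relayBaseTimeout/2 < sleep {
		sleep = relayBaseTimeout / 2
	}
	return sleep
}

func main() {
	// refresh every 300ms and 5s of slack, but a 4s relay timeout caps the wait at 2s
	fmt.Println(cappedSleep(300*time.Millisecond, 5*time.Second, 4*time.Second))
}
```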
!= nil { return 0, nil, 0, err @@ -875,6 +877,10 @@ func (rpcps *RPCProviderServer) handleConsistency(ctx context.Context, seenBlock deadline, ok := ctx.Deadline() probabilityBlockError := 0.0 halfTimeLeft := time.Until(deadline) / 2 // giving the node at least half the timeout time to process + if baseRelayTimeout/2 < halfTimeLeft { + // do not allow waiting the full timeout since now it's absurdly high + halfTimeLeft = baseRelayTimeout / 2 + } if ok { timeProviderHasS := (time.Since(changeTime) + halfTimeLeft).Seconds() // add waiting half the timeout time if changeTime.IsZero() { @@ -903,8 +909,7 @@ func (rpcps *RPCProviderServer) handleConsistency(ctx context.Context, seenBlock } // we are waiting for the state tracker to catch up with the requested block utils.LavaFormatDebug("waiting for state tracker to update", utils.Attribute{Key: "probabilityBlockError", Value: probabilityBlockError}, utils.Attribute{Key: "time", Value: time.Until(deadline)}, utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "requestedBlock", Value: requestBlock}, utils.Attribute{Key: "seenBlock", Value: seenBlock}, utils.Attribute{Key: "latestBlock", Value: latestBlock}, utils.Attribute{Key: "blockGap", Value: blockGap}) - sleepTime := time.Until(deadline) / 2 // sleep up to half the timeout so we actually have time to do the relay - sleepContext, cancel := context.WithTimeout(context.Background(), sleepTime) + sleepContext, cancel := context.WithTimeout(context.Background(), halfTimeLeft) getLatestBlock := func() bool { ret, _ := rpcps.reliabilityManager.GetLatestBlockNum() // if we hit either seen or requested we can return diff --git a/protocol/rpcprovider/rpcprovider_server_test.go b/protocol/rpcprovider/rpcprovider_server_test.go index 8efa93f7e4..445ef73142 100644 --- a/protocol/rpcprovider/rpcprovider_server_test.go +++ b/protocol/rpcprovider/rpcprovider_server_test.go @@ -251,7 +251,7 @@ func TestHandleConsistency(t *testing.T) { } }() ctx, cancel := context.WithTimeout(context.Background(), play.timeout) - latestBlock, _, timeSlept, err := rpcproviderServer.handleConsistency(ctx, seenBlock, requestBlock, averageBlockTime, blockLagForQosSync, blocksInFinalizationData, blockDistanceToFinalization) + latestBlock, _, timeSlept, err := rpcproviderServer.handleConsistency(ctx, play.timeout, seenBlock, requestBlock, averageBlockTime, blockLagForQosSync, blocksInFinalizationData, blockDistanceToFinalization) cancel() require.Equal(t, play.err == nil, err == nil, err, strconv.Itoa(calls)) require.Less(t, timeSlept, play.timeout) From a66beefa0b4cc98058cb78d46f8ae7fbcd547ed7 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 28 Feb 2024 12:38:41 +0200 Subject: [PATCH 08/57] handle non deterministic apis in the quorum --- protocol/rpcconsumer/relay_processor.go | 27 ++++++++++++++++++---- protocol/rpcconsumer/rpcconsumer_server.go | 1 - 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 52a1488d35..5025a1d90a 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -8,6 +8,7 @@ import ( "strings" "sync" + sdktypes "github.com/cosmos/cosmos-sdk/types" "github.com/lavanet/lava/protocol/chainlib" "github.com/lavanet/lava/protocol/common" "github.com/lavanet/lava/protocol/lavasession" @@ -256,29 +257,47 @@ func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSi return nil, errors.New("quorumSize must be greater than zero") } countMap 
:= make(map[string]int) // Map to store the count of each unique result.Reply.Data + deterministic := rp.chainMessage.GetApi().Category.Deterministic + var bestQosResult common.RelayResult + bestQos := sdktypes.ZeroDec() for _, result := range results { if result.Reply != nil && result.Reply.Data != nil { countMap[string(result.Reply.Data)]++ + if !deterministic { + if result.ProviderInfo.ProviderQoSExcellenceSummery.IsNil() || result.ProviderInfo.ProviderStake.Amount.IsNil() { + continue + } + currentResult := result.ProviderInfo.ProviderQoSExcellenceSummery.MulInt(result.ProviderInfo.ProviderStake.Amount) + if currentResult.GTE(bestQos) { + bestQos.Set(currentResult) + bestQosResult = result + } + } } } - var mostCommonResult *common.RelayResult + var mostCommonResult common.RelayResult var maxCount int for _, result := range results { if result.Reply != nil && result.Reply.Data != nil { count := countMap[string(result.Reply.Data)] if count > maxCount { maxCount = count - mostCommonResult = &result + mostCommonResult = result } } } // Check if the majority count is less than quorumSize - if mostCommonResult == nil || maxCount < quorumSize { + if mostCommonResult.Reply == nil || maxCount < quorumSize { + if !deterministic { + // non deterministic apis might not have a quorum + // instead of failing get the best one + return &bestQosResult, nil + } return nil, errors.New("majority count is less than quorumSize") } mostCommonResult.Quorum = maxCount - return mostCommonResult, nil + return &mostCommonResult, nil } // this function returns the results according to the defined strategy diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 3f5516d4f3..e0e2bf9531 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -701,7 +701,6 @@ func (rpccs *RPCConsumerServer) relaySubscriptionInner(ctx context.Context, endp } func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context.Context, dappID string, consumerIp string, chainMessage chainlib.ChainMessage, dataReliabilityThreshold uint32, relayProcessor *RelayProcessor) error { - specCategory := chainMessage.GetApi().Category if !specCategory.Deterministic { return nil // disabled for this spec and requested block so no data reliability messages From 353ca05f9080a1648d468acfff65a3062549c900 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 28 Feb 2024 12:43:06 +0200 Subject: [PATCH 09/57] lint --- protocol/rpcconsumer/rpcconsumer_server.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index e0e2bf9531..23a05ba4f3 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -295,7 +295,7 @@ func (rpccs *RPCConsumerServer) SendRelay( // new context is needed for data reliability as some clients cancel the context they provide when the relay returns // as data reliability happens in a go routine it will continue while the response returns. 
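For non-deterministic APIs the quorum may legitimately never form, so this commit falls back to the reply served by the provider with the best QoS-excellence-times-stake score instead of failing the relay. A simplified sketch using float64 scores in place of the sdk.Dec arithmetic:

```go
package main

import "fmt"

// scoredReply is a simplified stand-in for a relay result: the reply payload plus
// the provider's QoS excellence score and stake, which the patch multiplies to
// rank providers when no two replies agree.
type scoredReply struct {
	data  string
	qos   float64 // stand-in for the sdk.Dec QoS excellence summary
	stake int64
}

// bestQosReply picks the reply with the highest qos*stake score, matching the
// >= comparison (GTE) used in the hunk above.
func bestQosReply(replies []scoredReply) (scoredReply, bool) {
	var best scoredReply
	found := false
	bestScore := -1.0
	for _, r := range replies {
		score := r.qos * float64(r.stake)
		if score >= bestScore {
			bestScore = score
			best = r
			found = true
		}
	}
	return best, found
}

func main() {
	replies := []scoredReply{
		{data: `{"peers":17}`, qos: 0.92, stake: 1_000},
		{data: `{"peers":19}`, qos: 0.88, stake: 2_000},
	}
	best, _ := bestQosReply(replies)
	fmt.Println(best.data) // the second reply wins: 0.88*2000 > 0.92*1000
}
```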
guid, found := utils.GetUniqueIdentifier(ctx) - dataReliabilityContext, _ := context.WithTimeout(context.Background(), 30*time.Second) + dataReliabilityContext := context.Background() if found { dataReliabilityContext = utils.WithUniqueIdentifier(dataReliabilityContext, guid) } @@ -701,6 +701,8 @@ func (rpccs *RPCConsumerServer) relaySubscriptionInner(ctx context.Context, endp } func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context.Context, dappID string, consumerIp string, chainMessage chainlib.ChainMessage, dataReliabilityThreshold uint32, relayProcessor *RelayProcessor) error { + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() specCategory := chainMessage.GetApi().Category if !specCategory.Deterministic { return nil // disabled for this spec and requested block so no data reliability messages From 5d1552553fadd34121a039e871157f0799641d0b Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 28 Feb 2024 14:18:59 +0200 Subject: [PATCH 10/57] fix tests --- .../lavasession/consumer_session_manager.go | 2 +- .../consumer_session_manager_test.go | 74 ++++++++++--------- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index ddf5303ae5..1ab401c495 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -325,7 +325,7 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS if !canSelect { return nil, utils.LavaFormatError("failed getting sessions from used Providers", nil, utils.LogAttr("usedProviders", usedProviders), utils.LogAttr("endpoint", csm.rpcEndpoint)) } - defer func() { relayProcessor.GetUsedProviders().AddUsed(consumerSessionMap) }() + defer func() { usedProviders.AddUsed(consumerSessionMap) }() initUnwantedProviders := usedProviders.GetUnwantedProvidersToSend() extensionNames := common.GetExtensionNames(extensions) diff --git a/protocol/lavasession/consumer_session_manager_test.go b/protocol/lavasession/consumer_session_manager_test.go index 17c1b76eee..99bc3e305b 100644 --- a/protocol/lavasession/consumer_session_manager_test.go +++ b/protocol/lavasession/consumer_session_manager_test.go @@ -50,6 +50,22 @@ func CreateConsumerSessionManager() *ConsumerSessionManager { var grpcServer *grpc.Server +type mockProcessor struct { + usedProviders *UsedProviders +} + +func (mp mockProcessor) GetUsedProviders() *UsedProviders { + return mp.usedProviders +} + +func (mp mockProcessor) RemoveUsed(provider string, err error) { + mp.usedProviders.RemoveUsed(provider, err) +} + +func emptyRelayProcessor() mockProcessor { + return mockProcessor{usedProviders: NewUsedProviders(nil)} +} + func TestMain(m *testing.M) { serverStarted := make(chan struct{}) @@ -139,7 +155,7 @@ func TestHappyFlow(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. 
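The test-side mockProcessor only needs to expose the used-providers handle and forward RemoveUsed, which is enough to stand in for a relay processor in GetSessions. A self-contained sketch of that pattern; the interface name and the used-providers internals below are illustrative stand-ins, not the real lavasession types:

```go
package main

import "fmt"

// usedProviders is a minimal stand-in for lavasession.UsedProviders.
type usedProviders struct{ removed []string }

func (up *usedProviders) RemoveUsed(provider string, err error) {
	up.removed = append(up.removed, provider)
}

// sessionConsumer names the two methods the mock needs; the real interface the
// session manager consumes is assumed here, not shown in this hunk.
type sessionConsumer interface {
	GetUsedProviders() *usedProviders
	RemoveUsed(provider string, err error)
}

// mockProcessor mirrors the test helper: it forwards both calls to a plain
// used-providers value so tests can pass it wherever a relay processor is expected.
type mockProcessor struct{ up *usedProviders }

func (mp mockProcessor) GetUsedProviders() *usedProviders { return mp.up }

func (mp mockProcessor) RemoveUsed(provider string, err error) { mp.up.RemoveUsed(provider, err) }

func main() {
	var sc sessionConsumer = mockProcessor{up: &usedProviders{}}
	sc.RemoveUsed("lava@prov1", nil)
	fmt.Println(sc.GetUsedProviders().removed) // [lava@prov1]
}
```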
require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -161,7 +177,7 @@ func TestHappyFlowVirtualEpoch(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1), nil, servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session + css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1), emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session require.NoError(t, err) for _, cs := range css { @@ -185,7 +201,7 @@ func TestVirtualEpochWithFailure(t *testing.T) { err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - _, err = csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1)+10, nil, servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session + _, err = csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1)+10, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session require.Error(t, err) } @@ -195,8 +211,8 @@ func TestPairingReset(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - csm.validAddresses = []string{} // set valid addresses to zero - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + csm.validAddresses = []string{} // set valid addresses to zero + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) require.Equal(t, len(csm.validAddresses), len(csm.pairingAddresses)) @@ -225,7 +241,7 @@ func TestPairingResetWithFailures(t *testing.T) { if len(csm.validAddresses) == 0 { // wait for all pairings to be blocked. break } - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -234,7 +250,7 @@ func TestPairingResetWithFailures(t *testing.T) { } } require.Equal(t, len(csm.validAddresses), 0) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) require.Equal(t, len(csm.validAddresses), len(csm.pairingAddresses)) @@ -259,7 +275,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { if len(csm.validAddresses) == 0 { // wait for all pairings to be blocked. 
break } - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session for _, cs := range css { err = csm.OnSessionFailure(cs.Session, nil) @@ -271,7 +287,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { } } require.Equal(t, len(csm.validAddresses), 0) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) require.Equal(t, len(csm.validAddresses), len(csm.pairingAddresses)) @@ -283,7 +299,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { } } - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -318,7 +334,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { sessionList := make([]session, numberOfAllowedSessionsPerConsumer) sessionListData := make([]SessTestData, numberOfAllowedSessionsPerConsumer) for i := 0; i < numberOfAllowedSessionsPerConsumer; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { // get a session @@ -354,7 +370,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { } for i := 0; i < numberOfAllowedSessionsPerConsumer; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { // get a session @@ -387,7 +403,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { } func successfulSession(ctx context.Context, csm *ConsumerSessionManager, t *testing.T, p int, ch chan int) { - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -400,7 +416,7 @@ func successfulSession(ctx context.Context, csm *ConsumerSessionManager, t *test } func failedSession(ctx context.Context, csm *ConsumerSessionManager, t *testing.T, p int, ch chan int) { - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -518,7 +534,7 @@ func TestSessionFailureAndGetReportedProviders(t *testing.T) { pairingList := createPairingList("", true) err := 
csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -552,7 +568,7 @@ func TestSessionFailureEpochMisMatch(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -573,7 +589,7 @@ func TestAllProvidersEndpointsDisabled(t *testing.T) { pairingList := createPairingList("", false) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - cs, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + cs, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.Nil(t, cs) require.Error(t, err) } @@ -613,7 +629,7 @@ func TestGetSession(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) require.NoError(t, err) for _, cs := range css { @@ -659,7 +675,7 @@ func TestPairingWithAddons(t *testing.T) { // block all providers initialProvidersLen := len(csm.getValidAddresses(addon, nil)) for i := 0; i < initialProvidersLen; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, addon, nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, addon, nil, common.NOSTATE, 0) // get a session require.NoError(t, err, i) for _, cs := range css { err = csm.OnSessionFailure(cs.Session, ReportAndBlockProviderError) @@ -671,7 +687,7 @@ func TestPairingWithAddons(t *testing.T) { if addon != "" { require.NotEqual(t, csm.getValidAddresses(addon, nil), csm.getValidAddresses("", nil)) } - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, addon, nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, addon, nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) @@ -734,7 +750,7 @@ func TestPairingWithExtensions(t *testing.T) { } initialProvidersLen := len(csm.getValidAddresses(extensionOpt.addon, extensionOpt.extensions)) for i := 0; i < initialProvidersLen; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, 
extensionOpt.addon, extensionsList, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NOSTATE, 0) // get a session require.NoError(t, err, i) for _, cs := range css { err = csm.OnSessionFailure(cs.Session, ReportAndBlockProviderError) @@ -746,7 +762,7 @@ func TestPairingWithExtensions(t *testing.T) { if len(extensionOpt.extensions) > 0 || extensionOpt.addon != "" { require.NotEqual(t, csm.getValidAddresses(extensionOpt.addon, extensionOpt.extensions), csm.getValidAddresses("", nil)) } - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, extensionOpt.addon, extensionsList, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) @@ -767,18 +783,6 @@ func TestNoPairingsError(t *testing.T) { require.True(t, PairingListEmptyError.Is(err)) } -type mockProcessor struct { - usedProviders *UsedProviders -} - -func (mp mockProcessor) GetUsedProviders() *UsedProviders { - return mp.usedProviders -} - -func (mp mockProcessor) RemoveUsed(provider string, err error) { - mp.usedProviders.RemoveUsed(provider, err) -} - func TestPairingWithStateful(t *testing.T) { ctx := context.Background() t.Run("stateful", func(t *testing.T) { @@ -793,7 +797,7 @@ func TestPairingWithStateful(t *testing.T) { providerAddresses := csm.getValidAddresses(addon, nil) allProviders := len(providerAddresses) require.Equal(t, 10, allProviders) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session require.NoError(t, err) require.Equal(t, allProviders, len(css)) for _, cs := range css { From 154e441b9d0970d190716ffc6f9463850e611fa9 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 28 Feb 2024 17:45:01 +0200 Subject: [PATCH 11/57] fix deadlock --- protocol/lavasession/consumer_session_manager_test.go | 2 +- protocol/lavasession/used_providers.go | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/protocol/lavasession/consumer_session_manager_test.go b/protocol/lavasession/consumer_session_manager_test.go index 99bc3e305b..6e840e209e 100644 --- a/protocol/lavasession/consumer_session_manager_test.go +++ b/protocol/lavasession/consumer_session_manager_test.go @@ -805,7 +805,7 @@ func TestPairingWithStateful(t *testing.T) { require.NoError(t, err) } usedProviders := NewUsedProviders(nil) - usedProviders.SetUnwanted(providerAddresses[0]) + usedProviders.RemoveUsed(providerAddresses[0], nil) processor := mockProcessor{usedProviders: usedProviders} css, err = csm.GetSessions(ctx, cuForFirstRequest, processor, servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session require.NoError(t, err) diff --git a/protocol/lavasession/used_providers.go b/protocol/lavasession/used_providers.go index 2298aa9358..8855141335 100644 --- 
a/protocol/lavasession/used_providers.go +++ b/protocol/lavasession/used_providers.go @@ -78,14 +78,14 @@ func (up *UsedProviders) RemoveUsed(provider string, err error) { up.blockOnSyncLoss[provider] = struct{}{} utils.LavaFormatWarning("Identified SyncLoss in provider, allowing retry", err, utils.Attribute{Key: "address", Value: provider}) } else { - up.SetUnwanted(provider) + up.setUnwanted(provider) } } else { - up.SetUnwanted(provider) + up.setUnwanted(provider) } } else { // we got a valid response from this provider, no reason to keep using it - up.SetUnwanted(provider) + up.setUnwanted(provider) } delete(up.providers, provider) } @@ -105,12 +105,10 @@ func (up *UsedProviders) AddUsed(sessions ConsumerSessionsMap) { up.selecting = false } -func (up *UsedProviders) SetUnwanted(provider string) { +func (up *UsedProviders) setUnwanted(provider string) { if up == nil { return } - up.lock.Lock() - defer up.lock.Unlock() up.unwantedProviders[provider] = struct{}{} } From 6398a5f10a6b2af5538321044bd2a739e4640276 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 28 Feb 2024 17:55:50 +0200 Subject: [PATCH 12/57] fix nil getSessions --- protocol/lavasession/end_to_end_lavasession_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/protocol/lavasession/end_to_end_lavasession_test.go b/protocol/lavasession/end_to_end_lavasession_test.go index 9d7b081fea..09282520e8 100644 --- a/protocol/lavasession/end_to_end_lavasession_test.go +++ b/protocol/lavasession/end_to_end_lavasession_test.go @@ -28,7 +28,7 @@ func TestHappyFlowE2EEmergency(t *testing.T) { successfulRelays++ for i := 0; i < len(consumerVirtualEpochs); i++ { - css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, nil, servicedBlockNumber, "", nil, common.NOSTATE, consumerVirtualEpochs[i]) // get a session + css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, consumerVirtualEpochs[i]) // get a session require.NoError(t, err) for _, cs := range css { @@ -92,7 +92,7 @@ func TestHappyFlowE2EEmergency(t *testing.T) { func TestHappyFlowEmergencyInConsumer(t *testing.T) { csm, psm, ctx := prepareSessionsWithFirstRelay(t, maxCuForVirtualEpoch) - css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, nil, servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session + css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session require.NoError(t, err) for _, cs := range css { @@ -157,7 +157,7 @@ func prepareSessionsWithFirstRelay(t *testing.T, cuForFirstRequest uint64) (*Con err := csm.UpdateAllProviders(epoch1, cswpList) // update the providers. 
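// Illustrative sketch, not part of the patch series: PATCH 11/57 ("fix deadlock") above turns
// SetUnwanted into an unexported, lock-free helper. Go's sync.Mutex is not reentrant, so if
// RemoveUsed already holds up.lock (which the fix strongly suggests), calling a method that
// locks the same mutex again blocks forever. A minimal standalone reproduction of that
// pattern, with hypothetical names rather than the actual UsedProviders code:
package main

import (
	"fmt"
	"sync"
)

type tracker struct {
	mu       sync.Mutex
	unwanted map[string]struct{}
}

// markUnwanted takes the mutex itself, like the old exported SetUnwanted did.
func (t *tracker) markUnwanted(provider string) {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.unwanted[provider] = struct{}{}
}

// removeUsed holds the mutex and then calls markUnwanted, which tries to lock the
// same non-reentrant mutex: the goroutine waits on itself and never proceeds.
func (t *tracker) removeUsed(provider string) {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.markUnwanted(provider) // deadlocks here; the fix is an unexported helper that assumes the lock is held
}

func main() {
	t := &tracker{unwanted: map[string]struct{}{}}
	fmt.Println("calling removeUsed; this call never returns")
	t.removeUsed("provider1")
}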
require.NoError(t, err) // get single consumer session - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { From fcd51d625403dcb1fecd4d4f9330df78aad441a7 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 28 Feb 2024 17:59:21 +0200 Subject: [PATCH 13/57] increase hanging api time --- protocol/chainlib/common.go | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/protocol/chainlib/common.go b/protocol/chainlib/common.go index 78495c20aa..daf6a58795 100644 --- a/protocol/chainlib/common.go +++ b/protocol/chainlib/common.go @@ -312,21 +312,7 @@ func GetRelayTimeout(chainMessage ChainMessageForSend, averageBlockTime time.Dur // Calculate extra RelayTimeout extraRelayTimeout := time.Duration(0) if IsHangingApi(chainMessage) { - extraRelayTimeout = averageBlockTime - } - relayTimeAddition := common.GetTimePerCu(GetComputeUnits(chainMessage)) - if chainMessage.GetApi().TimeoutMs > 0 { - relayTimeAddition = time.Millisecond * time.Duration(chainMessage.GetApi().TimeoutMs) - } - // Set relay timout, increase it every time we fail a relay on timeout - return extraRelayTimeout + relayTimeAddition + common.AverageWorldLatency -} - -func GetRelayTimeoutForSend(chainMessage ChainMessageForSend, averageBlockTime time.Duration) time.Duration { - // Calculate extra RelayTimeout - extraRelayTimeout := time.Duration(0) - if IsHangingApi(chainMessage) { - extraRelayTimeout = averageBlockTime + extraRelayTimeout = averageBlockTime * 2 } relayTimeAddition := common.GetTimePerCu(GetComputeUnits(chainMessage)) if chainMessage.GetApi().TimeoutMs > 0 { From 9c2b59ce2e71ce482217986db13b36f78aa21fa9 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Thu, 29 Feb 2024 20:45:42 +0200 Subject: [PATCH 14/57] add unitest for usedProviders --- protocol/lavasession/used_providers_test.go | 99 +++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 protocol/lavasession/used_providers_test.go diff --git a/protocol/lavasession/used_providers_test.go b/protocol/lavasession/used_providers_test.go new file mode 100644 index 0000000000..f2f2557b25 --- /dev/null +++ b/protocol/lavasession/used_providers_test.go @@ -0,0 +1,99 @@ +package lavasession + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/gogo/status" + "github.com/stretchr/testify/require" + "google.golang.org/grpc/codes" +) + +func TestUsedProviders(t *testing.T) { + t.Run("basic", func(t *testing.T) { + usedProviders := NewUsedProviders(nil) + canUse := usedProviders.tryLockSelection() + require.True(t, canUse) + canUseAgain := usedProviders.tryLockSelection() + require.False(t, canUseAgain) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + unwanted := usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 0) + consumerSessionsMap := ConsumerSessionsMap{"test": &SessionInfo{}, "test2": &SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap) + canUseAgain = usedProviders.tryLockSelection() + require.True(t, canUseAgain) + unwanted = usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 2) + require.Equal(t, 2, usedProviders.CurrentlyUsed()) + canUseAgain = usedProviders.tryLockSelection() + require.False(t, canUseAgain) + consumerSessionsMap = 
ConsumerSessionsMap{"test3": &SessionInfo{}, "test4": &SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap) + unwanted = usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 4) + require.Equal(t, 4, usedProviders.CurrentlyUsed()) + // one provider gives a retry + usedProviders.RemoveUsed("test", status.Error(codes.Code(SessionOutOfSyncError.ABCICode()), "")) + require.Equal(t, 3, usedProviders.CurrentlyUsed()) + unwanted = usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 3) + // one provider gives a result + usedProviders.RemoveUsed("test2", nil) + unwanted = usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 3) + require.Equal(t, 2, usedProviders.CurrentlyUsed()) + // one provider gives an error + usedProviders.RemoveUsed("test3", fmt.Errorf("bad")) + unwanted = usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 3) + require.Equal(t, 1, usedProviders.CurrentlyUsed()) + canUseAgain = usedProviders.tryLockSelection() + require.True(t, canUseAgain) + }) +} + +func TestUsedProvidersAsync(t *testing.T) { + t.Run("concurrency", func(t *testing.T) { + usedProviders := NewUsedProviders(nil) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + go func() { + time.Sleep(time.Millisecond * 10) + consumerSessionsMap := ConsumerSessionsMap{"test": &SessionInfo{}, "test2": &SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap) + }() + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*100) + defer cancel() + canUseAgain := usedProviders.TryLockSelection(ctx) + require.True(t, canUseAgain) + unwanted := usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 2) + require.Equal(t, 2, usedProviders.CurrentlyUsed()) + }) +} + +func TestUsedProvidersAsyncFail(t *testing.T) { + t.Run("concurrency", func(t *testing.T) { + usedProviders := NewUsedProviders(nil) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUseAgain := usedProviders.TryLockSelection(ctx) + require.False(t, canUseAgain) + err := ctx.Err() + require.Error(t, err) + }) +} From 1cd0136a9f810a2cb06a7d8d699930b9279f24bf Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Thu, 29 Feb 2024 21:54:11 +0200 Subject: [PATCH 15/57] added tests + refactor for simplicity --- .../lavasession/consumer_session_manager.go | 5 +- .../consumer_session_manager_test.go | 63 ++++++---------- protocol/lavasession/consumer_types.go | 6 +- .../end_to_end_lavasession_test.go | 6 +- .../lavasession/single_consumer_session.go | 6 +- protocol/rpcconsumer/relay_processor.go | 45 +++++++----- protocol/rpcconsumer/relay_processor_test.go | 73 +++++++++++++++++++ protocol/rpcconsumer/rpcconsumer_server.go | 15 +++- 8 files changed, 147 insertions(+), 72 deletions(-) create mode 100644 protocol/rpcconsumer/relay_processor_test.go diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index 1ab401c495..89d7915f1f 100644 --- 
a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -314,11 +314,10 @@ func (csm *ConsumerSessionManager) validatePairingListNotEmpty(addon string, ext // GetSessions will return a ConsumerSession, given cu needed for that session. // The user can also request specific providers to not be included in the search for a session. -func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForSession uint64, relayProcessor RelayProcessorInf, requestedBlock int64, addon string, extensions []*spectypes.Extension, stateful uint32, virtualEpoch uint64) ( +func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForSession uint64, usedProviders UsedProvidersInf, requestedBlock int64, addon string, extensions []*spectypes.Extension, stateful uint32, virtualEpoch uint64) ( consumerSessionMap ConsumerSessionsMap, errRet error, ) { // set usedProviders if they were chosen for this relay - usedProviders := relayProcessor.GetUsedProviders() timeoutCtx, cancel := context.WithTimeout(ctx, time.Second) defer cancel() canSelect := usedProviders.TryLockSelection(timeoutCtx) @@ -445,7 +444,7 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS sessionInfo.QoSSummeryResult = consumerSession.getQosComputedResultOrZero() sessions[providerAddress] = sessionInfo - consumerSession.SetUsageForSession(cuNeededForSession, csm.providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress), relayProcessor) + consumerSession.SetUsageForSession(cuNeededForSession, csm.providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress), usedProviders) // We successfully added provider, we should ignore it if we need to fetch new tempIgnoredProviders.providers[providerAddress] = struct{}{} diff --git a/protocol/lavasession/consumer_session_manager_test.go b/protocol/lavasession/consumer_session_manager_test.go index 6e840e209e..a43be66df7 100644 --- a/protocol/lavasession/consumer_session_manager_test.go +++ b/protocol/lavasession/consumer_session_manager_test.go @@ -50,22 +50,6 @@ func CreateConsumerSessionManager() *ConsumerSessionManager { var grpcServer *grpc.Server -type mockProcessor struct { - usedProviders *UsedProviders -} - -func (mp mockProcessor) GetUsedProviders() *UsedProviders { - return mp.usedProviders -} - -func (mp mockProcessor) RemoveUsed(provider string, err error) { - mp.usedProviders.RemoveUsed(provider, err) -} - -func emptyRelayProcessor() mockProcessor { - return mockProcessor{usedProviders: NewUsedProviders(nil)} -} - func TestMain(m *testing.M) { serverStarted := make(chan struct{}) @@ -155,7 +139,7 @@ func TestHappyFlow(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -177,7 +161,7 @@ func TestHappyFlowVirtualEpoch(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. 
require.NoError(t, err) - css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1), emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session + css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1), NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session require.NoError(t, err) for _, cs := range css { @@ -201,7 +185,7 @@ func TestVirtualEpochWithFailure(t *testing.T) { err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - _, err = csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1)+10, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session + _, err = csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1)+10, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session require.Error(t, err) } @@ -212,7 +196,7 @@ func TestPairingReset(t *testing.T) { err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) csm.validAddresses = []string{} // set valid addresses to zero - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) require.Equal(t, len(csm.validAddresses), len(csm.pairingAddresses)) @@ -241,7 +225,7 @@ func TestPairingResetWithFailures(t *testing.T) { if len(csm.validAddresses) == 0 { // wait for all pairings to be blocked. break } - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -250,7 +234,7 @@ func TestPairingResetWithFailures(t *testing.T) { } } require.Equal(t, len(csm.validAddresses), 0) - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) require.Equal(t, len(csm.validAddresses), len(csm.pairingAddresses)) @@ -275,7 +259,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { if len(csm.validAddresses) == 0 { // wait for all pairings to be blocked. 
break } - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session for _, cs := range css { err = csm.OnSessionFailure(cs.Session, nil) @@ -287,7 +271,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { } } require.Equal(t, len(csm.validAddresses), 0) - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) require.Equal(t, len(csm.validAddresses), len(csm.pairingAddresses)) @@ -299,7 +283,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { } } - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -334,7 +318,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { sessionList := make([]session, numberOfAllowedSessionsPerConsumer) sessionListData := make([]SessTestData, numberOfAllowedSessionsPerConsumer) for i := 0; i < numberOfAllowedSessionsPerConsumer; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { // get a session @@ -370,7 +354,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { } for i := 0; i < numberOfAllowedSessionsPerConsumer; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { // get a session @@ -403,7 +387,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { } func successfulSession(ctx context.Context, csm *ConsumerSessionManager, t *testing.T, p int, ch chan int) { - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -416,7 +400,7 @@ func successfulSession(ctx context.Context, csm *ConsumerSessionManager, t *test } func failedSession(ctx context.Context, csm *ConsumerSessionManager, t *testing.T, p int, ch chan int) { - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -534,7 
+518,7 @@ func TestSessionFailureAndGetReportedProviders(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -568,7 +552,7 @@ func TestSessionFailureEpochMisMatch(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -589,7 +573,7 @@ func TestAllProvidersEndpointsDisabled(t *testing.T) { pairingList := createPairingList("", false) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - cs, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + cs, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.Nil(t, cs) require.Error(t, err) } @@ -629,7 +613,7 @@ func TestGetSession(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) require.NoError(t, err) for _, cs := range css { @@ -675,7 +659,7 @@ func TestPairingWithAddons(t *testing.T) { // block all providers initialProvidersLen := len(csm.getValidAddresses(addon, nil)) for i := 0; i < initialProvidersLen; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, addon, nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.NOSTATE, 0) // get a session require.NoError(t, err, i) for _, cs := range css { err = csm.OnSessionFailure(cs.Session, ReportAndBlockProviderError) @@ -687,7 +671,7 @@ func TestPairingWithAddons(t *testing.T) { if addon != "" { require.NotEqual(t, csm.getValidAddresses(addon, nil), csm.getValidAddresses("", nil)) } - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, addon, nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) @@ -750,7 +734,7 @@ func TestPairingWithExtensions(t 
*testing.T) { } initialProvidersLen := len(csm.getValidAddresses(extensionOpt.addon, extensionOpt.extensions)) for i := 0; i < initialProvidersLen; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NOSTATE, 0) // get a session require.NoError(t, err, i) for _, cs := range css { err = csm.OnSessionFailure(cs.Session, ReportAndBlockProviderError) @@ -762,7 +746,7 @@ func TestPairingWithExtensions(t *testing.T) { if len(extensionOpt.extensions) > 0 || extensionOpt.addon != "" { require.NotEqual(t, csm.getValidAddresses(extensionOpt.addon, extensionOpt.extensions), csm.getValidAddresses("", nil)) } - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) @@ -797,7 +781,7 @@ func TestPairingWithStateful(t *testing.T) { providerAddresses := csm.getValidAddresses(addon, nil) allProviders := len(providerAddresses) require.Equal(t, 10, allProviders) - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session require.NoError(t, err) require.Equal(t, allProviders, len(css)) for _, cs := range css { @@ -806,8 +790,7 @@ func TestPairingWithStateful(t *testing.T) { } usedProviders := NewUsedProviders(nil) usedProviders.RemoveUsed(providerAddresses[0], nil) - processor := mockProcessor{usedProviders: usedProviders} - css, err = csm.GetSessions(ctx, cuForFirstRequest, processor, servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session + css, err = csm.GetSessions(ctx, cuForFirstRequest, usedProviders, servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session require.NoError(t, err) require.Equal(t, allProviders-1, len(css)) }) diff --git a/protocol/lavasession/consumer_types.go b/protocol/lavasession/consumer_types.go index 71e0119d43..1b5c64438b 100644 --- a/protocol/lavasession/consumer_types.go +++ b/protocol/lavasession/consumer_types.go @@ -21,9 +21,11 @@ const AllowInsecureConnectionToProvidersFlag = "allow-insecure-provider-dialing" var AllowInsecureConnectionToProviders = false -type RelayProcessorInf interface { - GetUsedProviders() *UsedProviders +type UsedProvidersInf interface { RemoveUsed(providerAddress string, err error) + TryLockSelection(context.Context) bool + AddUsed(ConsumerSessionsMap) + GetUnwantedProvidersToSend() map[string]struct{} } type SessionInfo struct { diff --git a/protocol/lavasession/end_to_end_lavasession_test.go b/protocol/lavasession/end_to_end_lavasession_test.go index 09282520e8..6cdd141aca 100644 --- 
a/protocol/lavasession/end_to_end_lavasession_test.go +++ b/protocol/lavasession/end_to_end_lavasession_test.go @@ -28,7 +28,7 @@ func TestHappyFlowE2EEmergency(t *testing.T) { successfulRelays++ for i := 0; i < len(consumerVirtualEpochs); i++ { - css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, consumerVirtualEpochs[i]) // get a session + css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, consumerVirtualEpochs[i]) // get a session require.NoError(t, err) for _, cs := range css { @@ -92,7 +92,7 @@ func TestHappyFlowE2EEmergency(t *testing.T) { func TestHappyFlowEmergencyInConsumer(t *testing.T) { csm, psm, ctx := prepareSessionsWithFirstRelay(t, maxCuForVirtualEpoch) - css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session + css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session require.NoError(t, err) for _, cs := range css { @@ -157,7 +157,7 @@ func prepareSessionsWithFirstRelay(t *testing.T, cuForFirstRequest uint64) (*Con err := csm.UpdateAllProviders(epoch1, cswpList) // update the providers. require.NoError(t, err) // get single consumer session - css, err := csm.GetSessions(ctx, cuForFirstRequest, emptyRelayProcessor(), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session require.NoError(t, err) for _, cs := range css { diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go index 41ec6b967c..561720c40d 100644 --- a/protocol/lavasession/single_consumer_session.go +++ b/protocol/lavasession/single_consumer_session.go @@ -24,7 +24,7 @@ type SingleConsumerSession struct { BlockListed bool // if session lost sync we blacklist it. ConsecutiveErrors []error errorsCount uint64 - relayProcessor RelayProcessorInf + relayProcessor UsedProvidersInf } // returns the expected latency to a threshold. 
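// Illustrative sketch, not part of the patch series: PATCH 15/57 narrows the dependency from
// RelayProcessorInf to the UsedProvidersInf interface declared in consumer_types.go above.
// Below is a minimal test double that mirrors that interface's shape, assuming
// ConsumerSessionsMap is keyed by provider address; fakeUsedProviders and the stand-in types
// are hypothetical, and the SyncLoss retry handling of the real RemoveUsed is omitted.
package main

import (
	"context"
	"fmt"
)

type sessionInfo struct{}                        // stand-in for lavasession.SessionInfo
type consumerSessionsMap map[string]*sessionInfo // stand-in for lavasession.ConsumerSessionsMap

type fakeUsedProviders struct {
	selecting bool
	inFlight  map[string]struct{}
	unwanted  map[string]struct{}
}

func newFakeUsedProviders() *fakeUsedProviders {
	return &fakeUsedProviders{inFlight: map[string]struct{}{}, unwanted: map[string]struct{}{}}
}

// TryLockSelection grants selection only when no other batch is currently being selected.
// The real implementation waits on the context; this sketch checks once and returns.
func (f *fakeUsedProviders) TryLockSelection(ctx context.Context) bool {
	if ctx.Err() != nil || f.selecting {
		return false
	}
	f.selecting = true
	return true
}

// AddUsed records the chosen sessions as in-flight and as unwanted for the next batch,
// then releases the selection lock.
func (f *fakeUsedProviders) AddUsed(sessions consumerSessionsMap) {
	for provider := range sessions {
		f.inFlight[provider] = struct{}{}
		f.unwanted[provider] = struct{}{}
	}
	f.selecting = false
}

// RemoveUsed clears a provider from the in-flight set once its relay attempt finished.
func (f *fakeUsedProviders) RemoveUsed(provider string, err error) {
	delete(f.inFlight, provider)
}

// GetUnwantedProvidersToSend lists providers to exclude from the next GetSessions batch.
func (f *fakeUsedProviders) GetUnwantedProvidersToSend() map[string]struct{} {
	return f.unwanted
}

func main() {
	f := newFakeUsedProviders()
	fmt.Println(f.TryLockSelection(context.Background())) // true: nothing is selecting yet
	f.AddUsed(consumerSessionsMap{"provider1": &sessionInfo{}, "provider2": &sessionInfo{}})
	f.RemoveUsed("provider1", nil)
	fmt.Println(len(f.GetUnwantedProvidersToSend()), len(f.inFlight)) // 2 1
}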
@@ -100,14 +100,14 @@ func (cs *SingleConsumerSession) CalculateQoS(latency, expectedLatency time.Dura } } -func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, qoSExcellenceReport *pairingtypes.QualityOfServiceReport, relayProcessor RelayProcessorInf) error { +func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, qoSExcellenceReport *pairingtypes.QualityOfServiceReport, usedProviders UsedProvidersInf) error { scs.LatestRelayCu = cuNeededForSession // set latestRelayCu scs.RelayNum += RelayNumberIncrement // increase relayNum if scs.RelayNum > 1 { // we only set excellence for sessions with more than one successful relays, this guarantees data within the epoch exists scs.QoSInfo.LastExcellenceQoSReport = qoSExcellenceReport } - scs.relayProcessor = relayProcessor + scs.relayProcessor = usedProviders return nil } diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 5025a1d90a..3097fa3962 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -78,18 +78,6 @@ func (rp *RelayProcessor) String() string { results, nodeErrors, protocolErrors, strings.Join(unwantedAddresses, ";"), strings.Join(currentlyUsedAddresses, ";")) } -// RemoveUsed will set the provider as being currently used, if the error is one that allows a retry with the same provider, it will only be removed from currently used -// if it's not, then it will be added to unwanted providers since the same relay shouldn't send to it again -func (rp *RelayProcessor) RemoveUsed(providerAddress string, err error) { - if rp == nil { - return - } - rp.lock.RLock() - usedProviders := rp.usedProviders - rp.lock.RUnlock() - usedProviders.RemoveUsed(providerAddress, err) -} - func (rp *RelayProcessor) GetUsedProviders() *lavasession.UsedProviders { rp.lock.RLock() defer rp.lock.RUnlock() @@ -98,6 +86,10 @@ func (rp *RelayProcessor) GetUsedProviders() *lavasession.UsedProviders { // this function returns all results that came from a node, meaning success, and node errors func (rp *RelayProcessor) NodeResults() []common.RelayResult { + if rp == nil { + return nil + } + rp.readExistingResponses() rp.lock.RLock() defer rp.lock.RUnlock() return rp.nodeResultsInner() @@ -229,6 +221,29 @@ func (rp *RelayProcessor) HasRequiredNodeResults() bool { return false } +func (rp *RelayProcessor) handleResponse(response *relayResponse) { + if response == nil { + return + } + if response.err != nil { + rp.setErrorResponse(response) + } else { + rp.setValidResponse(response) + } +} + +func (rp *RelayProcessor) readExistingResponses() { + for { + select { + case response := <-rp.responses: + rp.handleResponse(response) + default: + // No more responses immediately available, exit the loop + return + } + } +} + // this function waits for the processing results, they are written by multiple go routines and read by this go routine // it then updates the responses in their respective place, node errors, protocol errors or success results func (rp *RelayProcessor) WaitForResults(ctx context.Context) error { @@ -237,11 +252,7 @@ func (rp *RelayProcessor) WaitForResults(ctx context.Context) error { select { case response := <-rp.responses: responsesCount++ - if response.err != nil { - rp.setErrorResponse(response) - } else { - rp.setValidResponse(response) - } + rp.handleResponse(response) if rp.checkEndProcessing(responsesCount) { // we can finish processing return nil diff --git a/protocol/rpcconsumer/relay_processor_test.go 
b/protocol/rpcconsumer/relay_processor_test.go new file mode 100644 index 0000000000..8c69182cdb --- /dev/null +++ b/protocol/rpcconsumer/relay_processor_test.go @@ -0,0 +1,73 @@ +package rpcconsumer + +import ( + "context" + "net/http" + "testing" + "time" + + "github.com/lavanet/lava/protocol/chainlib" + "github.com/lavanet/lava/protocol/chainlib/extensionslib" + "github.com/lavanet/lava/protocol/common" + "github.com/lavanet/lava/protocol/lavasession" + pairingtypes "github.com/lavanet/lava/x/pairing/types" + spectypes "github.com/lavanet/lava/x/spec/types" + "github.com/stretchr/testify/require" +) + +func sendSuccessResp(relayProcessor *RelayProcessor, provider string, delay time.Duration) { + time.Sleep(delay) + relayProcessor.GetUsedProviders().RemoveUsed(provider, nil) + response := &relayResponse{ + relayResult: common.RelayResult{ + Request: &pairingtypes.RelayRequest{}, + Reply: &pairingtypes.RelayReply{Data: []byte("ok")}, + ProviderInfo: common.ProviderInfo{ProviderAddress: provider}, + StatusCode: http.StatusOK, + }, + err: nil, + } + relayProcessor.SetResponse(response) +} + +func TestRelayProcessorHappyFlow(t *testing.T) { + t.Run("happy", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/17", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + go sendSuccessResp(relayProcessor, "lava@test", time.Millisecond*5) + err = relayProcessor.WaitForResults(ctx) + require.NoError(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Zero(t, protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), "ok") + }) +} diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 23a05ba4f3..e5f1ff358d 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -317,6 +317,9 @@ func (rpccs *RPCConsumerServer) SendRelay( } func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveHeaders map[string]string, chainMessage chainlib.ChainMessage, relayRequestData *pairingtypes.RelayPrivateData, dappID string, consumerIp string) 
(*RelayProcessor, error) { + // make sure all of the child contexts are cancelled when we exit + ctx, cancel := context.WithCancel(ctx) + defer cancel() relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(directiveHeaders), rpccs.requiredResponses, chainMessage) err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { @@ -326,7 +329,8 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH // a channel to be notified processing was done, true means we have results and can return gotResults := make(chan bool) processingTimeout, relayTimeout := rpccs.getProcessingTimeout(chainMessage) - go func() { + + readResultsFromProcessor := func() { processingCtx, cancel := context.WithTimeout(ctx, processingTimeout) defer cancel() // ProcessResults is reading responses while blocking until the conditions are met @@ -337,8 +341,8 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH } else { gotResults <- false } - }() - + } + go readResultsFromProcessor() // every relay timeout we send a new batch startNewBatchTicker := time.NewTicker(relayTimeout) for { @@ -352,6 +356,7 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH // we failed to send a batch of relays, if there are no active sends we can terminate return relayProcessor, err } + go readResultsFromProcessor() case <-startNewBatchTicker.C: err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { @@ -360,6 +365,7 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH } } } + } func (rpccs *RPCConsumerServer) sendRelayToProvider( @@ -451,7 +457,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( virtualEpoch := rpccs.consumerTxSender.GetLatestVirtualEpoch() addon := chainlib.GetAddon(chainMessage) extensions := chainMessage.GetExtensions() - sessions, err := rpccs.consumerSessionManager.GetSessions(ctx, chainlib.GetComputeUnits(chainMessage), relayProcessor, reqBlock, addon, extensions, chainlib.GetStateful(chainMessage), virtualEpoch) + sessions, err := rpccs.consumerSessionManager.GetSessions(ctx, chainlib.GetComputeUnits(chainMessage), relayProcessor.GetUsedProviders(), reqBlock, addon, extensions, chainlib.GetStateful(chainMessage), virtualEpoch) if err != nil { if lavasession.PairingListEmptyError.Is(err) && (addon != "" || len(extensions) > 0) { // if we have no providers for a specific addon or extension, return an indicative error @@ -563,6 +569,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( errResponse = rpccs.consumerSessionManager.OnSessionDone(singleConsumerSession, latestBlock, chainlib.GetComputeUnits(chainMessage), relayLatency, singleConsumerSession.CalculateExpectedLatency(relayTimeout), expectedBH, numOfProviders, pairingAddressesLen, chainMessage.GetApi().Category.HangingApi) // session done successfully if rpccs.cache.CacheActive() && rpcclient.ValidateStatusCodes(localRelayResult.StatusCode, true) == nil { + // TODO: we set every valid response in cache, without checking quorum or data reliability // copy private data so if it changes it doesn't panic mid async send copyPrivateData := &pairingtypes.RelayPrivateData{} copyRequestErr := protocopy.DeepCopyProtoObject(localRelayResult.Request.RelayData, copyPrivateData) From 
b38652b5bfba80518bad7cc774e32a546e30996a Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Thu, 29 Feb 2024 22:06:54 +0200 Subject: [PATCH 16/57] added more tests --- protocol/rpcconsumer/relay_processor_test.go | 52 ++++++++++++++++++++ protocol/rpcconsumer/rpcconsumer_server.go | 11 +++-- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/protocol/rpcconsumer/relay_processor_test.go b/protocol/rpcconsumer/relay_processor_test.go index 8c69182cdb..5977ce94f2 100644 --- a/protocol/rpcconsumer/relay_processor_test.go +++ b/protocol/rpcconsumer/relay_processor_test.go @@ -71,3 +71,55 @@ func TestRelayProcessorHappyFlow(t *testing.T) { require.Equal(t, string(returnedResult.Reply.Data), "ok") }) } + +func TestRelayProcessorRetry(t *testing.T) { + t.Run("retry", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/17", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap) + go func() { + time.Sleep(time.Millisecond * 5) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test3": &lavasession.SessionInfo{}, "lava@test4": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap) + }() + sendSuccessResp(relayProcessor, "lava@test", time.Millisecond*20) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*200) + defer cancel() + err = relayProcessor.WaitForResults(ctx) + require.NoError(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Zero(t, protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), "ok") + }) +} diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index e5f1ff358d..5505dc2cf1 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -358,10 +358,13 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH } go readResultsFromProcessor() case <-startNewBatchTicker.C: - err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, 
relayProcessor) - if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { - // we failed to send a batch of relays, if there are no active sends we can terminate - return relayProcessor, err + // only trigger another batch for non BestResult relays + if relayProcessor.selection != BestResult { + err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) + if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { + // we failed to send a batch of relays, if there are no active sends we can terminate + return relayProcessor, err + } } } } From 628ab5c874743e5f1f5ae2a41a243ed5e74b86d0 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Fri, 1 Mar 2024 02:04:34 +0200 Subject: [PATCH 17/57] added more testing --- protocol/rpcconsumer/relay_processor_test.go | 126 ++++++++++++++++++- 1 file changed, 123 insertions(+), 3 deletions(-) diff --git a/protocol/rpcconsumer/relay_processor_test.go b/protocol/rpcconsumer/relay_processor_test.go index 5977ce94f2..66d3351c14 100644 --- a/protocol/rpcconsumer/relay_processor_test.go +++ b/protocol/rpcconsumer/relay_processor_test.go @@ -2,6 +2,7 @@ package rpcconsumer import ( "context" + "fmt" "net/http" "testing" "time" @@ -30,6 +31,36 @@ func sendSuccessResp(relayProcessor *RelayProcessor, provider string, delay time relayProcessor.SetResponse(response) } +func sendProtocolError(relayProcessor *RelayProcessor, provider string, delay time.Duration, err error) { + time.Sleep(delay) + relayProcessor.GetUsedProviders().RemoveUsed(provider, err) + response := &relayResponse{ + relayResult: common.RelayResult{ + Request: &pairingtypes.RelayRequest{}, + Reply: &pairingtypes.RelayReply{Data: []byte(`{"message":"bad","code":123}`)}, + ProviderInfo: common.ProviderInfo{ProviderAddress: provider}, + StatusCode: 0, + }, + err: err, + } + relayProcessor.SetResponse(response) +} + +func sendNodeError(relayProcessor *RelayProcessor, provider string, delay time.Duration) { + time.Sleep(delay) + relayProcessor.GetUsedProviders().RemoveUsed(provider, nil) + response := &relayResponse{ + relayResult: common.RelayResult{ + Request: &pairingtypes.RelayRequest{}, + Reply: &pairingtypes.RelayReply{Data: []byte(`{"message":"bad","code":123}`)}, + ProviderInfo: common.ProviderInfo{ProviderAddress: provider}, + StatusCode: http.StatusInternalServerError, + }, + err: nil, + } + relayProcessor.SetResponse(response) +} + func TestRelayProcessorHappyFlow(t *testing.T) { t.Run("happy", func(t *testing.T) { ctx := context.Background() @@ -72,8 +103,8 @@ func TestRelayProcessorHappyFlow(t *testing.T) { }) } -func TestRelayProcessorRetry(t *testing.T) { - t.Run("retry", func(t *testing.T) { +func TestRelayProcessorTimeout(t *testing.T) { + t.Run("timeout", func(t *testing.T) { ctx := context.Background() serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // Handle the incoming request and provide the desired response @@ -109,7 +140,7 @@ func TestRelayProcessorRetry(t *testing.T) { consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test3": &lavasession.SessionInfo{}, "lava@test4": &lavasession.SessionInfo{}} usedProviders.AddUsed(consumerSessionsMap) }() - sendSuccessResp(relayProcessor, "lava@test", time.Millisecond*20) + go sendSuccessResp(relayProcessor, "lava@test", time.Millisecond*20) ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*200) defer cancel() err = relayProcessor.WaitForResults(ctx) @@ -123,3 +154,92 @@ func TestRelayProcessorRetry(t 
*testing.T) { require.Equal(t, string(returnedResult.Reply.Data), "ok") }) } + +func TestRelayProcessorRetry(t *testing.T) { + t.Run("retry", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/17", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap) + + go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) + go sendSuccessResp(relayProcessor, "lava@test2", time.Millisecond*20) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*200) + defer cancel() + err = relayProcessor.WaitForResults(ctx) + require.NoError(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Equal(t, uint64(1), protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), "ok") + }) +} + +func TestRelayProcessorRetryNodeError(t *testing.T) { + t.Run("retry", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/17", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap) + + go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) + 
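		// note: one provider fails with a protocol error and the other replies with a node error;
		// with no successful reply, the processor is expected to surface the node-error body and its
		// 500 status code, which the assertions below verify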
go sendNodeError(relayProcessor, "lava@test2", time.Millisecond*20) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*200) + defer cancel() + err = relayProcessor.WaitForResults(ctx) + require.NoError(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Equal(t, uint64(1), protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), `{"message":"bad","code":123}`) + require.Equal(t, returnedResult.StatusCode, http.StatusInternalServerError) + }) +} From 05a9be47c31292b0264cf6ae194c906ed6c90690 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Fri, 1 Mar 2024 13:07:01 +0200 Subject: [PATCH 18/57] lint --- protocol/rpcconsumer/rpcconsumer_server.go | 1 - protocol/statetracker/updaters/policy_updater.go | 1 - scripts/pre_setups/init_lava_only_test_5.sh | 10 +++++----- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 2c510750ce..c500d9e468 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -370,7 +370,6 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH } } } - } func (rpccs *RPCConsumerServer) sendRelayToProvider( diff --git a/protocol/statetracker/updaters/policy_updater.go b/protocol/statetracker/updaters/policy_updater.go index 8dd7c3cd9e..5d87a1418c 100644 --- a/protocol/statetracker/updaters/policy_updater.go +++ b/protocol/statetracker/updaters/policy_updater.go @@ -71,7 +71,6 @@ func (pu *PolicyUpdater) UpdateEpoch(epoch uint64) { pu.lock.Lock() defer pu.lock.Unlock() // update policy now - utils.LavaFormatDebug("PolicyUpdater, fetching current policy and updating the effective policy", utils.LogAttr("epoch", epoch), utils.LogAttr("chainId", pu.chainId)) ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) defer cancel() policy, err := pu.policyFetcher.GetConsumerPolicy(ctx, pu.consumerAddress, pu.chainId) diff --git a/scripts/pre_setups/init_lava_only_test_5.sh b/scripts/pre_setups/init_lava_only_test_5.sh index 8a0e3e6530..79c3374989 100755 --- a/scripts/pre_setups/init_lava_only_test_5.sh +++ b/scripts/pre_setups/init_lava_only_test_5.sh @@ -35,11 +35,11 @@ PROVIDER5_LISTENER="127.0.0.1:2225" lavad tx subscription buy DefaultPlan $(lavad keys show user1 -a) -y --from user1 --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE wait_next_block -lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER1_LISTENER,1" 1 $(operator_address) -y --from servicer1 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE -lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER2_LISTENER,1" 1 $(operator_address) -y --from servicer2 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE -lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER3_LISTENER,1" 1 $(operator_address) -y --from servicer3 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE -lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER4_LISTENER,1" 1 $(operator_address) -y --from servicer4 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE -lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER5_LISTENER,1" 1 
$(operator_address) -y --from servicer5 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER1_LISTENER,1" 1 $(operator_address) -y --delegate-limit 10ulava --from servicer1 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER2_LISTENER,1" 1 $(operator_address) -y --delegate-limit 10ulava --from servicer2 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER3_LISTENER,1" 1 $(operator_address) -y --delegate-limit 10ulava --from servicer3 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER4_LISTENER,1" 1 $(operator_address) -y --delegate-limit 10ulava --from servicer4 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER5_LISTENER,1" 1 $(operator_address) -y --delegate-limit 10ulava --from servicer5 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE sleep_until_next_epoch From fbc2242063ff26d54ebb802952f605c489326735 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Sun, 3 Mar 2024 22:59:47 +0200 Subject: [PATCH 19/57] add unitests --- protocol/rpcconsumer/relay_processor_test.go | 104 ++++++++++++++++++- 1 file changed, 101 insertions(+), 3 deletions(-) diff --git a/protocol/rpcconsumer/relay_processor_test.go b/protocol/rpcconsumer/relay_processor_test.go index 66d3351c14..b92f41969a 100644 --- a/protocol/rpcconsumer/relay_processor_test.go +++ b/protocol/rpcconsumer/relay_processor_test.go @@ -21,7 +21,10 @@ func sendSuccessResp(relayProcessor *RelayProcessor, provider string, delay time relayProcessor.GetUsedProviders().RemoveUsed(provider, nil) response := &relayResponse{ relayResult: common.RelayResult{ - Request: &pairingtypes.RelayRequest{}, + Request: &pairingtypes.RelayRequest{ + RelaySession: &pairingtypes.RelaySession{}, + RelayData: &pairingtypes.RelayPrivateData{}, + }, Reply: &pairingtypes.RelayReply{Data: []byte("ok")}, ProviderInfo: common.ProviderInfo{ProviderAddress: provider}, StatusCode: http.StatusOK, @@ -36,7 +39,10 @@ func sendProtocolError(relayProcessor *RelayProcessor, provider string, delay ti relayProcessor.GetUsedProviders().RemoveUsed(provider, err) response := &relayResponse{ relayResult: common.RelayResult{ - Request: &pairingtypes.RelayRequest{}, + Request: &pairingtypes.RelayRequest{ + RelaySession: &pairingtypes.RelaySession{}, + RelayData: &pairingtypes.RelayPrivateData{}, + }, Reply: &pairingtypes.RelayReply{Data: []byte(`{"message":"bad","code":123}`)}, ProviderInfo: common.ProviderInfo{ProviderAddress: provider}, StatusCode: 0, @@ -51,7 +57,10 @@ func sendNodeError(relayProcessor *RelayProcessor, provider string, delay time.D relayProcessor.GetUsedProviders().RemoveUsed(provider, nil) response := &relayResponse{ relayResult: common.RelayResult{ - Request: &pairingtypes.RelayRequest{}, + Request: &pairingtypes.RelayRequest{ + RelaySession: &pairingtypes.RelaySession{}, + RelayData: &pairingtypes.RelayPrivateData{}, + }, Reply: &pairingtypes.RelayReply{Data: []byte(`{"message":"bad","code":123}`)}, ProviderInfo: common.ProviderInfo{ProviderAddress: provider}, StatusCode: 
http.StatusInternalServerError, @@ -243,3 +252,92 @@ func TestRelayProcessorRetryNodeError(t *testing.T) { require.Equal(t, returnedResult.StatusCode, http.StatusInternalServerError) }) } + +func TestRelayProcessorStatefulApi(t *testing.T) { + t.Run("stateful", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/tx/v1beta1/txs", []byte("data"), http.MethodPost, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava4@test": &lavasession.SessionInfo{}, "lava3@test": &lavasession.SessionInfo{}, "lava@test": &lavasession.SessionInfo{}, "lava2@test": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap) + go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) + go sendNodeError(relayProcessor, "lava2@test", time.Millisecond*20) + go sendNodeError(relayProcessor, "lava3@test", time.Millisecond*25) + go sendSuccessResp(relayProcessor, "lava4@test", time.Millisecond*100) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*200) + defer cancel() + err = relayProcessor.WaitForResults(ctx) + require.NoError(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Equal(t, uint64(1), protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), "ok") + require.Equal(t, http.StatusOK, returnedResult.StatusCode) + }) +} + +func TestRelayProcessorStatefulApiErr(t *testing.T) { + t.Run("stateful", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/tx/v1beta1/txs", []byte("data"), http.MethodPost, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, 
canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava4@test": &lavasession.SessionInfo{}, "lava3@test": &lavasession.SessionInfo{}, "lava@test": &lavasession.SessionInfo{}, "lava2@test": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap) + go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) + go sendNodeError(relayProcessor, "lava2@test", time.Millisecond*20) + go sendNodeError(relayProcessor, "lava3@test", time.Millisecond*25) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*50) + defer cancel() + err = relayProcessor.WaitForResults(ctx) + require.Error(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Equal(t, uint64(1), protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), `{"message":"bad","code":123}`) + require.Equal(t, returnedResult.StatusCode, http.StatusInternalServerError) + }) +} From c243a398174a12c18563bd259a30bbd93cbd69a5 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Sun, 3 Mar 2024 23:09:18 +0200 Subject: [PATCH 20/57] on init do not fail init relays on pairing --- protocol/lavasession/used_providers.go | 10 ++++++++++ protocol/rpcconsumer/rpcconsumer_server.go | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/protocol/lavasession/used_providers.go b/protocol/lavasession/used_providers.go index 8855141335..342e216ab4 100644 --- a/protocol/lavasession/used_providers.go +++ b/protocol/lavasession/used_providers.go @@ -90,6 +90,16 @@ func (up *UsedProviders) RemoveUsed(provider string, err error) { delete(up.providers, provider) } +func (up *UsedProviders) ClearUnwanted() { + if up == nil { + return + } + up.lock.Lock() + defer up.lock.Unlock() + // this is nil safe + up.unwantedProviders = map[string]struct{}{} +} + func (up *UsedProviders) AddUsed(sessions ConsumerSessionsMap) { if up == nil { return diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index c500d9e468..f4b5537062 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -193,6 +193,11 @@ func (rpccs *RPCConsumerServer) sendRelayWithRetries(ctx context.Context, retrie relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMessage) for i := 0; i < retries; i++ { err = rpccs.sendRelayToProvider(ctx, chainMessage, relay, "-init-", "", relayProcessor) + if lavasession.PairingListEmptyError.Is(err) { + // we don't have pairings anymore, could be related to unwanted providers + relayProcessor.GetUsedProviders().ClearUnwanted() + err = rpccs.sendRelayToProvider(ctx, chainMessage, relay, "-init-", "", relayProcessor) + } if err != nil { utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "relayProcessor", Value: relayProcessor}}...) 
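			// note: err here may come from the initial send or, when the pairing list was empty, from the
			// resend after ClearUnwanted(); either way we only log and let the surrounding retries loop
			// attempt the init relay again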
} else { From 60652ab2269f97cc69d469742435311f461d6eea Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Sun, 3 Mar 2024 23:28:31 +0200 Subject: [PATCH 21/57] change csm get provider error to warning --- protocol/lavasession/consumer_session_manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index 89d7915f1f..d5124bbd6a 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -534,7 +534,7 @@ func (csm *ConsumerSessionManager) getValidConsumerSessionsWithProvider(ignoredP // Fetch provider addresses providerAddresses, err := csm.getValidProviderAddresses(ignoredProviders.providers, cuNeededForSession, requestedBlock, addon, extensions, stateful) if err != nil { - utils.LavaFormatError("could not get a provider addresses", err) + utils.LavaFormatWarning("could not get a provider addresses", err, utils.LogAttr("endpoint", csm.rpcEndpoint)) return nil, err } From 617ea4a4dbb19599dae72a441d4409c66a0b16e3 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Sun, 3 Mar 2024 23:41:47 +0200 Subject: [PATCH 22/57] added errors when failing relays --- protocol/rpcconsumer/rpcconsumer_server.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index f4b5537062..5ef0770636 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -294,7 +294,7 @@ func (rpccs *RPCConsumerServer) SendRelay( relayProcessor, err := rpccs.ProcessRelaySend(ctx, directiveHeaders, chainMessage, relayRequestData, dappID, consumerIp) if err != nil && !relayProcessor.HasResults() { // we can't send anymore, and we don't have any responses - return nil, err + return nil, utils.LavaFormatError("failed getting responses from providers", err, utils.Attribute{Key: "GUID", Value: ctx}, utils.LogAttr("endpoint", rpccs.listenEndpoint.Key())) } // Handle Data Reliability enabled, dataReliabilityThreshold := rpccs.chainParser.DataReliabilityParams() @@ -312,7 +312,7 @@ func (rpccs *RPCConsumerServer) SendRelay( returnedResult, err := relayProcessor.ProcessingResult() rpccs.appendHeadersToRelayResult(ctx, returnedResult, relayProcessor.ProtocolErrors()) if err != nil { - return returnedResult, err + return returnedResult, utils.LavaFormatError("failed processing responses from providers", err, utils.Attribute{Key: "GUID", Value: ctx}, utils.LogAttr("endpoint", rpccs.listenEndpoint.Key())) } if analytics != nil { currentLatency := time.Since(relaySentTime) From 47fb46b825329a96df5a6cb057bcf430c11d4d28 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 4 Mar 2024 00:01:28 +0200 Subject: [PATCH 23/57] remove data reliability errors on latest block requests --- protocol/rpcconsumer/relay_processor.go | 2 +- protocol/rpcconsumer/relay_processor_test.go | 46 ++++++++++++++++++++ protocol/rpcconsumer/rpcconsumer_server.go | 17 ++++---- 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 3097fa3962..3b799e3e84 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -146,7 +146,7 @@ func (rp *RelayProcessor) setValidResponse(response *relayResponse) { // we do not modify the chain message data on the consumer, only it's requested block, so we let the provider know it 
can't put any block height it wants by setting a specific block height reqBlock, _ := rp.chainMessage.RequestedBlock() if reqBlock == spectypes.LATEST_BLOCK { - modifiedOnLatestReq := rp.chainMessage.UpdateLatestBlockInMessage(response.relayResult.Request.RelayData.RequestBlock, false) + modifiedOnLatestReq := rp.chainMessage.UpdateLatestBlockInMessage(response.relayResult.Reply.LatestBlock, false) if !modifiedOnLatestReq { response.relayResult.Finalized = false // shut down data reliability } diff --git a/protocol/rpcconsumer/relay_processor_test.go b/protocol/rpcconsumer/relay_processor_test.go index b92f41969a..b8be633b73 100644 --- a/protocol/rpcconsumer/relay_processor_test.go +++ b/protocol/rpcconsumer/relay_processor_test.go @@ -341,3 +341,49 @@ func TestRelayProcessorStatefulApiErr(t *testing.T) { require.Equal(t, returnedResult.StatusCode, http.StatusInternalServerError) }) } + +func TestRelayProcessorLatest(t *testing.T) { + t.Run("latest req", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/latest", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap) + + go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) + go sendSuccessResp(relayProcessor, "lava@test2", time.Millisecond*20) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*200) + defer cancel() + err = relayProcessor.WaitForResults(ctx) + require.NoError(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Equal(t, uint64(1), protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), "ok") + // reqBlock, _ := chainMsg.RequestedBlock() + // require.NotEqual(t, spectypes.LATEST_BLOCK, reqBlock) // disabled until we enable requested block modification again + }) +} diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 5ef0770636..f994731d5c 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -724,15 +724,6 @@ func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context return nil // disabled for this spec and requested block so no data reliability messages } - reqBlock, _ := 
chainMessage.RequestedBlock() - if reqBlock <= spectypes.NOT_APPLICABLE { - if reqBlock <= spectypes.LATEST_BLOCK { - return utils.LavaFormatError("sendDataReliabilityRelayIfApplicable latest requestBlock", nil, utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "RequestBlock", Value: reqBlock}) - } - // does not support sending data reliability requests on a block that is not specific - return nil - } - if rand.Uint32() > dataReliabilityThreshold { // decided not to do data reliability return nil @@ -749,6 +740,14 @@ func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context return nil } + reqBlock, _ := chainMessage.RequestedBlock() + if reqBlock <= spectypes.NOT_APPLICABLE { + if reqBlock <= spectypes.LATEST_BLOCK { + return utils.LavaFormatError("sendDataReliabilityRelayIfApplicable latest requestBlock", nil, utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "RequestBlock", Value: reqBlock}) + } + // does not support sending data reliability requests on a block that is not specific + return nil + } relayResult := results[0] if len(results) < 2 { relayRequestData := lavaprotocol.NewRelayData(ctx, relayResult.Request.RelayData.ConnectionType, relayResult.Request.RelayData.ApiUrl, relayResult.Request.RelayData.Data, relayResult.Request.RelayData.SeenBlock, reqBlock, relayResult.Request.RelayData.ApiInterface, chainMessage.GetRPCMessage().GetHeaders(), relayResult.Request.RelayData.Addon, relayResult.Request.RelayData.Extensions) From 60abc952bd732f0a42f6510a56a25ba60873c7d7 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 4 Mar 2024 00:22:46 +0200 Subject: [PATCH 24/57] disabled data reliability latest also on node errors --- protocol/common/endpoints.go | 1 + protocol/rpcconsumer/relay_processor.go | 22 +++++++++++++--------- protocol/rpcconsumer/rpcconsumer_server.go | 7 +++++++ 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/protocol/common/endpoints.go b/protocol/common/endpoints.go index 3b5dc8d09a..ab133cebf9 100644 --- a/protocol/common/endpoints.go +++ b/protocol/common/endpoints.go @@ -22,6 +22,7 @@ const ( IP_FORWARDING_HEADER_NAME = "X-Forwarded-For" PROVIDER_ADDRESS_HEADER_NAME = "Lava-Provider-Address" RETRY_COUNT_HEADER_NAME = "Lava-Retries" + PROVIDER_LATEST_BLOCK_HEADER_NAME = "Provider-Latest-Block" GUID_HEADER_NAME = "Lava-Guid" // these headers need to be lowercase BLOCK_PROVIDERS_ADDRESSES_HEADER_NAME = "lava-providers-block" diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 3b799e3e84..aa41920c99 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -133,6 +133,18 @@ func (rp *RelayProcessor) SetResponse(response *relayResponse) { func (rp *RelayProcessor) setValidResponse(response *relayResponse) { rp.lock.Lock() defer rp.lock.Unlock() + + // future relay requests and data reliability requests need to ask for the same specific block height to get consensus on the reply + // we do not modify the chain message data on the consumer, only it's requested block, so we let the provider know it can't put any block height it wants by setting a specific block height + reqBlock, _ := rp.chainMessage.RequestedBlock() + if reqBlock == spectypes.LATEST_BLOCK { + // TODO: when we turn on dataReliability on latest call UpdateLatest, until then we turn it off always + // modifiedOnLatestReq := rp.chainMessage.UpdateLatestBlockInMessage(response.relayResult.Reply.LatestBlock, false) + // if !modifiedOnLatestReq { + 
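		// note: while the latest-block update above stays commented out, the relay is simply marked
		// non-finalized, which keeps data reliability from running on latest-block requests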
response.relayResult.Finalized = false // shut down data reliability + // } + } + foundError, errorMessage := rp.chainMessage.CheckResponseError(response.relayResult.Reply.Data, response.relayResult.StatusCode) if foundError { // this is a node error, meaning we still didn't get a good response. @@ -142,15 +154,7 @@ func (rp *RelayProcessor) setValidResponse(response *relayResponse) { rp.nodeResponseErrors.relayErrors = append(rp.nodeResponseErrors.relayErrors, RelayError{err: err, ProviderInfo: response.relayResult.ProviderInfo, response: response}) return } - // future relay requests and data reliability requests need to ask for the same specific block height to get consensus on the reply - // we do not modify the chain message data on the consumer, only it's requested block, so we let the provider know it can't put any block height it wants by setting a specific block height - reqBlock, _ := rp.chainMessage.RequestedBlock() - if reqBlock == spectypes.LATEST_BLOCK { - modifiedOnLatestReq := rp.chainMessage.UpdateLatestBlockInMessage(response.relayResult.Reply.LatestBlock, false) - if !modifiedOnLatestReq { - response.relayResult.Finalized = false // shut down data reliability - } - } + rp.successResults = append(rp.successResults, response.relayResult) } diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index f994731d5c..51af5fb57b 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -878,6 +878,13 @@ func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, Value: strconv.FormatUint(protocolErrors, 10), }) } + if relayResult.Reply.LatestBlock > 0 { + metadataReply = append(metadataReply, + pairingtypes.Metadata{ + Name: common.PROVIDER_LATEST_BLOCK_HEADER_NAME, + Value: strconv.FormatInt(relayResult.Reply.LatestBlock, 10), + }) + } guid, found := utils.GetUniqueIdentifier(ctx) if found && guid != 0 { guidStr := strconv.FormatUint(guid, 10) From 488da3adacc4c167a6f9051fdd758772b0560cb2 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 4 Mar 2024 08:32:53 +0200 Subject: [PATCH 25/57] add more information on error during quorum --- protocol/rpcconsumer/relay_processor.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index aa41920c99..b9ed614f31 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -275,6 +275,7 @@ func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSi deterministic := rp.chainMessage.GetApi().Category.Deterministic var bestQosResult common.RelayResult bestQos := sdktypes.ZeroDec() + nilReplies := 0 for _, result := range results { if result.Reply != nil && result.Reply.Data != nil { countMap[string(result.Reply.Data)]++ @@ -288,6 +289,8 @@ func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSi bestQosResult = result } } + } else { + nilReplies++ } } var mostCommonResult common.RelayResult @@ -309,7 +312,7 @@ func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSi // instead of failing get the best one return &bestQosResult, nil } - return nil, errors.New("majority count is less than quorumSize") + return nil, utils.LavaFormatInfo("majority count is less than quorumSize", utils.LogAttr("nilReplies", nilReplies), utils.LogAttr("results", len(results)), utils.LogAttr("maxCount", maxCount), utils.LogAttr("quorumSize", 
quorumSize)) } mostCommonResult.Quorum = maxCount return &mostCommonResult, nil From 2927739c698b9bdea596cb7238246c5458448838 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 4 Mar 2024 09:11:44 +0200 Subject: [PATCH 26/57] allow empty results for queries without an error --- protocol/rpcconsumer/relay_processor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index b9ed614f31..8863848018 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -306,7 +306,7 @@ func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSi } // Check if the majority count is less than quorumSize - if mostCommonResult.Reply == nil || maxCount < quorumSize { + if maxCount < quorumSize { if !deterministic { // non deterministic apis might not have a quorum // instead of failing get the best one From 75b74da4e7fcbf60713df8f296cbeb13175c00c9 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 4 Mar 2024 10:41:33 +0200 Subject: [PATCH 27/57] add missing consumer consistency --- protocol/rpcconsumer/consumer_consistency.go | 3 +++ protocol/rpcconsumer/relay_processor.go | 15 +++++++++++++-- protocol/rpcconsumer/relay_processor_test.go | 14 +++++++------- protocol/rpcconsumer/rpcconsumer_server.go | 6 +++--- testutil/e2e/e2eProviderConfigs/lavaConsumer1.yml | 1 + testutil/e2e/e2eProviderConfigs/lavaConsumer2.yml | 1 + .../e2eProviderConfigs/lavaConsumerEmergency1.yml | 3 ++- 7 files changed, 30 insertions(+), 13 deletions(-) diff --git a/protocol/rpcconsumer/consumer_consistency.go b/protocol/rpcconsumer/consumer_consistency.go index 8f0947faa1..66c1613ef0 100644 --- a/protocol/rpcconsumer/consumer_consistency.go +++ b/protocol/rpcconsumer/consumer_consistency.go @@ -45,6 +45,9 @@ func (cc *ConsumerConsistency) Key(dappId string, ip string) string { } func (cc *ConsumerConsistency) SetSeenBlock(blockSeen int64, dappId string, ip string) { + if cc == nil { + return + } block, _ := cc.getLatestBlock(cc.Key(dappId, ip)) if block < blockSeen { cc.setLatestBlock(cc.Key(dappId, ip), blockSeen) diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 8863848018..5c52be69c8 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -27,7 +27,7 @@ const ( BestResult // get the best result, even if it means waiting ) -func NewRelayProcessor(ctx context.Context, usedProviders *lavasession.UsedProviders, requiredSuccesses int, chainMessage chainlib.ChainMessage) *RelayProcessor { +func NewRelayProcessor(ctx context.Context, usedProviders *lavasession.UsedProviders, requiredSuccesses int, chainMessage chainlib.ChainMessage, consumerConsistency *ConsumerConsistency, dappID string, consumerIp string) *RelayProcessor { guid, _ := utils.GetUniqueIdentifier(ctx) selection := Quorum // select the majority of node responses if chainlib.GetStateful(chainMessage) == common.CONSISTENCY_SELECT_ALLPROVIDERS { @@ -45,6 +45,9 @@ func NewRelayProcessor(ctx context.Context, usedProviders *lavasession.UsedProvi chainMessage: chainMessage, guid: guid, selection: selection, + consumerConsistency: consumerConsistency, + dappID: dappID, + consumerIp: consumerIp, } } @@ -59,6 +62,9 @@ type RelayProcessor struct { chainMessage chainlib.ChainMessage guid uint64 selection Selection + consumerConsistency *ConsumerConsistency + dappID string + consumerIp string } func (rp *RelayProcessor) String() 
string { @@ -144,7 +150,12 @@ func (rp *RelayProcessor) setValidResponse(response *relayResponse) { response.relayResult.Finalized = false // shut down data reliability // } } - + if response.err == nil && response.relayResult.Reply != nil { + // no error, update the seen block + blockSeen := response.relayResult.Reply.LatestBlock + // nil safe + rp.consumerConsistency.SetSeenBlock(blockSeen, rp.dappID, rp.consumerIp) + } foundError, errorMessage := rp.chainMessage.CheckResponseError(response.relayResult.Reply.Data, response.relayResult.StatusCode) if foundError { // this is a node error, meaning we still didn't get a good response. diff --git a/protocol/rpcconsumer/relay_processor_test.go b/protocol/rpcconsumer/relay_processor_test.go index b8be633b73..4251823707 100644 --- a/protocol/rpcconsumer/relay_processor_test.go +++ b/protocol/rpcconsumer/relay_processor_test.go @@ -85,7 +85,7 @@ func TestRelayProcessorHappyFlow(t *testing.T) { require.NoError(t, err) chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/17", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) require.NoError(t, err) - relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") usedProviders := relayProcessor.GetUsedProviders() ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) @@ -127,7 +127,7 @@ func TestRelayProcessorTimeout(t *testing.T) { require.NoError(t, err) chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/17", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) require.NoError(t, err) - relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") usedProviders := relayProcessor.GetUsedProviders() ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) @@ -179,7 +179,7 @@ func TestRelayProcessorRetry(t *testing.T) { require.NoError(t, err) chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/17", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) require.NoError(t, err) - relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") usedProviders := relayProcessor.GetUsedProviders() ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) @@ -223,7 +223,7 @@ func TestRelayProcessorRetryNodeError(t *testing.T) { require.NoError(t, err) chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/17", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) require.NoError(t, err) - relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") usedProviders := relayProcessor.GetUsedProviders() ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) @@ -268,7 +268,7 @@ func TestRelayProcessorStatefulApi(t *testing.T) { require.NoError(t, err) chainMsg, err := chainParser.ParseMsg("/cosmos/tx/v1beta1/txs", []byte("data"), http.MethodPost, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) require.NoError(t, err) - relayProcessor := 
NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") usedProviders := relayProcessor.GetUsedProviders() ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) defer cancel() @@ -313,7 +313,7 @@ func TestRelayProcessorStatefulApiErr(t *testing.T) { require.NoError(t, err) chainMsg, err := chainParser.ParseMsg("/cosmos/tx/v1beta1/txs", []byte("data"), http.MethodPost, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) require.NoError(t, err) - relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") usedProviders := relayProcessor.GetUsedProviders() ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) defer cancel() @@ -357,7 +357,7 @@ func TestRelayProcessorLatest(t *testing.T) { require.NoError(t, err) chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/latest", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) require.NoError(t, err) - relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") usedProviders := relayProcessor.GetUsedProviders() ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) defer cancel() diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 51af5fb57b..869558e8de 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -190,7 +190,7 @@ func (rpccs *RPCConsumerServer) craftRelay(ctx context.Context) (ok bool, relay func (rpccs *RPCConsumerServer) sendRelayWithRetries(ctx context.Context, retries int, initialRelays bool, relay *pairingtypes.RelayPrivateData, chainMessage chainlib.ChainMessage) (bool, error) { success := false var err error - relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMessage) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMessage, rpccs.consumerConsistency, "-init-", "") for i := 0; i < retries; i++ { err = rpccs.sendRelayToProvider(ctx, chainMessage, relay, "-init-", "", relayProcessor) if lavasession.PairingListEmptyError.Is(err) { @@ -327,7 +327,7 @@ func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveH // make sure all of the child contexts are cancelled when we exit ctx, cancel := context.WithCancel(ctx) defer cancel() - relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(directiveHeaders), rpccs.requiredResponses, chainMessage) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(directiveHeaders), rpccs.requiredResponses, chainMessage, rpccs.consumerConsistency, dappID, consumerIp) err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { // we failed to send a batch of relays, if there are no active sends we can terminate @@ -751,7 +751,7 @@ func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context relayResult := results[0] if len(results) < 2 { relayRequestData := lavaprotocol.NewRelayData(ctx, relayResult.Request.RelayData.ConnectionType, 
relayResult.Request.RelayData.ApiUrl, relayResult.Request.RelayData.Data, relayResult.Request.RelayData.SeenBlock, reqBlock, relayResult.Request.RelayData.ApiInterface, chainMessage.GetRPCMessage().GetHeaders(), relayResult.Request.RelayData.Addon, relayResult.Request.RelayData.Extensions) - relayProcessorDataReliability := NewRelayProcessor(ctx, relayProcessor.usedProviders, 1, chainMessage) + relayProcessorDataReliability := NewRelayProcessor(ctx, relayProcessor.usedProviders, 1, chainMessage, rpccs.consumerConsistency, dappID, consumerIp) err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessorDataReliability) if err != nil { return utils.LavaFormatWarning("failed data reliability relay to provider", err, utils.LogAttr("relayProcessorDataReliability", relayProcessorDataReliability)) diff --git a/testutil/e2e/e2eProviderConfigs/lavaConsumer1.yml b/testutil/e2e/e2eProviderConfigs/lavaConsumer1.yml index f31f2f4c85..af1f67666e 100644 --- a/testutil/e2e/e2eProviderConfigs/lavaConsumer1.yml +++ b/testutil/e2e/e2eProviderConfigs/lavaConsumer1.yml @@ -8,3 +8,4 @@ endpoints: - chain-id: LAV1 api-interface: grpc network-address: 127.0.0.1:3342 +debug-relays: true \ No newline at end of file diff --git a/testutil/e2e/e2eProviderConfigs/lavaConsumer2.yml b/testutil/e2e/e2eProviderConfigs/lavaConsumer2.yml index 403b31c0fd..3e5d3c0192 100644 --- a/testutil/e2e/e2eProviderConfigs/lavaConsumer2.yml +++ b/testutil/e2e/e2eProviderConfigs/lavaConsumer2.yml @@ -8,3 +8,4 @@ endpoints: - chain-id: LAV1 api-interface: grpc network-address: 127.0.0.1:3345 +debug-relays: true \ No newline at end of file diff --git a/testutil/e2e/e2eProviderConfigs/lavaConsumerEmergency1.yml b/testutil/e2e/e2eProviderConfigs/lavaConsumerEmergency1.yml index a8f03c79d0..01a23596ca 100644 --- a/testutil/e2e/e2eProviderConfigs/lavaConsumerEmergency1.yml +++ b/testutil/e2e/e2eProviderConfigs/lavaConsumerEmergency1.yml @@ -7,4 +7,5 @@ endpoints: network-address: 127.0.0.1:3347 - chain-id: LAV1 api-interface: grpc - network-address: 127.0.0.1:3348 \ No newline at end of file + network-address: 127.0.0.1:3348 +debug-relays: true \ No newline at end of file From 24d06f352175d5906a56cdb55ff73f1304b16163 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 4 Mar 2024 11:57:00 +0200 Subject: [PATCH 28/57] empty response quorum --- protocol/rpcconsumer/relay_processor.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 5c52be69c8..c280b36ab5 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -287,7 +287,8 @@ func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSi var bestQosResult common.RelayResult bestQos := sdktypes.ZeroDec() nilReplies := 0 - for _, result := range results { + nilReplyIdx := -1 + for idx, result := range results { if result.Reply != nil && result.Reply.Data != nil { countMap[string(result.Reply.Data)]++ if !deterministic { @@ -302,6 +303,7 @@ func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSi } } else { nilReplies++ + nilReplyIdx = idx } } var mostCommonResult common.RelayResult @@ -316,11 +318,17 @@ func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSi } } + if nilReplies >= quorumSize && maxCount < quorumSize { + // we don't have a quorum with a valid response, but we have a quorum with an empty one + maxCount = nilReplies + 
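		// note: promote one of the empty replies to be the returned result, so callers still receive a
		// representative response object when the quorum was reached only on empty data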
mostCommonResult = results[nilReplyIdx] + } // Check if the majority count is less than quorumSize if maxCount < quorumSize { if !deterministic { // non deterministic apis might not have a quorum // instead of failing get the best one + bestQosResult.Quorum = 1 return &bestQosResult, nil } return nil, utils.LavaFormatInfo("majority count is less than quorumSize", utils.LogAttr("nilReplies", nilReplies), utils.LogAttr("results", len(results)), utils.LogAttr("maxCount", maxCount), utils.LogAttr("quorumSize", quorumSize)) From e6fbeed813d578e279f35777457ad7a83989ddf4 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 4 Mar 2024 12:00:34 +0200 Subject: [PATCH 29/57] add more info on provider error --- protocol/monitoring/health.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protocol/monitoring/health.go b/protocol/monitoring/health.go index db50955ea0..37a3e631b2 100644 --- a/protocol/monitoring/health.go +++ b/protocol/monitoring/health.go @@ -35,7 +35,7 @@ var QueryRetries = uint64(3) const ( BasicQueryRetries = 3 QuerySleepTime = 100 * time.Millisecond - NiceOutputLength = 40 + NiceOutputLength = 100 ) type LavaEntity struct { From cec200fe20370903afc787017c32ce76d7448369 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 6 Mar 2024 14:55:05 +0200 Subject: [PATCH 30/57] added basic consumer unitest --- protocol/integration/mocks.go | 59 ++++++++++++++ protocol/integration/protocol_test.go | 111 ++++++++++++++++++++++++++ 2 files changed, 170 insertions(+) create mode 100644 protocol/integration/mocks.go create mode 100644 protocol/integration/protocol_test.go diff --git a/protocol/integration/mocks.go b/protocol/integration/mocks.go new file mode 100644 index 0000000000..5a370420e9 --- /dev/null +++ b/protocol/integration/mocks.go @@ -0,0 +1,59 @@ +package integration_test + +import ( + "context" + "fmt" + + "github.com/lavanet/lava/protocol/common" + "github.com/lavanet/lava/protocol/lavaprotocol" + "github.com/lavanet/lava/protocol/lavasession" + "github.com/lavanet/lava/protocol/statetracker/updaters" + conflicttypes "github.com/lavanet/lava/x/conflict/types" + plantypes "github.com/lavanet/lava/x/plans/types" + protocoltypes "github.com/lavanet/lava/x/protocol/types" +) + +type mockConsumerStateTracker struct { +} + +func (m *mockConsumerStateTracker) RegisterForVersionUpdates(ctx context.Context, version *protocoltypes.Version, versionValidator updaters.VersionValidationInf) { + +} + +func (m *mockConsumerStateTracker) RegisterConsumerSessionManagerForPairingUpdates(ctx context.Context, consumerSessionManager *lavasession.ConsumerSessionManager) { + +} +func (m *mockConsumerStateTracker) RegisterForSpecUpdates(ctx context.Context, specUpdatable updaters.SpecUpdatable, endpoint lavasession.RPCEndpoint) error { + return nil +} + +func (m *mockConsumerStateTracker) RegisterFinalizationConsensusForUpdates(context.Context, *lavaprotocol.FinalizationConsensus) { + +} + +func (m *mockConsumerStateTracker) RegisterForDowntimeParamsUpdates(ctx context.Context, downtimeParamsUpdatable updaters.DowntimeParamsUpdatable) error { + return nil +} + +func (m *mockConsumerStateTracker) TxConflictDetection(ctx context.Context, finalizationConflict *conflicttypes.FinalizationConflict, responseConflict *conflicttypes.ResponseConflict, sameProviderConflict *conflicttypes.FinalizationConflict, conflictHandler common.ConflictHandlerInterface) error { + return nil +} + +func (m *mockConsumerStateTracker) GetConsumerPolicy(ctx context.Context, consumerAddress, chainID string) 
(*plantypes.Policy, error) { + return &plantypes.Policy{ + ChainPolicies: []plantypes.ChainPolicy{}, + GeolocationProfile: 1, + TotalCuLimit: 10000, + EpochCuLimit: 1000, + MaxProvidersToPair: 5, + SelectedProvidersMode: 0, + SelectedProviders: []string{}, + }, nil +} + +func (m *mockConsumerStateTracker) GetProtocolVersion(ctx context.Context) (*updaters.ProtocolVersionResponse, error) { + return nil, fmt.Errorf("banana") +} +func (m *mockConsumerStateTracker) GetLatestVirtualEpoch() uint64 { + return 0 +} diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go new file mode 100644 index 0000000000..4d36e0a75b --- /dev/null +++ b/protocol/integration/protocol_test.go @@ -0,0 +1,111 @@ +package integration_test + +import ( + "context" + "net/http" + "os" + "testing" + "time" + + "github.com/lavanet/lava/protocol/chainlib" + "github.com/lavanet/lava/protocol/common" + "github.com/lavanet/lava/protocol/lavaprotocol" + "github.com/lavanet/lava/protocol/lavasession" + "github.com/lavanet/lava/protocol/metrics" + "github.com/lavanet/lava/protocol/provideroptimizer" + "github.com/lavanet/lava/protocol/rpcconsumer" + "github.com/lavanet/lava/utils" + "github.com/lavanet/lava/utils/rand" + "github.com/lavanet/lava/utils/sigs" + "github.com/stretchr/testify/require" + + // pairingtypes "github.com/lavanet/lava/x/pairing/types" + spectypes "github.com/lavanet/lava/x/spec/types" +) + +var seed int64 + +func TestMain(m *testing.M) { + // This code will run once before any test cases are executed. + seed = time.Now().Unix() + + rand.SetSpecificSeed(seed) + // Run the actual tests + exitCode := m.Run() + if exitCode != 0 { + utils.LavaFormatDebug("failed tests seed", utils.Attribute{Key: "seed", Value: seed}) + } + os.Exit(exitCode) +} + +func TestConsumerProviderBasic(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + // can be any spec and api interface + specId := "LAV1" + epoch := uint64(100) + requiredResponses := 1 + lavaChainID := "lava" + + apiInterface := spectypes.APIInterfaceTendermintRPC + chainParser, _, chainFetcher, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, apiInterface, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + require.NotNil(t, chainParser) + require.NotNil(t, chainFetcher) + // create basic consumer stuff + providerListenAddress := "localhost:0" + specIdLava := "LAV1" + chainParserLava, _, chainFetcherLava, closeServer, err := chainlib.CreateChainLibMocks(ctx, specIdLava, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + require.NotNil(t, chainParserLava) + require.NotNil(t, chainFetcherLava) + rpcConsumerServer := &rpcconsumer.RPCConsumerServer{} + rpcEndpoint := &lavasession.RPCEndpoint{ + NetworkAddress: "", + ChainID: specId, + ApiInterface: apiInterface, + TLSEnabled: false, + HealthCheckPath: "", + Geolocation: 1, + } + consumerStateTracker := &mockConsumerStateTracker{} + finalizationConsensus := lavaprotocol.NewFinalizationConsensus(rpcEndpoint.ChainID) + _, averageBlockTime, _, _ := chainParser.ChainBlockStats() + baseLatency := common.AverageWorldLatency / 2 // we want performance to be half our timeout or better + optimizer := 
provideroptimizer.NewProviderOptimizer(provideroptimizer.STRATEGY_BALANCED, averageBlockTime, baseLatency, 2) + consumerSessionManager := lavasession.NewConsumerSessionManager(rpcEndpoint, optimizer, nil, nil) + pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{ + 1: { + PublicLavaAddress: "", + Endpoints: []*lavasession.Endpoint{ + { + NetworkAddress: providerListenAddress, + Enabled: true, + Geolocation: 0, + }, + }, + Sessions: map[int64]*lavasession.SingleConsumerSession{}, + MaxComputeUnits: 10000, + UsedComputeUnits: 0, + PairingEpoch: epoch, + }, + } + consumerSessionManager.UpdateAllProviders(epoch, pairingList) + randomizer := sigs.NewZeroReader(seed) + account := sigs.GenerateDeterministicFloatingKey(randomizer) + consumerConsistency := rpcconsumer.NewConsumerConsistency(specId) + consumerCmdFlags := common.ConsumerCmdFlags{} + rpcsonumerLogs, err := metrics.NewRPCConsumerLogs(nil, nil) + require.NoError(t, err) + err = rpcConsumerServer.ServeRPCRequests(ctx, rpcEndpoint, consumerStateTracker, chainParser, finalizationConsensus, consumerSessionManager, requiredResponses, account.SK, lavaChainID, nil, rpcsonumerLogs, account.Addr, consumerConsistency, nil, consumerCmdFlags, false, nil, nil) + require.NoError(t, err) +} From bf735febcf7a9731fe59bc1d94a90d57ad4fcab5 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 6 Mar 2024 15:23:46 +0200 Subject: [PATCH 31/57] added a check for consumer to be up during unitest --- protocol/integration/protocol_test.go | 87 ++++++++++++++++++--------- 1 file changed, 60 insertions(+), 27 deletions(-) diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index 4d36e0a75b..ebe9869b31 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -38,6 +38,33 @@ func TestMain(m *testing.M) { os.Exit(exitCode) } +func isServerUp(url string) bool { + client := http.Client{ + Timeout: 20 * time.Millisecond, + } + + resp, err := client.Get(url) + if err != nil { + return false + } + + defer resp.Body.Close() + + return resp.ContentLength > 0 +} + +func checkServerStatusWithTimeout(url string, totalTimeout time.Duration) bool { + startTime := time.Now() + + for time.Since(startTime) < totalTimeout { + if isServerUp(url) { + return true + } + time.Sleep(20 * time.Millisecond) + } + + return false +} func TestConsumerProviderBasic(t *testing.T) { ctx := context.Background() serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -46,31 +73,42 @@ func TestConsumerProviderBasic(t *testing.T) { }) // can be any spec and api interface specId := "LAV1" + apiInterface := spectypes.APIInterfaceTendermintRPC epoch := uint64(100) requiredResponses := 1 lavaChainID := "lava" - - apiInterface := spectypes.APIInterfaceTendermintRPC - chainParser, _, chainFetcher, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, apiInterface, serverHandler, "../../", nil) - if closeServer != nil { - defer closeServer() + providerListenAddress := "localhost:0" + consumerListenAddress := "localhost:11111" + pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{ + 1: { + PublicLavaAddress: "", + Endpoints: []*lavasession.Endpoint{ + { + NetworkAddress: providerListenAddress, + Enabled: true, + Geolocation: 0, + }, + }, + Sessions: map[int64]*lavasession.SingleConsumerSession{}, + MaxComputeUnits: 10000, + UsedComputeUnits: 0, + PairingEpoch: epoch, + }, } + + chainParser, _, chainFetcher, _, err := chainlib.CreateChainLibMocks(ctx, specId, 
apiInterface, serverHandler, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainFetcher) // create basic consumer stuff - providerListenAddress := "localhost:0" specIdLava := "LAV1" - chainParserLava, _, chainFetcherLava, closeServer, err := chainlib.CreateChainLibMocks(ctx, specIdLava, spectypes.APIInterfaceRest, serverHandler, "../../", nil) - if closeServer != nil { - defer closeServer() - } + chainParserLava, _, chainFetcherLava, _, err := chainlib.CreateChainLibMocks(ctx, specIdLava, spectypes.APIInterfaceRest, serverHandler, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParserLava) require.NotNil(t, chainFetcherLava) rpcConsumerServer := &rpcconsumer.RPCConsumerServer{} rpcEndpoint := &lavasession.RPCEndpoint{ - NetworkAddress: "", + NetworkAddress: consumerListenAddress, ChainID: specId, ApiInterface: apiInterface, TLSEnabled: false, @@ -83,22 +121,7 @@ func TestConsumerProviderBasic(t *testing.T) { baseLatency := common.AverageWorldLatency / 2 // we want performance to be half our timeout or better optimizer := provideroptimizer.NewProviderOptimizer(provideroptimizer.STRATEGY_BALANCED, averageBlockTime, baseLatency, 2) consumerSessionManager := lavasession.NewConsumerSessionManager(rpcEndpoint, optimizer, nil, nil) - pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{ - 1: { - PublicLavaAddress: "", - Endpoints: []*lavasession.Endpoint{ - { - NetworkAddress: providerListenAddress, - Enabled: true, - Geolocation: 0, - }, - }, - Sessions: map[int64]*lavasession.SingleConsumerSession{}, - MaxComputeUnits: 10000, - UsedComputeUnits: 0, - PairingEpoch: epoch, - }, - } + consumerSessionManager.UpdateAllProviders(epoch, pairingList) randomizer := sigs.NewZeroReader(seed) account := sigs.GenerateDeterministicFloatingKey(randomizer) @@ -108,4 +131,14 @@ func TestConsumerProviderBasic(t *testing.T) { require.NoError(t, err) err = rpcConsumerServer.ServeRPCRequests(ctx, rpcEndpoint, consumerStateTracker, chainParser, finalizationConsensus, consumerSessionManager, requiredResponses, account.SK, lavaChainID, nil, rpcsonumerLogs, account.Addr, consumerConsistency, nil, consumerCmdFlags, false, nil, nil) require.NoError(t, err) + + // wait for consumer server to be up + consumerUp := checkServerStatusWithTimeout("http://"+consumerListenAddress, time.Millisecond*50) + require.True(t, consumerUp) + + client := http.Client{} + resp, err := client.Get("http://" + consumerListenAddress + "/status") + require.NoError(t, err) + require.Equal(t, http.StatusInternalServerError, resp.StatusCode) + resp.Body.Close() } From 4d41b089f10d74ce30be540f6a5489ed6c726b76 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 6 Mar 2024 15:27:06 +0200 Subject: [PATCH 32/57] refactor code for reuse in unitest --- protocol/integration/protocol_test.go | 66 +++++++++++++++------------ 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index ebe9869b31..435ae35834 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -65,42 +65,17 @@ func checkServerStatusWithTimeout(url string, totalTimeout time.Duration) bool { return false } -func TestConsumerProviderBasic(t *testing.T) { - ctx := context.Background() + +func createRpcConsumer(t *testing.T, ctx context.Context, specId string, apiInterface string, consumerListenAddress string, epoch uint64, pairingList map[uint64]*lavasession.ConsumerSessionsWithProvider, 
requiredResponses int, lavaChainID string) *rpcconsumer.RPCConsumerServer { serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // Handle the incoming request and provide the desired response w.WriteHeader(http.StatusOK) }) - // can be any spec and api interface - specId := "LAV1" - apiInterface := spectypes.APIInterfaceTendermintRPC - epoch := uint64(100) - requiredResponses := 1 - lavaChainID := "lava" - providerListenAddress := "localhost:0" - consumerListenAddress := "localhost:11111" - pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{ - 1: { - PublicLavaAddress: "", - Endpoints: []*lavasession.Endpoint{ - { - NetworkAddress: providerListenAddress, - Enabled: true, - Geolocation: 0, - }, - }, - Sessions: map[int64]*lavasession.SingleConsumerSession{}, - MaxComputeUnits: 10000, - UsedComputeUnits: 0, - PairingEpoch: epoch, - }, - } - chainParser, _, chainFetcher, _, err := chainlib.CreateChainLibMocks(ctx, specId, apiInterface, serverHandler, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainFetcher) - // create basic consumer stuff + specIdLava := "LAV1" chainParserLava, _, chainFetcherLava, _, err := chainlib.CreateChainLibMocks(ctx, specIdLava, spectypes.APIInterfaceRest, serverHandler, "../../", nil) require.NoError(t, err) @@ -118,7 +93,7 @@ func TestConsumerProviderBasic(t *testing.T) { consumerStateTracker := &mockConsumerStateTracker{} finalizationConsensus := lavaprotocol.NewFinalizationConsensus(rpcEndpoint.ChainID) _, averageBlockTime, _, _ := chainParser.ChainBlockStats() - baseLatency := common.AverageWorldLatency / 2 // we want performance to be half our timeout or better + baseLatency := common.AverageWorldLatency / 2 optimizer := provideroptimizer.NewProviderOptimizer(provideroptimizer.STRATEGY_BALANCED, averageBlockTime, baseLatency, 2) consumerSessionManager := lavasession.NewConsumerSessionManager(rpcEndpoint, optimizer, nil, nil) @@ -131,11 +106,42 @@ func TestConsumerProviderBasic(t *testing.T) { require.NoError(t, err) err = rpcConsumerServer.ServeRPCRequests(ctx, rpcEndpoint, consumerStateTracker, chainParser, finalizationConsensus, consumerSessionManager, requiredResponses, account.SK, lavaChainID, nil, rpcsonumerLogs, account.Addr, consumerConsistency, nil, consumerCmdFlags, false, nil, nil) require.NoError(t, err) - // wait for consumer server to be up consumerUp := checkServerStatusWithTimeout("http://"+consumerListenAddress, time.Millisecond*50) require.True(t, consumerUp) + return rpcConsumerServer +} + +func TestConsumerProviderBasic(t *testing.T) { + ctx := context.Background() + // can be any spec and api interface + specId := "LAV1" + apiInterface := spectypes.APIInterfaceTendermintRPC + epoch := uint64(100) + requiredResponses := 1 + lavaChainID := "lava" + providerListenAddress := "localhost:0" + consumerListenAddress := "localhost:11111" + pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{ + 1: { + PublicLavaAddress: "", + Endpoints: []*lavasession.Endpoint{ + { + NetworkAddress: providerListenAddress, + Enabled: true, + Geolocation: 0, + }, + }, + Sessions: map[int64]*lavasession.SingleConsumerSession{}, + MaxComputeUnits: 10000, + UsedComputeUnits: 0, + PairingEpoch: epoch, + }, + } + + rpcconsumerServer := createRpcConsumer(t, ctx, specId, apiInterface, consumerListenAddress, epoch, pairingList, requiredResponses, lavaChainID) + require.NotNil(t, rpcconsumerServer) client := http.Client{} resp, err := client.Get("http://" + 
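Folding the consumer bring-up into createRpcConsumer also makes it cheap to cover more API interfaces. A hypothetical table-driven variant using the helper signature introduced in this patch; the cases, listen addresses, and the reuse of a single pairing list are illustrative only:

// Hypothetical reuse of the helper; each case would normally get its own
// pairing list pointing at a provider that serves the matching interface.
cases := []struct {
	name         string
	apiInterface string
	listenAddr   string
}{
	{"tendermintrpc", spectypes.APIInterfaceTendermintRPC, "localhost:21111"},
	{"rest", spectypes.APIInterfaceRest, "localhost:21112"},
}
for _, tc := range cases {
	t.Run(tc.name, func(t *testing.T) {
		server := createRpcConsumer(t, ctx, specId, tc.apiInterface, tc.listenAddr, epoch, pairingList, requiredResponses, lavaChainID)
		require.NotNil(t, server)
	})
}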
consumerListenAddress + "/status") require.NoError(t, err) From a50600a448767e63895583a39163c5626b47c8d0 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Thu, 7 Mar 2024 11:30:12 +0200 Subject: [PATCH 33/57] added provider and consumer unitests --- protocol/integration/mocks.go | 195 ++++++++++++++++++++- protocol/integration/protocol_test.go | 148 +++++++++++++--- protocol/rpcconsumer/rpcconsumer_server.go | 6 +- protocol/rpcprovider/rpcprovider.go | 4 +- 4 files changed, 322 insertions(+), 31 deletions(-) diff --git a/protocol/integration/mocks.go b/protocol/integration/mocks.go index 5a370420e9..870e8180b0 100644 --- a/protocol/integration/mocks.go +++ b/protocol/integration/mocks.go @@ -3,32 +3,36 @@ package integration_test import ( "context" "fmt" + "strconv" + "sync" + "time" + "github.com/lavanet/lava/protocol/chaintracker" "github.com/lavanet/lava/protocol/common" "github.com/lavanet/lava/protocol/lavaprotocol" "github.com/lavanet/lava/protocol/lavasession" + "github.com/lavanet/lava/protocol/rpcprovider/reliabilitymanager" "github.com/lavanet/lava/protocol/statetracker/updaters" + "github.com/lavanet/lava/utils" conflicttypes "github.com/lavanet/lava/x/conflict/types" + pairingtypes "github.com/lavanet/lava/x/pairing/types" plantypes "github.com/lavanet/lava/x/plans/types" protocoltypes "github.com/lavanet/lava/x/protocol/types" ) -type mockConsumerStateTracker struct { -} +type mockConsumerStateTracker struct{} func (m *mockConsumerStateTracker) RegisterForVersionUpdates(ctx context.Context, version *protocoltypes.Version, versionValidator updaters.VersionValidationInf) { - } func (m *mockConsumerStateTracker) RegisterConsumerSessionManagerForPairingUpdates(ctx context.Context, consumerSessionManager *lavasession.ConsumerSessionManager) { - } + func (m *mockConsumerStateTracker) RegisterForSpecUpdates(ctx context.Context, specUpdatable updaters.SpecUpdatable, endpoint lavasession.RPCEndpoint) error { return nil } func (m *mockConsumerStateTracker) RegisterFinalizationConsensusForUpdates(context.Context, *lavaprotocol.FinalizationConsensus) { - } func (m *mockConsumerStateTracker) RegisterForDowntimeParamsUpdates(ctx context.Context, downtimeParamsUpdatable updaters.DowntimeParamsUpdatable) error { @@ -54,6 +58,187 @@ func (m *mockConsumerStateTracker) GetConsumerPolicy(ctx context.Context, consum func (m *mockConsumerStateTracker) GetProtocolVersion(ctx context.Context) (*updaters.ProtocolVersionResponse, error) { return nil, fmt.Errorf("banana") } + func (m *mockConsumerStateTracker) GetLatestVirtualEpoch() uint64 { return 0 } + +type ReplySetter struct { + status int + replyDataBuf []byte +} + +type mockProviderStateTracker struct { + consumerAddressForPairing string + averageBlockTime time.Duration +} + +func (m *mockProviderStateTracker) RegisterForVersionUpdates(ctx context.Context, version *protocoltypes.Version, versionValidator updaters.VersionValidationInf) { +} + +func (m *mockProviderStateTracker) RegisterForSpecUpdates(ctx context.Context, specUpdatable updaters.SpecUpdatable, endpoint lavasession.RPCEndpoint) error { + return nil +} + +func (m *mockProviderStateTracker) RegisterForSpecVerifications(ctx context.Context, specVerifier updaters.SpecVerifier, chainId string) error { + return nil +} + +func (m *mockProviderStateTracker) RegisterReliabilityManagerForVoteUpdates(ctx context.Context, voteUpdatable updaters.VoteUpdatable, endpointP *lavasession.RPCProviderEndpoint) { +} + +func (m *mockProviderStateTracker) RegisterForEpochUpdates(ctx context.Context, 
epochUpdatable updaters.EpochUpdatable) { +} + +func (m *mockProviderStateTracker) RegisterForDowntimeParamsUpdates(ctx context.Context, downtimeParamsUpdatable updaters.DowntimeParamsUpdatable) error { + return nil +} + +func (m *mockProviderStateTracker) TxRelayPayment(ctx context.Context, relayRequests []*pairingtypes.RelaySession, description string, latestBlocks []*pairingtypes.LatestBlockReport) error { + return nil +} + +func (m *mockProviderStateTracker) SendVoteReveal(voteID string, vote *reliabilitymanager.VoteData) error { + return nil +} + +func (m *mockProviderStateTracker) SendVoteCommitment(voteID string, vote *reliabilitymanager.VoteData) error { + return nil +} + +func (m *mockProviderStateTracker) LatestBlock() int64 { + return 1000 +} + +func (m *mockProviderStateTracker) GetMaxCuForUser(ctx context.Context, consumerAddress, chainID string, epocu uint64) (maxCu uint64, err error) { + return 10000, nil +} + +func (m *mockProviderStateTracker) VerifyPairing(ctx context.Context, consumerAddress, providerAddress string, epoch uint64, chainID string) (valid bool, total int64, projectId string, err error) { + return true, 10000, m.consumerAddressForPairing, nil +} + +func (m *mockProviderStateTracker) GetEpochSize(ctx context.Context) (uint64, error) { + return 30, nil +} + +func (m *mockProviderStateTracker) EarliestBlockInMemory(ctx context.Context) (uint64, error) { + return 100, nil +} + +func (m *mockProviderStateTracker) RegisterPaymentUpdatableForPayments(ctx context.Context, paymentUpdatable updaters.PaymentUpdatable) { +} + +func (m *mockProviderStateTracker) GetRecommendedEpochNumToCollectPayment(ctx context.Context) (uint64, error) { + return 1000, nil +} + +func (m *mockProviderStateTracker) GetEpochSizeMultipliedByRecommendedEpochNumToCollectPayment(ctx context.Context) (uint64, error) { + return 30000, nil +} + +func (m *mockProviderStateTracker) GetProtocolVersion(ctx context.Context) (*updaters.ProtocolVersionResponse, error) { + return &updaters.ProtocolVersionResponse{ + Version: &protocoltypes.Version{}, + BlockNumber: "", + }, nil +} + +func (m *mockProviderStateTracker) GetVirtualEpoch(epoch uint64) uint64 { + return 0 +} + +func (m *mockProviderStateTracker) GetAverageBlockTime() time.Duration { + return m.averageBlockTime +} + +type MockChainFetcher struct { + latestBlock int64 + blockHashes []*chaintracker.BlockStore + mutex sync.Mutex + fork string + callBack func() +} + +func (mcf *MockChainFetcher) FetchEndpoint() lavasession.RPCProviderEndpoint { + return lavasession.RPCProviderEndpoint{} +} + +func (mcf *MockChainFetcher) FetchLatestBlockNum(ctx context.Context) (int64, error) { + mcf.mutex.Lock() + defer mcf.mutex.Unlock() + if mcf.callBack != nil { + mcf.callBack() + } + return mcf.latestBlock, nil +} + +func (mcf *MockChainFetcher) FetchBlockHashByNum(ctx context.Context, blockNum int64) (string, error) { + mcf.mutex.Lock() + defer mcf.mutex.Unlock() + for _, blockStore := range mcf.blockHashes { + if blockStore.Block == blockNum { + return blockStore.Hash, nil + } + } + return "", fmt.Errorf("invalid block num requested %d, latestBlockSaved: %d, MockChainFetcher blockHashes: %+v", blockNum, mcf.latestBlock, mcf.blockHashes) +} + +func (mcf *MockChainFetcher) FetchChainID(ctx context.Context) (string, string, error) { + return "", "", utils.LavaFormatError("FetchChainID not supported for lava chain fetcher", nil) +} + +func (mcf *MockChainFetcher) hashKey(latestBlock int64) string { + return "stubHash-" + strconv.FormatInt(latestBlock, 10) + 
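mockProviderStateTracker answers every state query with fixed values (max CU 10000, epoch size 30, latest block 1000, and so on) and reports any pairing as valid, handing back the configured consumer address as the project id. A small usage sketch against the stubs above; the addresses are arbitrary strings, since the mock never inspects them:

// Sketch: the stubbed tracker validates every pairing and returns the
// consumer address it was constructed with as the project id.
tracker := mockProviderStateTracker{consumerAddressForPairing: "lava@consumer", averageBlockTime: time.Second}
valid, totalStake, projectId, err := tracker.VerifyPairing(context.Background(), "lava@consumer", "lava@provider", 100, "LAV1")
require.NoError(t, err)
require.True(t, valid)
require.Equal(t, int64(10000), totalStake)
require.Equal(t, "lava@consumer", projectId)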
mcf.fork +} + +func (mcf *MockChainFetcher) IsCorrectHash(hash string, hashBlock int64) bool { + return hash == mcf.hashKey(hashBlock) +} + +func (mcf *MockChainFetcher) AdvanceBlock() int64 { + mcf.mutex.Lock() + defer mcf.mutex.Unlock() + mcf.latestBlock += 1 + newHash := mcf.hashKey(mcf.latestBlock) + mcf.blockHashes = append(mcf.blockHashes[1:], &chaintracker.BlockStore{Block: mcf.latestBlock, Hash: newHash}) + return mcf.latestBlock +} + +func (mcf *MockChainFetcher) SetBlock(latestBlock int64) { + mcf.latestBlock = latestBlock + newHash := mcf.hashKey(mcf.latestBlock) + mcf.blockHashes = append(mcf.blockHashes, &chaintracker.BlockStore{Block: latestBlock, Hash: newHash}) +} + +func (mcf *MockChainFetcher) Fork(fork string) { + mcf.mutex.Lock() + defer mcf.mutex.Unlock() + if mcf.fork == fork { + // nothing to do + return + } + mcf.fork = fork + for _, blockStore := range mcf.blockHashes { + blockStore.Hash = mcf.hashKey(blockStore.Block) + } +} + +func (mcf *MockChainFetcher) Shrink(newSize int) { + mcf.mutex.Lock() + defer mcf.mutex.Unlock() + currentSize := len(mcf.blockHashes) + if currentSize <= newSize { + return + } + newHashes := make([]*chaintracker.BlockStore, newSize) + copy(newHashes, mcf.blockHashes[currentSize-newSize:]) +} + +func NewMockChainFetcher(startBlock, blocksToSave int64, callback func()) *MockChainFetcher { + mockCHainFetcher := MockChainFetcher{callBack: callback} + for i := int64(0); i < blocksToSave; i++ { + mockCHainFetcher.SetBlock(startBlock + i) + } + return &mockCHainFetcher +} diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index 435ae35834..33d33f36d5 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -2,34 +2,42 @@ package integration_test import ( "context" + "fmt" "net/http" "os" + "strconv" "testing" "time" "github.com/lavanet/lava/protocol/chainlib" + "github.com/lavanet/lava/protocol/chaintracker" "github.com/lavanet/lava/protocol/common" "github.com/lavanet/lava/protocol/lavaprotocol" "github.com/lavanet/lava/protocol/lavasession" "github.com/lavanet/lava/protocol/metrics" "github.com/lavanet/lava/protocol/provideroptimizer" "github.com/lavanet/lava/protocol/rpcconsumer" + "github.com/lavanet/lava/protocol/rpcprovider" + "github.com/lavanet/lava/protocol/rpcprovider/reliabilitymanager" + "github.com/lavanet/lava/protocol/rpcprovider/rewardserver" "github.com/lavanet/lava/utils" "github.com/lavanet/lava/utils/rand" "github.com/lavanet/lava/utils/sigs" "github.com/stretchr/testify/require" - // pairingtypes "github.com/lavanet/lava/x/pairing/types" spectypes "github.com/lavanet/lava/x/spec/types" ) -var seed int64 +var ( + seed int64 + randomizer *sigs.ZeroReader +) func TestMain(m *testing.M) { // This code will run once before any test cases are executed. 
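MockChainFetcher keeps a rolling window of deterministic hashes ("stubHash-<height>" plus the current fork tag), which is what lets fork-detection assertions work without a real node. One note on the code above: Shrink fills newHashes but never assigns it back to mcf.blockHashes, so as written it has no effect; presumably mcf.blockHashes = newHashes was intended. A usage sketch of the fetcher itself:

// Sketch: deterministic, fork-aware hashes.
mcf := NewMockChainFetcher(1000, 10, nil) // saves blocks 1000..1009
latest, err := mcf.FetchLatestBlockNum(context.Background())
require.NoError(t, err)
require.Equal(t, int64(1009), latest)
require.True(t, mcf.IsCorrectHash("stubHash-1005", 1005))
mcf.Fork("fork-A")
require.False(t, mcf.IsCorrectHash("stubHash-1005", 1005))
require.True(t, mcf.IsCorrectHash("stubHash-1005fork-A", 1005))
require.Equal(t, int64(1010), mcf.AdvanceBlock())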
seed = time.Now().Unix() - rand.SetSpecificSeed(seed) + randomizer = sigs.NewZeroReader(seed) // Run the actual tests exitCode := m.Run() if exitCode != 0 { @@ -66,7 +74,19 @@ func checkServerStatusWithTimeout(url string, totalTimeout time.Duration) bool { return false } -func createRpcConsumer(t *testing.T, ctx context.Context, specId string, apiInterface string, consumerListenAddress string, epoch uint64, pairingList map[uint64]*lavasession.ConsumerSessionsWithProvider, requiredResponses int, lavaChainID string) *rpcconsumer.RPCConsumerServer { +func createInMemoryRewardDb(specs []string) (*rewardserver.RewardDB, error) { + rewardDB := rewardserver.NewRewardDB() + for _, spec := range specs { + db := rewardserver.NewMemoryDB(spec) + err := rewardDB.AddDB(db) + if err != nil { + return nil, err + } + } + return rewardDB, nil +} + +func createRpcConsumer(t *testing.T, ctx context.Context, specId string, apiInterface string, account sigs.Account, consumerListenAddress string, epoch uint64, pairingList map[uint64]*lavasession.ConsumerSessionsWithProvider, requiredResponses int, lavaChainID string) *rpcconsumer.RPCConsumerServer { serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // Handle the incoming request and provide the desired response w.WriteHeader(http.StatusOK) @@ -76,11 +96,6 @@ func createRpcConsumer(t *testing.T, ctx context.Context, specId string, apiInte require.NotNil(t, chainParser) require.NotNil(t, chainFetcher) - specIdLava := "LAV1" - chainParserLava, _, chainFetcherLava, _, err := chainlib.CreateChainLibMocks(ctx, specIdLava, spectypes.APIInterfaceRest, serverHandler, "../../", nil) - require.NoError(t, err) - require.NotNil(t, chainParserLava) - require.NotNil(t, chainFetcherLava) rpcConsumerServer := &rpcconsumer.RPCConsumerServer{} rpcEndpoint := &lavasession.RPCEndpoint{ NetworkAddress: consumerListenAddress, @@ -96,10 +111,8 @@ func createRpcConsumer(t *testing.T, ctx context.Context, specId string, apiInte baseLatency := common.AverageWorldLatency / 2 optimizer := provideroptimizer.NewProviderOptimizer(provideroptimizer.STRATEGY_BALANCED, averageBlockTime, baseLatency, 2) consumerSessionManager := lavasession.NewConsumerSessionManager(rpcEndpoint, optimizer, nil, nil) - consumerSessionManager.UpdateAllProviders(epoch, pairingList) - randomizer := sigs.NewZeroReader(seed) - account := sigs.GenerateDeterministicFloatingKey(randomizer) + consumerConsistency := rpcconsumer.NewConsumerConsistency(specId) consumerCmdFlags := common.ConsumerCmdFlags{} rpcsonumerLogs, err := metrics.NewRPCConsumerLogs(nil, nil) @@ -113,6 +126,79 @@ func createRpcConsumer(t *testing.T, ctx context.Context, specId string, apiInte return rpcConsumerServer } +func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string, specId string, apiInterface string, providerListenAddress string, account sigs.Account, epoch uint64, lavaChainID string, addons []string) (*rpcprovider.RPCProviderServer, *ReplySetter) { + replySetter := ReplySetter{ + status: http.StatusOK, + replyDataBuf: []byte(`{"reply": "REPLY-STUB"}`), + } + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(replySetter.status) + fmt.Fprint(w, string(replySetter.replyDataBuf)) + }) + chainParser, chainRouter, chainFetcher, _, err := chainlib.CreateChainLibMocks(ctx, specId, apiInterface, serverHandler, "../../", addons) + require.NoError(t, err) + require.NotNil(t, chainParser) 
+ require.NotNil(t, chainFetcher) + require.NotNil(t, chainRouter) + + rpcProviderServer := &rpcprovider.RPCProviderServer{} + rpcProviderEndpoint := &lavasession.RPCProviderEndpoint{ + NetworkAddress: lavasession.NetworkAddressData{ + Address: providerListenAddress, + KeyPem: "", + CertPem: "", + DisableTLS: true, + }, + ChainID: lavaChainID, + ApiInterface: apiInterface, + Geolocation: 1, + NodeUrls: []common.NodeUrl{ + { + Url: "", + InternalPath: "", + AuthConfig: common.AuthConfig{}, + IpForwarding: false, + Timeout: 0, + Addons: addons, + SkipVerifications: []string{}, + }, + }, + } + rewardDB, err := createInMemoryRewardDb([]string{specId}) + require.NoError(t, err) + _, averageBlockTime, blocksToFinalization, blocksInFinalizationData := chainParser.ChainBlockStats() + mockProviderStateTracker := mockProviderStateTracker{consumerAddressForPairing: consumerAddress, averageBlockTime: averageBlockTime} + rws := rewardserver.NewRewardServer(&mockProviderStateTracker, nil, rewardDB, "badger_test", 1, 10, nil) + + blockMemorySize, err := mockProviderStateTracker.GetEpochSizeMultipliedByRecommendedEpochNumToCollectPayment(ctx) + require.NoError(t, err) + providerSessionManager := lavasession.NewProviderSessionManager(rpcProviderEndpoint, blockMemorySize) + providerPolicy := rpcprovider.GetAllAddonsAndExtensionsFromNodeUrlSlice(rpcProviderEndpoint.NodeUrls) + chainParser.SetPolicy(providerPolicy, specId, apiInterface) + + blocksToSaveChainTracker := uint64(blocksToFinalization + blocksInFinalizationData) + chainTrackerConfig := chaintracker.ChainTrackerConfig{ + BlocksToSave: blocksToSaveChainTracker, + AverageBlockTime: averageBlockTime, + ServerBlockMemory: rpcprovider.ChainTrackerDefaultMemory + blocksToSaveChainTracker, + NewLatestCallback: nil, + ConsistencyCallback: nil, + Pmetrics: nil, + } + mockChainFetcher := NewMockChainFetcher(1000, 10, nil) + chainTracker, err := chaintracker.NewChainTracker(ctx, mockChainFetcher, chainTrackerConfig) + require.NoError(t, err) + reliabilityManager := reliabilitymanager.NewReliabilityManager(chainTracker, &mockProviderStateTracker, account.Addr.String(), chainRouter, chainParser) + rpcProviderServer.ServeRPCRequests(ctx, rpcProviderEndpoint, chainParser, rws, providerSessionManager, reliabilityManager, account.SK, nil, chainRouter, &mockProviderStateTracker, account.Addr, lavaChainID, rpcprovider.DEFAULT_ALLOWED_MISSING_CU, nil, nil) + listener := rpcprovider.NewProviderListener(ctx, rpcProviderEndpoint.NetworkAddress, "/health") + err = listener.RegisterReceiver(rpcProviderServer, rpcProviderEndpoint) + require.NoError(t, err) + chainParser.Activate() + chainTracker.RegisterForBlockTimeUpdates(chainParser) + return rpcProviderServer, &replySetter +} + func TestConsumerProviderBasic(t *testing.T) { ctx := context.Background() // can be any spec and api interface @@ -121,26 +207,46 @@ func TestConsumerProviderBasic(t *testing.T) { epoch := uint64(100) requiredResponses := 1 lavaChainID := "lava" - providerListenAddress := "localhost:0" - consumerListenAddress := "localhost:11111" - pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{ - 1: { - PublicLavaAddress: "", + + numProviders := 1 + + consumerListenAddress := "localhost:21111" + pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{} + type providerData struct { + account sigs.Account + listenAddress string + server *rpcprovider.RPCProviderServer + replySetter *ReplySetter + } + providers := []providerData{} + for i := 0; i < numProviders; i++ { + // 
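createRpcProvider returns the ReplySetter wired into the mocked node handler, so a test can change what a provider's node returns without tearing anything down. A short sketch, assuming a providers slice like the one assembled in TestConsumerProviderBasic:

// Sketch: steer the mocked node response for provider 0 mid-test.
providers[0].replySetter.status = http.StatusOK
providers[0].replySetter.replyDataBuf = []byte(`{"jsonrpc":"2.0","id":1,"result":"0x1"}`)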
providerListenAddress := "localhost:111" + strconv.Itoa(i) + providerListenAddress := "localhost:111" + strconv.Itoa(i) + account := sigs.GenerateDeterministicFloatingKey(randomizer) + providerDataI := providerData{account: account, listenAddress: providerListenAddress} + providers = append(providers, providerDataI) + pairingList[uint64(i)] = &lavasession.ConsumerSessionsWithProvider{ + PublicLavaAddress: account.Addr.String(), Endpoints: []*lavasession.Endpoint{ { NetworkAddress: providerListenAddress, Enabled: true, - Geolocation: 0, + Geolocation: 1, }, }, Sessions: map[int64]*lavasession.SingleConsumerSession{}, MaxComputeUnits: 10000, UsedComputeUnits: 0, PairingEpoch: epoch, - }, + } } - - rpcconsumerServer := createRpcConsumer(t, ctx, specId, apiInterface, consumerListenAddress, epoch, pairingList, requiredResponses, lavaChainID) + consumerAccount := sigs.GenerateDeterministicFloatingKey(randomizer) + for i := 0; i < numProviders; i++ { + ctx := context.Background() + providerDataI := providers[i] + providers[i].server, providers[i].replySetter = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, providerDataI.listenAddress, providerDataI.account, epoch, lavaChainID, nil) + } + rpcconsumerServer := createRpcConsumer(t, ctx, specId, apiInterface, consumerAccount, consumerListenAddress, epoch, pairingList, requiredResponses, lavaChainID) require.NotNil(t, rpcconsumerServer) client := http.Client{} resp, err := client.Get("http://" + consumerListenAddress + "/status") diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 2f6aa780b5..14780e0f1a 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -47,7 +47,7 @@ type RPCConsumerServer struct { requiredResponses int finalizationConsensus *lavaprotocol.FinalizationConsensus lavaChainID string - consumerAddress sdk.AccAddress + ConsumerAddress sdk.AccAddress consumerConsistency *ConsumerConsistency sharedState bool // using the cache backend to sync the latest seen block with other consumers relaysMonitor *metrics.RelaysMonitor @@ -94,7 +94,7 @@ func (rpccs *RPCConsumerServer) ServeRPCRequests(ctx context.Context, listenEndp rpccs.privKey = privKey rpccs.chainParser = chainParser rpccs.finalizationConsensus = finalizationConsensus - rpccs.consumerAddress = consumerAddress + rpccs.ConsumerAddress = consumerAddress rpccs.consumerConsistency = consumerConsistency rpccs.sharedState = sharedState rpccs.reporter = reporter @@ -683,7 +683,7 @@ func (rpccs *RPCConsumerServer) relayInner(ctx context.Context, singleConsumerSe enabled, _ := rpccs.chainParser.DataReliabilityParams() if enabled { // TODO: DETECTION instead of existingSessionLatestBlock, we need proof of last reply to send the previous reply and the current reply - finalizedBlocks, finalizationConflict, err := lavaprotocol.VerifyFinalizationData(reply, relayRequest, providerPublicAddress, rpccs.consumerAddress, existingSessionLatestBlock, blockDistanceForFinalizedData) + finalizedBlocks, finalizationConflict, err := lavaprotocol.VerifyFinalizationData(reply, relayRequest, providerPublicAddress, rpccs.ConsumerAddress, existingSessionLatestBlock, blockDistanceForFinalizedData) if err != nil { if lavaprotocol.ProviderFinzalizationDataAccountabilityError.Is(err) && finalizationConflict != nil { go rpccs.consumerTxSender.TxConflictDetection(ctx, finalizationConflict, nil, nil, singleConsumerSession.Parent) diff --git a/protocol/rpcprovider/rpcprovider.go 
b/protocol/rpcprovider/rpcprovider.go index 31a830d707..abc2d22315 100644 --- a/protocol/rpcprovider/rpcprovider.go +++ b/protocol/rpcprovider/rpcprovider.go @@ -313,7 +313,7 @@ func (rpcp *RPCProvider) SetupProviderEndpoints(rpcProviderEndpoints []*lavasess return disabledEndpointsList } -func (rpcp *RPCProvider) getAllAddonsAndExtensionsFromNodeUrlSlice(nodeUrls []common.NodeUrl) *ProviderPolicy { +func GetAllAddonsAndExtensionsFromNodeUrlSlice(nodeUrls []common.NodeUrl) *ProviderPolicy { policy := &ProviderPolicy{} for _, nodeUrl := range nodeUrls { policy.addons = append(policy.addons, nodeUrl.Addons...) // addons are added without validation while extensions are. so we add to the addons all. @@ -342,7 +342,7 @@ func (rpcp *RPCProvider) SetupEndpoint(ctx context.Context, rpcProviderEndpoint } // after registering for spec updates our chain parser contains the spec and we can add our addons and extensions to allow our provider to function properly - providerPolicy := rpcp.getAllAddonsAndExtensionsFromNodeUrlSlice(rpcProviderEndpoint.NodeUrls) + providerPolicy := GetAllAddonsAndExtensionsFromNodeUrlSlice(rpcProviderEndpoint.NodeUrls) utils.LavaFormatDebug("supported services for provider", utils.LogAttr("specId", rpcProviderEndpoint.ChainID), utils.LogAttr("apiInterface", apiInterface), From 6313c30790b122687b2d764e2b59ae6f631d0eef Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Thu, 7 Mar 2024 11:53:56 +0200 Subject: [PATCH 34/57] added rpcproviderServer creation to unitest --- protocol/chainlib/common_test_utils.go | 16 +++---- protocol/chainlib/grpc_test.go | 6 +-- protocol/chainlib/jsonRPC_test.go | 10 ++--- protocol/chainlib/rest_test.go | 6 +-- protocol/chainlib/tendermintRPC_test.go | 8 ++-- protocol/integration/protocol_test.go | 44 ++++++++++--------- .../finalization_consensus_test.go | 4 +- protocol/rpcconsumer/relay_processor_test.go | 14 +++--- .../reliability_manager_test.go | 2 +- .../rpcprovider/rpcprovider_server_test.go | 2 +- 10 files changed, 58 insertions(+), 54 deletions(-) diff --git a/protocol/chainlib/common_test_utils.go b/protocol/chainlib/common_test_utils.go index 3861ece1fd..4ec1f9b094 100644 --- a/protocol/chainlib/common_test_utils.go +++ b/protocol/chainlib/common_test_utils.go @@ -86,11 +86,11 @@ func generateCombinations(arr []string) [][]string { // generates a chain parser, a chain fetcher messages based on it // apiInterface can either be an ApiInterface string as in spectypes.ApiInterfaceXXX or a number for an index in the apiCollections -func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface string, serverCallback http.HandlerFunc, getToTopMostPath string, services []string) (cpar ChainParser, crout ChainRouter, cfetc chaintracker.ChainFetcher, closeServer func(), errRet error) { +func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface string, serverCallback http.HandlerFunc, getToTopMostPath string, services []string) (cpar ChainParser, crout ChainRouter, cfetc chaintracker.ChainFetcher, closeServer func(), endpointRet *lavasession.RPCProviderEndpoint, errRet error) { closeServer = nil spec, err := keepertest.GetASpec(specIndex, getToTopMostPath, nil, nil) if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, err } index, err := strconv.Atoi(apiInterface) if err == nil && index < len(spec.ApiCollections) { @@ -98,7 +98,7 @@ func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface str } chainParser, err := NewChainParser(apiInterface) if err != nil { - 
return nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, err } var chainRouter ChainRouter chainParser.SetSpec(spec) @@ -111,7 +111,7 @@ func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface str } addons, extensions, err := chainParser.SeparateAddonsExtensions(services) if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, err } if apiInterface == spectypes.APIInterfaceGrpc { @@ -119,7 +119,7 @@ func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface str grpcServer := grpc.NewServer() lis, err := net.Listen("tcp", "localhost:0") if err != nil { - return nil, nil, nil, closeServer, err + return nil, nil, nil, closeServer, nil, err } endpoint.NodeUrls = append(endpoint.NodeUrls, common.NodeUrl{Url: lis.Addr().String(), Addons: addons}) allCombinations := generateCombinations(extensions) @@ -138,7 +138,7 @@ func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface str time.Sleep(10 * time.Millisecond) chainRouter, err = GetChainRouter(ctx, 1, endpoint, chainParser) if err != nil { - return nil, nil, nil, closeServer, err + return nil, nil, nil, closeServer, nil, err } } else { mockServer := httptest.NewServer(serverCallback) @@ -146,11 +146,11 @@ func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface str endpoint.NodeUrls = append(endpoint.NodeUrls, common.NodeUrl{Url: mockServer.URL, Addons: addons}) chainRouter, err = GetChainRouter(ctx, 1, endpoint, chainParser) if err != nil { - return nil, nil, nil, closeServer, err + return nil, nil, nil, closeServer, nil, err } } chainFetcher := NewChainFetcher(ctx, &ChainFetcherOptions{chainRouter, chainParser, endpoint, nil}) - return chainParser, chainRouter, chainFetcher, closeServer, err + return chainParser, chainRouter, chainFetcher, closeServer, endpoint, err } type TestStruct struct { diff --git a/protocol/chainlib/grpc_test.go b/protocol/chainlib/grpc_test.go index 508bcaa96e..d5404435c7 100644 --- a/protocol/chainlib/grpc_test.go +++ b/protocol/chainlib/grpc_test.go @@ -142,7 +142,7 @@ func TestGrpcChainProxy(t *testing.T) { // Handle the incoming request and provide the desired response wasCalled = true }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceGrpc, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceGrpc, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -169,7 +169,7 @@ func TestParsingRequestedBlocksHeadersGrpc(t *testing.T) { w.WriteHeader(244591) } }) - chainParser, chainRouter, _, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceGrpc, serverHandler, "../../", nil) + chainParser, chainRouter, _, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceGrpc, serverHandler, "../../", nil) require.NoError(t, err) defer func() { if closeServer != nil { @@ -237,7 +237,7 @@ func TestSettingBlocksHeadersGrpc(t *testing.T) { w.WriteHeader(244591) } }) - chainParser, chainRouter, _, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceGrpc, serverHandler, "../../", nil) + chainParser, chainRouter, _, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceGrpc, serverHandler, "../../", nil) require.NoError(t, err) defer func() { if closeServer != nil { diff --git a/protocol/chainlib/jsonRPC_test.go 
b/protocol/chainlib/jsonRPC_test.go index 8d13774a1a..e2bac3eaeb 100644 --- a/protocol/chainlib/jsonRPC_test.go +++ b/protocol/chainlib/jsonRPC_test.go @@ -140,7 +140,7 @@ func TestJsonRpcChainProxy(t *testing.T) { fmt.Fprint(w, `{"jsonrpc":"2.0","id":1,"result":"0x10a7a08"}`) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -164,7 +164,7 @@ func TestAddonAndVerifications(t *testing.T) { fmt.Fprint(w, `{"jsonrpc":"2.0","id":1,"result":"0xf9ccdff90234a064"}`) }) - chainParser, chainRouter, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", []string{"debug"}) + chainParser, chainRouter, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", []string{"debug"}) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainRouter) @@ -197,7 +197,7 @@ func TestExtensions(t *testing.T) { }) specname := "ETH1" - chainParser, chainRouter, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, specname, spectypes.APIInterfaceJsonRPC, serverHandle, "../../", []string{"archive"}) + chainParser, chainRouter, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, specname, spectypes.APIInterfaceJsonRPC, serverHandle, "../../", []string{"archive"}) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainRouter) @@ -279,7 +279,7 @@ func TestJsonRpcBatchCall(t *testing.T) { fmt.Fprint(w, response) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -320,7 +320,7 @@ func TestJsonRpcBatchCallSameID(t *testing.T) { fmt.Fprint(w, response) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) diff --git a/protocol/chainlib/rest_test.go b/protocol/chainlib/rest_test.go index 5baf9b4a31..6e578f2ae3 100644 --- a/protocol/chainlib/rest_test.go +++ b/protocol/chainlib/rest_test.go @@ -135,7 +135,7 @@ func TestRestChainProxy(t *testing.T) { w.WriteHeader(http.StatusOK) fmt.Fprint(w, `{"block": { "header": {"height": "244591"}}}`) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceRest, serverHandler, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -166,7 +166,7 @@ func TestParsingRequestedBlocksHeadersRest(t *testing.T) { 
fmt.Fprint(w, `{"block": { "header": {"height": "244591"}}}`) } }) - chainParser, chainRouter, _, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, chainRouter, _, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceRest, serverHandler, "../../", nil) require.NoError(t, err) defer func() { if closeServer != nil { @@ -236,7 +236,7 @@ func TestSettingRequestedBlocksHeadersRest(t *testing.T) { } fmt.Fprint(w, `{"block": { "header": {"height": "244591"}}}`) }) - chainParser, chainRouter, _, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, chainRouter, _, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceRest, serverHandler, "../../", nil) require.NoError(t, err) defer func() { if closeServer != nil { diff --git a/protocol/chainlib/tendermintRPC_test.go b/protocol/chainlib/tendermintRPC_test.go index 61acb31989..a48752b4bc 100644 --- a/protocol/chainlib/tendermintRPC_test.go +++ b/protocol/chainlib/tendermintRPC_test.go @@ -149,7 +149,7 @@ func TestTendermintRpcChainProxy(t *testing.T) { }`) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -180,7 +180,7 @@ func TestTendermintRpcBatchCall(t *testing.T) { fmt.Fprint(w, response) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -222,7 +222,7 @@ func TestTendermintRpcBatchCallWithSameID(t *testing.T) { fmt.Fprint(w, nodeResponse) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -256,7 +256,7 @@ func TestTendermintURIRPC(t *testing.T) { }`) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index 33d33f36d5..07a1b2affd 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -91,7 +91,7 @@ func createRpcConsumer(t *testing.T, ctx context.Context, specId string, apiInte // Handle the incoming request and provide the desired response w.WriteHeader(http.StatusOK) }) - chainParser, _, 
chainFetcher, _, err := chainlib.CreateChainLibMocks(ctx, specId, apiInterface, serverHandler, "../../", nil) + chainParser, _, chainFetcher, _, _, err := chainlib.CreateChainLibMocks(ctx, specId, apiInterface, serverHandler, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainFetcher) @@ -126,7 +126,7 @@ func createRpcConsumer(t *testing.T, ctx context.Context, specId string, apiInte return rpcConsumerServer } -func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string, specId string, apiInterface string, providerListenAddress string, account sigs.Account, epoch uint64, lavaChainID string, addons []string) (*rpcprovider.RPCProviderServer, *ReplySetter) { +func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string, specId string, apiInterface string, listenAddress string, account sigs.Account, lavaChainID string, addons []string) (*rpcprovider.RPCProviderServer, *lavasession.RPCProviderEndpoint, *ReplySetter) { replySetter := ReplySetter{ status: http.StatusOK, replyDataBuf: []byte(`{"reply": "REPLY-STUB"}`), @@ -136,16 +136,17 @@ func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string w.WriteHeader(replySetter.status) fmt.Fprint(w, string(replySetter.replyDataBuf)) }) - chainParser, chainRouter, chainFetcher, _, err := chainlib.CreateChainLibMocks(ctx, specId, apiInterface, serverHandler, "../../", addons) + chainParser, chainRouter, chainFetcher, _, endpoint, err := chainlib.CreateChainLibMocks(ctx, specId, apiInterface, serverHandler, "../../", addons) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainFetcher) require.NotNil(t, chainRouter) + endpoint.NetworkAddress.Address = listenAddress rpcProviderServer := &rpcprovider.RPCProviderServer{} rpcProviderEndpoint := &lavasession.RPCProviderEndpoint{ NetworkAddress: lavasession.NetworkAddressData{ - Address: providerListenAddress, + Address: endpoint.NetworkAddress.Address, KeyPem: "", CertPem: "", DisableTLS: true, @@ -155,7 +156,7 @@ func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string Geolocation: 1, NodeUrls: []common.NodeUrl{ { - Url: "", + Url: endpoint.NodeUrls[0].Url, InternalPath: "", AuthConfig: common.AuthConfig{}, IpForwarding: false, @@ -196,7 +197,7 @@ func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string require.NoError(t, err) chainParser.Activate() chainTracker.RegisterForBlockTimeUpdates(chainParser) - return rpcProviderServer, &replySetter + return rpcProviderServer, endpoint, &replySetter } func TestConsumerProviderBasic(t *testing.T) { @@ -213,23 +214,32 @@ func TestConsumerProviderBasic(t *testing.T) { consumerListenAddress := "localhost:21111" pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{} type providerData struct { - account sigs.Account - listenAddress string - server *rpcprovider.RPCProviderServer - replySetter *ReplySetter + account sigs.Account + endpoint *lavasession.RPCProviderEndpoint + server *rpcprovider.RPCProviderServer + replySetter *ReplySetter } providers := []providerData{} + for i := 0; i < numProviders; i++ { // providerListenAddress := "localhost:111" + strconv.Itoa(i) - providerListenAddress := "localhost:111" + strconv.Itoa(i) account := sigs.GenerateDeterministicFloatingKey(randomizer) - providerDataI := providerData{account: account, listenAddress: providerListenAddress} + providerDataI := providerData{account: account} providers = append(providers, providerDataI) + } + 
consumerAccount := sigs.GenerateDeterministicFloatingKey(randomizer) + for i := 0; i < numProviders; i++ { + ctx := context.Background() + providerDataI := providers[i] + listenAddress := "localhost:111" + strconv.Itoa(i) + providers[i].server, providers[i].endpoint, providers[i].replySetter = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) + } + for i := 0; i < numProviders; i++ { pairingList[uint64(i)] = &lavasession.ConsumerSessionsWithProvider{ - PublicLavaAddress: account.Addr.String(), + PublicLavaAddress: providers[i].account.Addr.String(), Endpoints: []*lavasession.Endpoint{ { - NetworkAddress: providerListenAddress, + NetworkAddress: providers[i].endpoint.NetworkAddress.Address, Enabled: true, Geolocation: 1, }, @@ -240,12 +250,6 @@ func TestConsumerProviderBasic(t *testing.T) { PairingEpoch: epoch, } } - consumerAccount := sigs.GenerateDeterministicFloatingKey(randomizer) - for i := 0; i < numProviders; i++ { - ctx := context.Background() - providerDataI := providers[i] - providers[i].server, providers[i].replySetter = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, providerDataI.listenAddress, providerDataI.account, epoch, lavaChainID, nil) - } rpcconsumerServer := createRpcConsumer(t, ctx, specId, apiInterface, consumerAccount, consumerListenAddress, epoch, pairingList, requiredResponses, lavaChainID) require.NotNil(t, rpcconsumerServer) client := http.Client{} diff --git a/protocol/lavaprotocol/finalization_consensus_test.go b/protocol/lavaprotocol/finalization_consensus_test.go index 8eaa581e18..2b93fdc86f 100644 --- a/protocol/lavaprotocol/finalization_consensus_test.go +++ b/protocol/lavaprotocol/finalization_consensus_test.go @@ -74,7 +74,7 @@ func TestConsensusHashesInsertion(t *testing.T) { chainsToTest := []string{"APT1", "LAV1", "ETH1"} for _, chainID := range chainsToTest { ctx := context.Background() - chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, chainID, "0", func(http.ResponseWriter, *http.Request) {}, "../../", nil) + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, chainID, "0", func(http.ResponseWriter, *http.Request) {}, "../../", nil) if closeServer != nil { defer closeServer() } @@ -163,7 +163,7 @@ func TestQoS(t *testing.T) { for _, chainID := range chainsToTest { t.Run(chainID, func(t *testing.T) { ctx := context.Background() - chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, chainID, "0", func(http.ResponseWriter, *http.Request) {}, "../../", nil) + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, chainID, "0", func(http.ResponseWriter, *http.Request) {}, "../../", nil) if closeServer != nil { defer closeServer() } diff --git a/protocol/rpcconsumer/relay_processor_test.go b/protocol/rpcconsumer/relay_processor_test.go index 4251823707..37f74646da 100644 --- a/protocol/rpcconsumer/relay_processor_test.go +++ b/protocol/rpcconsumer/relay_processor_test.go @@ -78,7 +78,7 @@ func TestRelayProcessorHappyFlow(t *testing.T) { w.WriteHeader(http.StatusOK) }) specId := "LAV1" - chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) if closeServer != nil { defer closeServer() } @@ -120,7 +120,7 @@ func 
TestRelayProcessorTimeout(t *testing.T) { w.WriteHeader(http.StatusOK) }) specId := "LAV1" - chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) if closeServer != nil { defer closeServer() } @@ -172,7 +172,7 @@ func TestRelayProcessorRetry(t *testing.T) { w.WriteHeader(http.StatusOK) }) specId := "LAV1" - chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) if closeServer != nil { defer closeServer() } @@ -216,7 +216,7 @@ func TestRelayProcessorRetryNodeError(t *testing.T) { w.WriteHeader(http.StatusOK) }) specId := "LAV1" - chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) if closeServer != nil { defer closeServer() } @@ -261,7 +261,7 @@ func TestRelayProcessorStatefulApi(t *testing.T) { w.WriteHeader(http.StatusOK) }) specId := "LAV1" - chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) if closeServer != nil { defer closeServer() } @@ -306,7 +306,7 @@ func TestRelayProcessorStatefulApiErr(t *testing.T) { w.WriteHeader(http.StatusOK) }) specId := "LAV1" - chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) if closeServer != nil { defer closeServer() } @@ -350,7 +350,7 @@ func TestRelayProcessorLatest(t *testing.T) { w.WriteHeader(http.StatusOK) }) specId := "LAV1" - chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) if closeServer != nil { defer closeServer() } diff --git a/protocol/rpcprovider/reliabilitymanager/reliability_manager_test.go b/protocol/rpcprovider/reliabilitymanager/reliability_manager_test.go index a8f9977f9f..084a8498ae 100644 --- a/protocol/rpcprovider/reliabilitymanager/reliability_manager_test.go +++ b/protocol/rpcprovider/reliabilitymanager/reliability_manager_test.go @@ -185,7 +185,7 @@ func TestFullFlowReliabilityConflict(t *testing.T) { w.WriteHeader(http.StatusOK) fmt.Fprint(w, string(replyDataBuf)) }) - chainParser, chainProxy, chainFetcher, closeServer, err := chainlib.CreateChainLibMocks(ts.Ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := chainlib.CreateChainLibMocks(ts.Ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../../", nil) if closeServer != nil { defer closeServer() } diff --git 
a/protocol/rpcprovider/rpcprovider_server_test.go b/protocol/rpcprovider/rpcprovider_server_test.go index 445ef73142..97131e2654 100644 --- a/protocol/rpcprovider/rpcprovider_server_test.go +++ b/protocol/rpcprovider/rpcprovider_server_test.go @@ -222,7 +222,7 @@ func TestHandleConsistency(t *testing.T) { w.WriteHeader(http.StatusOK) fmt.Fprint(w, string(replyDataBuf)) }) - chainParser, chainProxy, _, closeServer, err := chainlib.CreateChainLibMocks(ts.Ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, chainProxy, _, closeServer, _, err := chainlib.CreateChainLibMocks(ts.Ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) if closeServer != nil { defer closeServer() } From 69b9b3e2ddbe6cf0f66a769e06cd5deb8155b702 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Thu, 7 Mar 2024 13:38:18 +0200 Subject: [PATCH 35/57] added provider dialing --- protocol/chainlib/common_test_utils.go | 2 +- protocol/integration/protocol_test.go | 47 ++++++++++++++++++++++- protocol/rpcprovider/provider_listener.go | 8 +++- 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/protocol/chainlib/common_test_utils.go b/protocol/chainlib/common_test_utils.go index 4ec1f9b094..d18e81d49e 100644 --- a/protocol/chainlib/common_test_utils.go +++ b/protocol/chainlib/common_test_utils.go @@ -117,7 +117,7 @@ func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface str if apiInterface == spectypes.APIInterfaceGrpc { // Start a new gRPC server using the buffered connection grpcServer := grpc.NewServer() - lis, err := net.Listen("tcp", "localhost:0") + lis, err := net.Listen("tcp", "127.0.0.1:0") if err != nil { return nil, nil, nil, closeServer, nil, err } diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index 07a1b2affd..1a914015d9 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -24,6 +24,9 @@ import ( "github.com/lavanet/lava/utils/rand" "github.com/lavanet/lava/utils/sigs" "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/connectivity" + "google.golang.org/grpc/credentials/insecure" spectypes "github.com/lavanet/lava/x/spec/types" ) @@ -38,6 +41,7 @@ func TestMain(m *testing.M) { seed = time.Now().Unix() rand.SetSpecificSeed(seed) randomizer = sigs.NewZeroReader(seed) + lavasession.AllowInsecureConnectionToProviders = true // Run the actual tests exitCode := m.Run() if exitCode != 0 { @@ -46,6 +50,45 @@ func TestMain(m *testing.M) { os.Exit(exitCode) } +func isGrpcServerUp(url string) bool { + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*50) + defer cancel() + conn, err := grpc.DialContext(ctx, url, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return false + } + defer conn.Close() + for { + state := conn.GetState() + if state == connectivity.Ready { + return true + } else if state == connectivity.TransientFailure || state == connectivity.Shutdown { + return false + } + + select { + case <-time.After(10 * time.Millisecond): + // Check the connection state again after a short delay + case <-ctx.Done(): + // The context has timed out + return false + } + } +} + +func checkGrpcServerStatusWithTimeout(url string, totalTimeout time.Duration) bool { + startTime := time.Now() + + for time.Since(startTime) < totalTimeout { + if isGrpcServerUp(url) { + return true + } + time.Sleep(20 * time.Millisecond) + } + + return false +} + func isServerUp(url string) 
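isGrpcServerUp polls the connection state by hand. gRPC can do the blocking itself: grpc.WithBlock makes DialContext wait until the connection is READY or the context expires, collapsing the state loop into a single call. A sketch of that alternative, using the same insecure credentials imported above:

// Sketch: DialContext with WithBlock returns only once the connection is
// READY, or errors out when ctx times out.
func isGrpcServerUpBlocking(url string) bool {
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()
	conn, err := grpc.DialContext(ctx, url, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock())
	if err != nil {
		return false
	}
	_ = conn.Close()
	return true
}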
bool { client := http.Client{ Timeout: 20 * time.Millisecond, @@ -120,7 +163,7 @@ func createRpcConsumer(t *testing.T, ctx context.Context, specId string, apiInte err = rpcConsumerServer.ServeRPCRequests(ctx, rpcEndpoint, consumerStateTracker, chainParser, finalizationConsensus, consumerSessionManager, requiredResponses, account.SK, lavaChainID, nil, rpcsonumerLogs, account.Addr, consumerConsistency, nil, consumerCmdFlags, false, nil, nil) require.NoError(t, err) // wait for consumer server to be up - consumerUp := checkServerStatusWithTimeout("http://"+consumerListenAddress, time.Millisecond*50) + consumerUp := checkServerStatusWithTimeout("http://"+consumerListenAddress, time.Millisecond*61) require.True(t, consumerUp) return rpcConsumerServer @@ -197,6 +240,8 @@ func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string require.NoError(t, err) chainParser.Activate() chainTracker.RegisterForBlockTimeUpdates(chainParser) + providerUp := checkGrpcServerStatusWithTimeout(rpcProviderEndpoint.NetworkAddress.Address, time.Millisecond*261) + require.True(t, providerUp) return rpcProviderServer, endpoint, &replySetter } diff --git a/protocol/rpcprovider/provider_listener.go b/protocol/rpcprovider/provider_listener.go index f82918a51c..f5bbd0e8c7 100644 --- a/protocol/rpcprovider/provider_listener.go +++ b/protocol/rpcprovider/provider_listener.go @@ -86,10 +86,14 @@ func NewProviderListener(ctx context.Context, networkAddress lavasession.Network var serveExecutor func() error if networkAddress.DisableTLS { utils.LavaFormatInfo("Running with disabled TLS configuration") - serveExecutor = func() error { return pl.httpServer.Serve(lis) } + serveExecutor = func() error { + return pl.httpServer.Serve(lis) + } } else { pl.httpServer.TLSConfig = lavasession.GetTlsConfig(networkAddress) - serveExecutor = func() error { return pl.httpServer.ServeTLS(lis, "", "") } + serveExecutor = func() error { + return pl.httpServer.ServeTLS(lis, "", "") + } } relayServer := &relayServer{relayReceivers: map[string]*relayReceiverWrapper{}} From a551a32db73e0f98779d88e084b496bc65608f06 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Thu, 7 Mar 2024 15:49:20 +0200 Subject: [PATCH 36/57] fixed bug in rpcconsumer, finished unitest --- protocol/integration/protocol_test.go | 29 ++++++++++++---------- protocol/rpcconsumer/relay_processor.go | 2 +- protocol/rpcconsumer/rpcconsumer_server.go | 5 ++-- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index 1a914015d9..2d2f2924af 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -3,6 +3,7 @@ package integration_test import ( "context" "fmt" + "io" "net/http" "os" "strconv" @@ -24,9 +25,7 @@ import ( "github.com/lavanet/lava/utils/rand" "github.com/lavanet/lava/utils/sigs" "github.com/stretchr/testify/require" - "google.golang.org/grpc" "google.golang.org/grpc/connectivity" - "google.golang.org/grpc/credentials/insecure" spectypes "github.com/lavanet/lava/x/spec/types" ) @@ -53,7 +52,7 @@ func TestMain(m *testing.M) { func isGrpcServerUp(url string) bool { ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*50) defer cancel() - conn, err := grpc.DialContext(ctx, url, grpc.WithTransportCredentials(insecure.NewCredentials())) + conn, err := lavasession.ConnectgRPCClient(context.Background(), url, true) if err != nil { return false } @@ -169,7 +168,7 @@ func createRpcConsumer(t *testing.T, ctx 
context.Context, specId string, apiInte return rpcConsumerServer } -func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string, specId string, apiInterface string, listenAddress string, account sigs.Account, lavaChainID string, addons []string) (*rpcprovider.RPCProviderServer, *lavasession.RPCProviderEndpoint, *ReplySetter) { +func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string, specId string, apiInterface string, listenAddress string, account sigs.Account, lavaChainID string, addons []string) (*rpcprovider.RPCProviderServer, *lavasession.RPCProviderEndpoint, *ReplySetter, *MockChainFetcher) { replySetter := ReplySetter{ status: http.StatusOK, replyDataBuf: []byte(`{"reply": "REPLY-STUB"}`), @@ -192,9 +191,9 @@ func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string Address: endpoint.NetworkAddress.Address, KeyPem: "", CertPem: "", - DisableTLS: true, + DisableTLS: false, }, - ChainID: lavaChainID, + ChainID: specId, ApiInterface: apiInterface, Geolocation: 1, NodeUrls: []common.NodeUrl{ @@ -242,7 +241,7 @@ func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string chainTracker.RegisterForBlockTimeUpdates(chainParser) providerUp := checkGrpcServerStatusWithTimeout(rpcProviderEndpoint.NetworkAddress.Address, time.Millisecond*261) require.True(t, providerUp) - return rpcProviderServer, endpoint, &replySetter + return rpcProviderServer, endpoint, &replySetter, mockChainFetcher } func TestConsumerProviderBasic(t *testing.T) { @@ -259,10 +258,11 @@ func TestConsumerProviderBasic(t *testing.T) { consumerListenAddress := "localhost:21111" pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{} type providerData struct { - account sigs.Account - endpoint *lavasession.RPCProviderEndpoint - server *rpcprovider.RPCProviderServer - replySetter *ReplySetter + account sigs.Account + endpoint *lavasession.RPCProviderEndpoint + server *rpcprovider.RPCProviderServer + replySetter *ReplySetter + mockChainFetcher *MockChainFetcher } providers := []providerData{} @@ -277,7 +277,7 @@ func TestConsumerProviderBasic(t *testing.T) { ctx := context.Background() providerDataI := providers[i] listenAddress := "localhost:111" + strconv.Itoa(i) - providers[i].server, providers[i].endpoint, providers[i].replySetter = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) + providers[i].server, providers[i].endpoint, providers[i].replySetter, providers[i].mockChainFetcher = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) } for i := 0; i < numProviders; i++ { pairingList[uint64(i)] = &lavasession.ConsumerSessionsWithProvider{ @@ -300,6 +300,9 @@ func TestConsumerProviderBasic(t *testing.T) { client := http.Client{} resp, err := client.Get("http://" + consumerListenAddress + "/status") require.NoError(t, err) - require.Equal(t, http.StatusInternalServerError, resp.StatusCode) + require.Equal(t, http.StatusOK, resp.StatusCode) + bodyBytes, err := io.ReadAll(resp.Body) + require.NoError(t, err) + require.Equal(t, providers[0].replySetter.replyDataBuf, bodyBytes) resp.Body.Close() } diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index c280b36ab5..7cf9682283 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -193,7 +193,7 @@ func (rp 
*RelayProcessor) checkEndProcessing(responsesCount int) bool { } } // check if we got all of the responses - if rp.usedProviders.CurrentlyUsed() == 0 && responsesCount >= rp.usedProviders.SessionsLatestBatch() { + if responsesCount >= rp.usedProviders.SessionsLatestBatch() { // no active sessions, and we read all the responses, we can return return true } diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 14780e0f1a..9b9bdfff88 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -498,13 +498,14 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( goroutineCtx = utils.WithUniqueIdentifier(goroutineCtx, guid) } defer func() { - // Close context - goroutineCtxCancel() // Return response relayProcessor.SetResponse(&relayResponse{ relayResult: *localRelayResult, err: errResponse, }) + + // Close context + goroutineCtxCancel() }() localRelayRequestData := *relayRequestData From 1ad183d339995bbeba5d5cdbf75672a6f33d5aec Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 11 Mar 2024 12:22:34 +0200 Subject: [PATCH 37/57] fix panic in relayErrors --- protocol/integration/protocol_test.go | 74 +++++++++++++++++++++++++++ protocol/rpcconsumer/relay_errors.go | 27 ++++++++-- 2 files changed, 98 insertions(+), 3 deletions(-) diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index 2d2f2924af..48e664e123 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -2,6 +2,7 @@ package integration_test import ( "context" + "encoding/json" "fmt" "io" "net/http" @@ -306,3 +307,76 @@ func TestConsumerProviderBasic(t *testing.T) { require.Equal(t, providers[0].replySetter.replyDataBuf, bodyBytes) resp.Body.Close() } + +func TestConsumerProviderWithProviders(t *testing.T) { + ctx := context.Background() + // can be any spec and api interface + specId := "LAV1" + apiInterface := spectypes.APIInterfaceTendermintRPC + epoch := uint64(100) + requiredResponses := 1 + lavaChainID := "lava" + + numProviders := 5 + + consumerListenAddress := "localhost:21111" + pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{} + type providerData struct { + account sigs.Account + endpoint *lavasession.RPCProviderEndpoint + server *rpcprovider.RPCProviderServer + replySetter *ReplySetter + mockChainFetcher *MockChainFetcher + } + providers := []providerData{} + + for i := 0; i < numProviders; i++ { + // providerListenAddress := "localhost:111" + strconv.Itoa(i) + account := sigs.GenerateDeterministicFloatingKey(randomizer) + providerDataI := providerData{account: account} + providers = append(providers, providerDataI) + } + consumerAccount := sigs.GenerateDeterministicFloatingKey(randomizer) + for i := 0; i < numProviders; i++ { + ctx := context.Background() + providerDataI := providers[i] + listenAddress := "localhost:111" + strconv.Itoa(i) + providers[i].server, providers[i].endpoint, providers[i].replySetter, providers[i].mockChainFetcher = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) + providers[i].replySetter.replyDataBuf = []byte(fmt.Sprintf(`{"reply": %d}`, i)) + } + for i := 0; i < numProviders; i++ { + pairingList[uint64(i)] = &lavasession.ConsumerSessionsWithProvider{ + PublicLavaAddress: providers[i].account.Addr.String(), + Endpoints: []*lavasession.Endpoint{ + { + NetworkAddress: 
providers[i].endpoint.NetworkAddress.Address, + Enabled: true, + Geolocation: 1, + }, + }, + Sessions: map[int64]*lavasession.SingleConsumerSession{}, + MaxComputeUnits: 10000, + UsedComputeUnits: 0, + PairingEpoch: epoch, + } + } + rpcconsumerServer := createRpcConsumer(t, ctx, specId, apiInterface, consumerAccount, consumerListenAddress, epoch, pairingList, requiredResponses, lavaChainID) + require.NotNil(t, rpcconsumerServer) + counter := map[int]int{} + for i := 0; i <= 1000; i++ { + client := http.Client{} + resp, err := client.Get("http://" + consumerListenAddress + "/status") + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + bodyBytes, err := io.ReadAll(resp.Body) + require.NoError(t, err) + resp.Body.Close() + mapi := map[string]int{} + err = json.Unmarshal(bodyBytes, &mapi) + require.NoError(t, err) + id, ok := mapi["reply"] + require.True(t, ok) + counter[id]++ + } + require.Len(t, counter, numProviders) // make sure to talk with all of them +} diff --git a/protocol/rpcconsumer/relay_errors.go b/protocol/rpcconsumer/relay_errors.go index d5e04485c8..b988edac52 100644 --- a/protocol/rpcconsumer/relay_errors.go +++ b/protocol/rpcconsumer/relay_errors.go @@ -2,6 +2,7 @@ package rpcconsumer import ( "fmt" + "regexp" "strconv" github_com_cosmos_cosmos_sdk_types "github.com/cosmos/cosmos-sdk/types" @@ -31,12 +32,28 @@ func (r *RelayErrors) findMaxAppearances(input map[string][]int) (maxVal int, in return } +func replacePattern(input, pattern, replacement string) string { + re := regexp.MustCompile(pattern) + return re.ReplaceAllString(input, replacement) +} + +func (r *RelayErrors) sanitizeError(errMsg string) string { + // Replace SessionId:(any digit here) with SessionId:* + errMsg = replacePattern(errMsg, `SessionId:\d+`, "SessionId:*") + + // Replace GUID:(any digit here) with GUID:* + errMsg = replacePattern(errMsg, `GUID:\d+`, "GUID:*") + + return errMsg +} + func (r *RelayErrors) GetBestErrorMessageForUser() RelayError { bestIndex := -1 bestResult := github_com_cosmos_cosmos_sdk_types.ZeroDec() errorMap := make(map[string][]int) for idx, relayError := range r.relayErrors { errorMessage := relayError.err.Error() + errorMessage = r.sanitizeError(errorMessage) errorMap[errorMessage] = append(errorMap[errorMessage], idx) if relayError.ProviderInfo.ProviderQoSExcellenceSummery.IsNil() || relayError.ProviderInfo.ProviderStake.Amount.IsNil() { continue @@ -73,16 +90,16 @@ func (r *RelayErrors) GetBestErrorMessageForUser() RelayError { } func (r *RelayErrors) getAllUniqueErrors() []error { - allErrors := make([]error, len(r.relayErrors)) + allErrors := []error{} repeatingErrors := make(map[string]struct{}) - for idx, relayError := range r.relayErrors { + for _, relayError := range r.relayErrors { errString := relayError.err.Error() // using strings to filter repeating errors _, ok := repeatingErrors[errString] if ok { continue } repeatingErrors[errString] = struct{}{} - allErrors[idx] = relayError.err + allErrors = append(allErrors, relayError.err) } return allErrors } @@ -106,3 +123,7 @@ type RelayError struct { ProviderInfo common.ProviderInfo response *relayResponse } + +func (re RelayError) String() string { + return fmt.Sprintf("err: %s, ProviderInfo: %v, response: %v", re.err, re.ProviderInfo, re.response) +} From 650ab233e92ebb7ddda68e63e5c18a20f25aa960 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 11 Mar 2024 12:26:06 +0200 Subject: [PATCH 38/57] sanitize unique errors get --- protocol/rpcconsumer/relay_errors.go | 8 ++++---- 1 file changed, 
4 insertions(+), 4 deletions(-) diff --git a/protocol/rpcconsumer/relay_errors.go b/protocol/rpcconsumer/relay_errors.go index b988edac52..0adae35b9c 100644 --- a/protocol/rpcconsumer/relay_errors.go +++ b/protocol/rpcconsumer/relay_errors.go @@ -37,7 +37,8 @@ func replacePattern(input, pattern, replacement string) string { return re.ReplaceAllString(input, replacement) } -func (r *RelayErrors) sanitizeError(errMsg string) string { +func (r *RelayErrors) sanitizeError(err error) string { + errMsg := err.Error() // Replace SessionId:(any digit here) with SessionId:* errMsg = replacePattern(errMsg, `SessionId:\d+`, "SessionId:*") @@ -52,8 +53,7 @@ func (r *RelayErrors) GetBestErrorMessageForUser() RelayError { bestResult := github_com_cosmos_cosmos_sdk_types.ZeroDec() errorMap := make(map[string][]int) for idx, relayError := range r.relayErrors { - errorMessage := relayError.err.Error() - errorMessage = r.sanitizeError(errorMessage) + errorMessage := r.sanitizeError(relayError.err) errorMap[errorMessage] = append(errorMap[errorMessage], idx) if relayError.ProviderInfo.ProviderQoSExcellenceSummery.IsNil() || relayError.ProviderInfo.ProviderStake.Amount.IsNil() { continue @@ -93,7 +93,7 @@ func (r *RelayErrors) getAllUniqueErrors() []error { allErrors := []error{} repeatingErrors := make(map[string]struct{}) for _, relayError := range r.relayErrors { - errString := relayError.err.Error() // using strings to filter repeating errors + errString := r.sanitizeError(relayError.err) // using strings to filter repeating errors _, ok := repeatingErrors[errString] if ok { continue From 8d0ff06f2b42989d51353039ae9e2161e2522eb2 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 11 Mar 2024 14:15:34 +0200 Subject: [PATCH 39/57] prevent port conflict --- protocol/integration/protocol_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index 48e664e123..c66ec6a3d7 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -319,7 +319,7 @@ func TestConsumerProviderWithProviders(t *testing.T) { numProviders := 5 - consumerListenAddress := "localhost:21111" + consumerListenAddress := "localhost:21112" pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{} type providerData struct { account sigs.Account @@ -340,7 +340,7 @@ func TestConsumerProviderWithProviders(t *testing.T) { for i := 0; i < numProviders; i++ { ctx := context.Background() providerDataI := providers[i] - listenAddress := "localhost:111" + strconv.Itoa(i) + listenAddress := "localhost:112" + strconv.Itoa(i) providers[i].server, providers[i].endpoint, providers[i].replySetter, providers[i].mockChainFetcher = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) providers[i].replySetter.replyDataBuf = []byte(fmt.Sprintf(`{"reply": %d}`, i)) } From 511d2be59618b36109dece4ad02aa45be8d15f01 Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Mon, 11 Mar 2024 13:37:32 +0100 Subject: [PATCH 40/57] rename --- protocol/chainlib/chain_message_queries.go | 2 +- protocol/common/collections.go | 4 +- protocol/common/timeout.go | 2 +- .../lavasession/consumer_session_manager.go | 2 +- .../consumer_session_manager_test.go | 50 +++++++++---------- .../end_to_end_lavasession_test.go | 6 +-- protocol/rpcconsumer/relay_processor.go | 2 +- 7 files changed, 34 insertions(+), 34 deletions(-) diff --git 
a/protocol/chainlib/chain_message_queries.go b/protocol/chainlib/chain_message_queries.go index 7fce2df76f..a8e453cdf1 100644 --- a/protocol/chainlib/chain_message_queries.go +++ b/protocol/chainlib/chain_message_queries.go @@ -3,7 +3,7 @@ package chainlib import "github.com/lavanet/lava/protocol/common" func ShouldSendToAllProviders(chainMessage ChainMessage) bool { - return chainMessage.GetApi().Category.Stateful == common.CONSISTENCY_SELECT_ALLPROVIDERS + return chainMessage.GetApi().Category.Stateful == common.CONSISTENCY_SELECT_ALL_PROVIDERS } func GetAddon(chainMessage ChainMessageForSend) string { diff --git a/protocol/common/collections.go b/protocol/common/collections.go index ad688973a8..7e75f5028d 100644 --- a/protocol/common/collections.go +++ b/protocol/common/collections.go @@ -5,8 +5,8 @@ import ( ) const ( - CONSISTENCY_SELECT_ALLPROVIDERS = 1 - NOSTATE = 0 + CONSISTENCY_SELECT_ALL_PROVIDERS = 1 + NO_STATE = 0 ) func GetExtensionNames(extensionCollection []*spectypes.Extension) (extensions []string) { diff --git a/protocol/common/timeout.go b/protocol/common/timeout.go index f456c1f95b..3b6e6d4708 100644 --- a/protocol/common/timeout.go +++ b/protocol/common/timeout.go @@ -72,7 +72,7 @@ type TimeoutInfo struct { func GetTimeoutForProcessing(relayTimeout time.Duration, timeoutInfo TimeoutInfo) time.Duration { ctxTimeout := DefaultTimeout - if timeoutInfo.Hanging || timeoutInfo.CU > 100 || timeoutInfo.Stateful == CONSISTENCY_SELECT_ALLPROVIDERS { + if timeoutInfo.Hanging || timeoutInfo.CU > 100 || timeoutInfo.Stateful == CONSISTENCY_SELECT_ALL_PROVIDERS { ctxTimeout = DefaultTimeoutLong } if relayTimeout > ctxTimeout { diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index d5124bbd6a..9d713dce4b 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -492,7 +492,7 @@ func (csm *ConsumerSessionManager) getValidProviderAddresses(ignoredProvidersLis } } var providers []string - if stateful == common.CONSISTENCY_SELECT_ALLPROVIDERS && csm.providerOptimizer.Strategy() != provideroptimizer.STRATEGY_COST { + if stateful == common.CONSISTENCY_SELECT_ALL_PROVIDERS && csm.providerOptimizer.Strategy() != provideroptimizer.STRATEGY_COST { providers = GetAllProviders(validAddresses, ignoredProvidersList) } else { providers = csm.providerOptimizer.ChooseProvider(validAddresses, ignoredProvidersList, cu, requestedBlock, OptimizerPerturbation) diff --git a/protocol/lavasession/consumer_session_manager_test.go b/protocol/lavasession/consumer_session_manager_test.go index a43be66df7..965f693263 100644 --- a/protocol/lavasession/consumer_session_manager_test.go +++ b/protocol/lavasession/consumer_session_manager_test.go @@ -139,7 +139,7 @@ func TestHappyFlow(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -161,7 +161,7 @@ func TestHappyFlowVirtualEpoch(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. 
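// --- Editor's note (illustrative sketch, not part of this patch) ---
// The hunks in this commit only rename common.NOSTATE to common.NO_STATE (and
// CONSISTENCY_SELECT_ALLPROVIDERS to CONSISTENCY_SELECT_ALL_PROVIDERS); the call
// pattern the tests exercise is unchanged. A minimal, hypothetical sketch of that
// pattern, reusing identifiers from the surrounding tests (sendRelay is a
// stand-in helper, not part of the codebase):
//
//	css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil),
//		servicedBlockNumber, "", nil, common.NO_STATE, 0)
//	require.NoError(t, err)
//	for _, cs := range css {
//		if relayErr := sendRelay(cs); relayErr != nil {
//			require.NoError(t, csm.OnSessionFailure(cs.Session, relayErr))
//		} else {
//			require.NoError(t, csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest,
//				time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond),
//				servicedBlockNumber-1, numberOfProviders, numberOfProviders, false))
//		}
//	}
// --------------------------------------------------------------------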
require.NoError(t, err) - css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1), NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session + css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1), NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, virtualEpoch) // get a session require.NoError(t, err) for _, cs := range css { @@ -185,7 +185,7 @@ func TestVirtualEpochWithFailure(t *testing.T) { err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - _, err = csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1)+10, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session + _, err = csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1)+10, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, virtualEpoch) // get a session require.Error(t, err) } @@ -195,8 +195,8 @@ func TestPairingReset(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - csm.validAddresses = []string{} // set valid addresses to zero - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + csm.validAddresses = []string{} // set valid addresses to zero + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) require.Equal(t, len(csm.validAddresses), len(csm.pairingAddresses)) @@ -225,7 +225,7 @@ func TestPairingResetWithFailures(t *testing.T) { if len(csm.validAddresses) == 0 { // wait for all pairings to be blocked. break } - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -234,7 +234,7 @@ func TestPairingResetWithFailures(t *testing.T) { } } require.Equal(t, len(csm.validAddresses), 0) - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) require.Equal(t, len(csm.validAddresses), len(csm.pairingAddresses)) @@ -259,7 +259,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { if len(csm.validAddresses) == 0 { // wait for all pairings to be blocked. 
break } - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session for _, cs := range css { err = csm.OnSessionFailure(cs.Session, nil) @@ -271,7 +271,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { } } require.Equal(t, len(csm.validAddresses), 0) - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) require.Equal(t, len(csm.validAddresses), len(csm.pairingAddresses)) @@ -283,7 +283,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { } } - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -318,7 +318,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { sessionList := make([]session, numberOfAllowedSessionsPerConsumer) sessionListData := make([]SessTestData, numberOfAllowedSessionsPerConsumer) for i := 0; i < numberOfAllowedSessionsPerConsumer; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { // get a session @@ -354,7 +354,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { } for i := 0; i < numberOfAllowedSessionsPerConsumer; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { // get a session @@ -387,7 +387,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { } func successfulSession(ctx context.Context, csm *ConsumerSessionManager, t *testing.T, p int, ch chan int) { - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -400,7 +400,7 @@ func successfulSession(ctx context.Context, csm *ConsumerSessionManager, t *test } func failedSession(ctx context.Context, csm *ConsumerSessionManager, t *testing.T, p int, ch chan int) { - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ 
-518,7 +518,7 @@ func TestSessionFailureAndGetReportedProviders(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -552,7 +552,7 @@ func TestSessionFailureEpochMisMatch(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -573,7 +573,7 @@ func TestAllProvidersEndpointsDisabled(t *testing.T) { pairingList := createPairingList("", false) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - cs, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + cs, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.Nil(t, cs) require.Error(t, err) } @@ -613,7 +613,7 @@ func TestGetSession(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) require.NoError(t, err) for _, cs := range css { @@ -659,7 +659,7 @@ func TestPairingWithAddons(t *testing.T) { // block all providers initialProvidersLen := len(csm.getValidAddresses(addon, nil)) for i := 0; i < initialProvidersLen; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.NO_STATE, 0) // get a session require.NoError(t, err, i) for _, cs := range css { err = csm.OnSessionFailure(cs.Session, ReportAndBlockProviderError) @@ -671,7 +671,7 @@ func TestPairingWithAddons(t *testing.T) { if addon != "" { require.NotEqual(t, csm.getValidAddresses(addon, nil), csm.getValidAddresses("", nil)) } - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) @@ -734,7 +734,7 @@ func 
TestPairingWithExtensions(t *testing.T) { } initialProvidersLen := len(csm.getValidAddresses(extensionOpt.addon, extensionOpt.extensions)) for i := 0; i < initialProvidersLen; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NO_STATE, 0) // get a session require.NoError(t, err, i) for _, cs := range css { err = csm.OnSessionFailure(cs.Session, ReportAndBlockProviderError) @@ -746,7 +746,7 @@ func TestPairingWithExtensions(t *testing.T) { if len(extensionOpt.extensions) > 0 || extensionOpt.addon != "" { require.NotEqual(t, csm.getValidAddresses(extensionOpt.addon, extensionOpt.extensions), csm.getValidAddresses("", nil)) } - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) @@ -762,7 +762,7 @@ func TestNoPairingsError(t *testing.T) { err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) time.Sleep(5 * time.Millisecond) // let probes finish - _, err = csm.getValidProviderAddresses(map[string]struct{}{}, 10, 100, "invalid", nil, common.NOSTATE) + _, err = csm.getValidProviderAddresses(map[string]struct{}{}, 10, 100, "invalid", nil, common.NO_STATE) require.Error(t, err) require.True(t, PairingListEmptyError.Is(err)) } @@ -781,7 +781,7 @@ func TestPairingWithStateful(t *testing.T) { providerAddresses := csm.getValidAddresses(addon, nil) allProviders := len(providerAddresses) require.Equal(t, 10, allProviders) - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALL_PROVIDERS, 0) // get a session require.NoError(t, err) require.Equal(t, allProviders, len(css)) for _, cs := range css { @@ -790,7 +790,7 @@ func TestPairingWithStateful(t *testing.T) { } usedProviders := NewUsedProviders(nil) usedProviders.RemoveUsed(providerAddresses[0], nil) - css, err = csm.GetSessions(ctx, cuForFirstRequest, usedProviders, servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session + css, err = csm.GetSessions(ctx, cuForFirstRequest, usedProviders, servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALL_PROVIDERS, 0) // get a session require.NoError(t, err) require.Equal(t, allProviders-1, len(css)) }) diff --git a/protocol/lavasession/end_to_end_lavasession_test.go b/protocol/lavasession/end_to_end_lavasession_test.go index 6cdd141aca..abc42fb087 100644 --- a/protocol/lavasession/end_to_end_lavasession_test.go +++ b/protocol/lavasession/end_to_end_lavasession_test.go @@ -28,7 +28,7 @@ func TestHappyFlowE2EEmergency(t *testing.T) { successfulRelays++ for i 
:= 0; i < len(consumerVirtualEpochs); i++ { - css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, consumerVirtualEpochs[i]) // get a session + css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, consumerVirtualEpochs[i]) // get a session require.NoError(t, err) for _, cs := range css { @@ -92,7 +92,7 @@ func TestHappyFlowE2EEmergency(t *testing.T) { func TestHappyFlowEmergencyInConsumer(t *testing.T) { csm, psm, ctx := prepareSessionsWithFirstRelay(t, maxCuForVirtualEpoch) - css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session + css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, virtualEpoch) // get a session require.NoError(t, err) for _, cs := range css { @@ -157,7 +157,7 @@ func prepareSessionsWithFirstRelay(t *testing.T, cuForFirstRequest uint64) (*Con err := csm.UpdateAllProviders(epoch1, cswpList) // update the providers. require.NoError(t, err) // get single consumer session - css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 7cf9682283..98b08393a0 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -30,7 +30,7 @@ const ( func NewRelayProcessor(ctx context.Context, usedProviders *lavasession.UsedProviders, requiredSuccesses int, chainMessage chainlib.ChainMessage, consumerConsistency *ConsumerConsistency, dappID string, consumerIp string) *RelayProcessor { guid, _ := utils.GetUniqueIdentifier(ctx) selection := Quorum // select the majority of node responses - if chainlib.GetStateful(chainMessage) == common.CONSISTENCY_SELECT_ALLPROVIDERS { + if chainlib.GetStateful(chainMessage) == common.CONSISTENCY_SELECT_ALL_PROVIDERS { selection = BestResult // select the majority of node successes } if requiredSuccesses <= 0 { From 0e225a307b0cd4ad25a8ea2880d3b2885b1efc70 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 11 Mar 2024 14:37:48 +0200 Subject: [PATCH 41/57] fix addUsed with len(0) resetting the wait --- protocol/lavasession/consumer_session_manager.go | 2 +- protocol/lavasession/consumer_types.go | 2 +- protocol/lavasession/used_providers.go | 12 +++++++----- protocol/lavasession/used_providers_test.go | 6 +++--- protocol/rpcconsumer/relay_processor_test.go | 16 ++++++++-------- 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index d5124bbd6a..a320d222e3 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -324,7 +324,7 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS if !canSelect { return nil, utils.LavaFormatError("failed getting sessions from used Providers", nil, utils.LogAttr("usedProviders", usedProviders), utils.LogAttr("endpoint", csm.rpcEndpoint)) } - defer func() { usedProviders.AddUsed(consumerSessionMap) }() + 
defer func() { usedProviders.AddUsed(consumerSessionMap, errRet) }() initUnwantedProviders := usedProviders.GetUnwantedProvidersToSend() extensionNames := common.GetExtensionNames(extensions) diff --git a/protocol/lavasession/consumer_types.go b/protocol/lavasession/consumer_types.go index 1b5c64438b..18a578844f 100644 --- a/protocol/lavasession/consumer_types.go +++ b/protocol/lavasession/consumer_types.go @@ -24,7 +24,7 @@ var AllowInsecureConnectionToProviders = false type UsedProvidersInf interface { RemoveUsed(providerAddress string, err error) TryLockSelection(context.Context) bool - AddUsed(ConsumerSessionsMap) + AddUsed(ConsumerSessionsMap, error) GetUnwantedProvidersToSend() map[string]struct{} } diff --git a/protocol/lavasession/used_providers.go b/protocol/lavasession/used_providers.go index 342e216ab4..21d0e2a898 100644 --- a/protocol/lavasession/used_providers.go +++ b/protocol/lavasession/used_providers.go @@ -100,17 +100,19 @@ func (up *UsedProviders) ClearUnwanted() { up.unwantedProviders = map[string]struct{}{} } -func (up *UsedProviders) AddUsed(sessions ConsumerSessionsMap) { +func (up *UsedProviders) AddUsed(sessions ConsumerSessionsMap, err error) { if up == nil { return } up.lock.Lock() defer up.lock.Unlock() // this is nil safe - up.sessionsLatestBatch = 0 - for provider := range sessions { // the key for ConsumerSessionsMap is the provider public address - up.providers[provider] = struct{}{} - up.sessionsLatestBatch++ + if len(sessions) > 0 && err == nil { + up.sessionsLatestBatch = 0 + for provider := range sessions { // the key for ConsumerSessionsMap is the provider public address + up.providers[provider] = struct{}{} + up.sessionsLatestBatch++ + } } up.selecting = false } diff --git a/protocol/lavasession/used_providers_test.go b/protocol/lavasession/used_providers_test.go index f2f2557b25..042394b4e5 100644 --- a/protocol/lavasession/used_providers_test.go +++ b/protocol/lavasession/used_providers_test.go @@ -23,7 +23,7 @@ func TestUsedProviders(t *testing.T) { unwanted := usedProviders.GetUnwantedProvidersToSend() require.Len(t, unwanted, 0) consumerSessionsMap := ConsumerSessionsMap{"test": &SessionInfo{}, "test2": &SessionInfo{}} - usedProviders.AddUsed(consumerSessionsMap) + usedProviders.AddUsed(consumerSessionsMap, nil) canUseAgain = usedProviders.tryLockSelection() require.True(t, canUseAgain) unwanted = usedProviders.GetUnwantedProvidersToSend() @@ -32,7 +32,7 @@ func TestUsedProviders(t *testing.T) { canUseAgain = usedProviders.tryLockSelection() require.False(t, canUseAgain) consumerSessionsMap = ConsumerSessionsMap{"test3": &SessionInfo{}, "test4": &SessionInfo{}} - usedProviders.AddUsed(consumerSessionsMap) + usedProviders.AddUsed(consumerSessionsMap, nil) unwanted = usedProviders.GetUnwantedProvidersToSend() require.Len(t, unwanted, 4) require.Equal(t, 4, usedProviders.CurrentlyUsed()) @@ -68,7 +68,7 @@ func TestUsedProvidersAsync(t *testing.T) { go func() { time.Sleep(time.Millisecond * 10) consumerSessionsMap := ConsumerSessionsMap{"test": &SessionInfo{}, "test2": &SessionInfo{}} - usedProviders.AddUsed(consumerSessionsMap) + usedProviders.AddUsed(consumerSessionsMap, nil) }() ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*100) defer cancel() diff --git a/protocol/rpcconsumer/relay_processor_test.go b/protocol/rpcconsumer/relay_processor_test.go index 37f74646da..5396286353 100644 --- a/protocol/rpcconsumer/relay_processor_test.go +++ b/protocol/rpcconsumer/relay_processor_test.go @@ -96,7 +96,7 @@ func 
TestRelayProcessorHappyFlow(t *testing.T) { require.Zero(t, usedProviders.CurrentlyUsed()) require.Zero(t, usedProviders.SessionsLatestBatch()) consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} - usedProviders.AddUsed(consumerSessionsMap) + usedProviders.AddUsed(consumerSessionsMap, nil) ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*10) defer cancel() go sendSuccessResp(relayProcessor, "lava@test", time.Millisecond*5) @@ -138,7 +138,7 @@ func TestRelayProcessorTimeout(t *testing.T) { require.Zero(t, usedProviders.CurrentlyUsed()) require.Zero(t, usedProviders.SessionsLatestBatch()) consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} - usedProviders.AddUsed(consumerSessionsMap) + usedProviders.AddUsed(consumerSessionsMap, nil) go func() { time.Sleep(time.Millisecond * 5) ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) @@ -147,7 +147,7 @@ func TestRelayProcessorTimeout(t *testing.T) { require.NoError(t, ctx.Err()) require.True(t, canUse) consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test3": &lavasession.SessionInfo{}, "lava@test4": &lavasession.SessionInfo{}} - usedProviders.AddUsed(consumerSessionsMap) + usedProviders.AddUsed(consumerSessionsMap, nil) }() go sendSuccessResp(relayProcessor, "lava@test", time.Millisecond*20) ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*200) @@ -190,7 +190,7 @@ func TestRelayProcessorRetry(t *testing.T) { require.Zero(t, usedProviders.CurrentlyUsed()) require.Zero(t, usedProviders.SessionsLatestBatch()) consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} - usedProviders.AddUsed(consumerSessionsMap) + usedProviders.AddUsed(consumerSessionsMap, nil) go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) go sendSuccessResp(relayProcessor, "lava@test2", time.Millisecond*20) @@ -234,7 +234,7 @@ func TestRelayProcessorRetryNodeError(t *testing.T) { require.Zero(t, usedProviders.CurrentlyUsed()) require.Zero(t, usedProviders.SessionsLatestBatch()) consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} - usedProviders.AddUsed(consumerSessionsMap) + usedProviders.AddUsed(consumerSessionsMap, nil) go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) go sendNodeError(relayProcessor, "lava@test2", time.Millisecond*20) @@ -278,7 +278,7 @@ func TestRelayProcessorStatefulApi(t *testing.T) { require.Zero(t, usedProviders.CurrentlyUsed()) require.Zero(t, usedProviders.SessionsLatestBatch()) consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava4@test": &lavasession.SessionInfo{}, "lava3@test": &lavasession.SessionInfo{}, "lava@test": &lavasession.SessionInfo{}, "lava2@test": &lavasession.SessionInfo{}} - usedProviders.AddUsed(consumerSessionsMap) + usedProviders.AddUsed(consumerSessionsMap, nil) go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) go sendNodeError(relayProcessor, "lava2@test", time.Millisecond*20) go sendNodeError(relayProcessor, "lava3@test", time.Millisecond*25) @@ -323,7 +323,7 @@ func TestRelayProcessorStatefulApiErr(t *testing.T) { require.Zero(t, usedProviders.CurrentlyUsed()) require.Zero(t, 
usedProviders.SessionsLatestBatch()) consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava4@test": &lavasession.SessionInfo{}, "lava3@test": &lavasession.SessionInfo{}, "lava@test": &lavasession.SessionInfo{}, "lava2@test": &lavasession.SessionInfo{}} - usedProviders.AddUsed(consumerSessionsMap) + usedProviders.AddUsed(consumerSessionsMap, nil) go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) go sendNodeError(relayProcessor, "lava2@test", time.Millisecond*20) go sendNodeError(relayProcessor, "lava3@test", time.Millisecond*25) @@ -368,7 +368,7 @@ func TestRelayProcessorLatest(t *testing.T) { require.Zero(t, usedProviders.SessionsLatestBatch()) consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} - usedProviders.AddUsed(consumerSessionsMap) + usedProviders.AddUsed(consumerSessionsMap, nil) go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) go sendSuccessResp(relayProcessor, "lava@test2", time.Millisecond*20) From b0f001ad777c6ce8db98f8a88162d3fb0b34cbbd Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 11 Mar 2024 16:15:20 +0200 Subject: [PATCH 42/57] simplify port selection in unitests --- protocol/integration/mocks.go | 15 +++++++++++++++ protocol/integration/protocol_test.go | 11 ++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/protocol/integration/mocks.go b/protocol/integration/mocks.go index 870e8180b0..6ff2fa107b 100644 --- a/protocol/integration/mocks.go +++ b/protocol/integration/mocks.go @@ -242,3 +242,18 @@ func NewMockChainFetcher(startBlock, blocksToSave int64, callback func()) *MockC } return &mockCHainFetcher } + +type uniqueAddresGenerator struct { + seed int + lock sync.Mutex +} + +func (ug *uniqueAddresGenerator) GetAddress() string { + ug.lock.Lock() + defer ug.lock.Unlock() + ug.seed++ + if ug.seed < 100 { + return "localhost:10" + strconv.Itoa(ug.seed) + } + return "localhost:1" + strconv.Itoa(ug.seed) +} diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index c66ec6a3d7..670a16696e 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -7,7 +7,6 @@ import ( "io" "net/http" "os" - "strconv" "testing" "time" @@ -34,12 +33,14 @@ import ( var ( seed int64 randomizer *sigs.ZeroReader + addressGen uniqueAddresGenerator ) func TestMain(m *testing.M) { // This code will run once before any test cases are executed. 
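// --- Editor's note (illustrative sketch, not part of this patch) ---
// This commit replaces the hand-rolled "localhost:111" + strconv.Itoa(i) listen
// addresses with the uniqueAddresGenerator added in mocks.go above: GetAddress()
// increments a counter under a mutex and returns a fresh "localhost:<port>"
// string, so tests that spin up several providers plus a consumer never collide
// on a port. A minimal usage sketch, assuming the generator and the package-level
// addressGen variable shown above:
//
//	consumerListenAddress := addressGen.GetAddress()  // unique port for the consumer
//	providerListenAddress := addressGen.GetAddress()  // next unique port; safe to call from any goroutine
// --------------------------------------------------------------------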
seed = time.Now().Unix() rand.SetSpecificSeed(seed) + addressGen = uniqueAddresGenerator{} randomizer = sigs.NewZeroReader(seed) lavasession.AllowInsecureConnectionToProviders = true // Run the actual tests @@ -256,7 +257,7 @@ func TestConsumerProviderBasic(t *testing.T) { numProviders := 1 - consumerListenAddress := "localhost:21111" + consumerListenAddress := addressGen.GetAddress() pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{} type providerData struct { account sigs.Account @@ -277,7 +278,7 @@ func TestConsumerProviderBasic(t *testing.T) { for i := 0; i < numProviders; i++ { ctx := context.Background() providerDataI := providers[i] - listenAddress := "localhost:111" + strconv.Itoa(i) + listenAddress := addressGen.GetAddress() providers[i].server, providers[i].endpoint, providers[i].replySetter, providers[i].mockChainFetcher = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) } for i := 0; i < numProviders; i++ { @@ -319,7 +320,7 @@ func TestConsumerProviderWithProviders(t *testing.T) { numProviders := 5 - consumerListenAddress := "localhost:21112" + consumerListenAddress := addressGen.GetAddress() pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{} type providerData struct { account sigs.Account @@ -340,7 +341,7 @@ func TestConsumerProviderWithProviders(t *testing.T) { for i := 0; i < numProviders; i++ { ctx := context.Background() providerDataI := providers[i] - listenAddress := "localhost:112" + strconv.Itoa(i) + listenAddress := addressGen.GetAddress() providers[i].server, providers[i].endpoint, providers[i].replySetter, providers[i].mockChainFetcher = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) providers[i].replySetter.replyDataBuf = []byte(fmt.Sprintf(`{"reply": %d}`, i)) } From 44be705927960cf53654111cf44411741a0a8cd1 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Mon, 11 Mar 2024 18:06:48 +0200 Subject: [PATCH 43/57] add timeouts and fails to relays in unitest --- protocol/integration/mocks.go | 6 +++-- protocol/integration/protocol_test.go | 36 +++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/protocol/integration/mocks.go b/protocol/integration/mocks.go index 6ff2fa107b..1b8dae06f7 100644 --- a/protocol/integration/mocks.go +++ b/protocol/integration/mocks.go @@ -3,6 +3,7 @@ package integration_test import ( "context" "fmt" + "net/http" "strconv" "sync" "time" @@ -66,6 +67,7 @@ func (m *mockConsumerStateTracker) GetLatestVirtualEpoch() uint64 { type ReplySetter struct { status int replyDataBuf []byte + handler func([]byte, http.Header) ([]byte, int) } type mockProviderStateTracker struct { @@ -253,7 +255,7 @@ func (ug *uniqueAddresGenerator) GetAddress() string { defer ug.lock.Unlock() ug.seed++ if ug.seed < 100 { - return "localhost:10" + strconv.Itoa(ug.seed) + return "localhost:111" + strconv.Itoa(ug.seed) } - return "localhost:1" + strconv.Itoa(ug.seed) + return "localhost:11" + strconv.Itoa(ug.seed) } diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index 670a16696e..5e3d5b7f4c 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -174,11 +174,21 @@ func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string replySetter := ReplySetter{ status: http.StatusOK, replyDataBuf: []byte(`{"reply": 
"REPLY-STUB"}`), + handler: nil, } serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // Handle the incoming request and provide the desired response - w.WriteHeader(replySetter.status) - fmt.Fprint(w, string(replySetter.replyDataBuf)) + + status := replySetter.status + data := replySetter.replyDataBuf + // if replySetter.handler != nil { + // data = make([]byte, r.ContentLength) + // r.Body.Read(data) + // data, status = replySetter.handler(data, r.Header) + // } + w.WriteHeader(status) + fmt.Fprint(w, string(data)) + }) chainParser, chainRouter, chainFetcher, _, endpoint, err := chainlib.CreateChainLibMocks(ctx, specId, apiInterface, serverHandler, "../../", addons) require.NoError(t, err) @@ -379,5 +389,27 @@ func TestConsumerProviderWithProviders(t *testing.T) { require.True(t, ok) counter[id]++ } + require.Len(t, counter, numProviders) // make sure to talk with all of them + + // add a chance for node errors and timeouts + for i := 0; i < numProviders; i++ { + handler := func(req []byte, header http.Header) (data []byte, status int) { + randVal := rand.Intn(10) + switch randVal { + case 3: + time.Sleep(1 * time.Second) // cause timeout + case 2: + return []byte(`{"message":"bad","code":123}`), http.StatusInternalServerError + } + return providers[i].replySetter.replyDataBuf, http.StatusOK + } + providers[i].replySetter.handler = handler + } + + for i := 0; i <= 100; i++ { + client := http.Client{Timeout: 300 * time.Millisecond} + _, err := client.Get("http://" + consumerListenAddress + "/status") + require.NoError(t, err, i) + } } From 379f79b1a4edc29b6f3b539924456efe0cb90357 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 13 Mar 2024 17:16:57 +0200 Subject: [PATCH 44/57] fix tests --- protocol/integration/protocol_test.go | 224 ++++++++++++--------- protocol/rpcconsumer/rpcconsumer_server.go | 7 +- 2 files changed, 136 insertions(+), 95 deletions(-) diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index 5e3d5b7f4c..3567c60575 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -181,11 +181,11 @@ func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string status := replySetter.status data := replySetter.replyDataBuf - // if replySetter.handler != nil { - // data = make([]byte, r.ContentLength) - // r.Body.Read(data) - // data, status = replySetter.handler(data, r.Header) - // } + if replySetter.handler != nil { + data = make([]byte, r.ContentLength) + r.Body.Read(data) + data, status = replySetter.handler(data, r.Header) + } w.WriteHeader(status) fmt.Fprint(w, string(data)) @@ -320,96 +320,136 @@ func TestConsumerProviderBasic(t *testing.T) { } func TestConsumerProviderWithProviders(t *testing.T) { - ctx := context.Background() - // can be any spec and api interface - specId := "LAV1" - apiInterface := spectypes.APIInterfaceTendermintRPC - epoch := uint64(100) - requiredResponses := 1 - lavaChainID := "lava" - - numProviders := 5 - - consumerListenAddress := addressGen.GetAddress() - pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{} - type providerData struct { - account sigs.Account - endpoint *lavasession.RPCProviderEndpoint - server *rpcprovider.RPCProviderServer - replySetter *ReplySetter - mockChainFetcher *MockChainFetcher - } - providers := []providerData{} - for i := 0; i < numProviders; i++ { - // providerListenAddress := "localhost:111" + strconv.Itoa(i) - account := 
sigs.GenerateDeterministicFloatingKey(randomizer) - providerDataI := providerData{account: account} - providers = append(providers, providerDataI) - } - consumerAccount := sigs.GenerateDeterministicFloatingKey(randomizer) - for i := 0; i < numProviders; i++ { - ctx := context.Background() - providerDataI := providers[i] - listenAddress := addressGen.GetAddress() - providers[i].server, providers[i].endpoint, providers[i].replySetter, providers[i].mockChainFetcher = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) - providers[i].replySetter.replyDataBuf = []byte(fmt.Sprintf(`{"reply": %d}`, i)) - } - for i := 0; i < numProviders; i++ { - pairingList[uint64(i)] = &lavasession.ConsumerSessionsWithProvider{ - PublicLavaAddress: providers[i].account.Addr.String(), - Endpoints: []*lavasession.Endpoint{ - { - NetworkAddress: providers[i].endpoint.NetworkAddress.Address, - Enabled: true, - Geolocation: 1, - }, - }, - Sessions: map[int64]*lavasession.SingleConsumerSession{}, - MaxComputeUnits: 10000, - UsedComputeUnits: 0, - PairingEpoch: epoch, - } - } - rpcconsumerServer := createRpcConsumer(t, ctx, specId, apiInterface, consumerAccount, consumerListenAddress, epoch, pairingList, requiredResponses, lavaChainID) - require.NotNil(t, rpcconsumerServer) - counter := map[int]int{} - for i := 0; i <= 1000; i++ { - client := http.Client{} - resp, err := client.Get("http://" + consumerListenAddress + "/status") - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - bodyBytes, err := io.ReadAll(resp.Body) - require.NoError(t, err) - resp.Body.Close() - mapi := map[string]int{} - err = json.Unmarshal(bodyBytes, &mapi) - require.NoError(t, err) - id, ok := mapi["reply"] - require.True(t, ok) - counter[id]++ + playbook := []struct { + name string + scenario int + }{ + { + name: "basic-success", + scenario: 0, + }, + { + name: "with errors", + scenario: 1, + }, } - - require.Len(t, counter, numProviders) // make sure to talk with all of them - - // add a chance for node errors and timeouts - for i := 0; i < numProviders; i++ { - handler := func(req []byte, header http.Header) (data []byte, status int) { - randVal := rand.Intn(10) - switch randVal { - case 3: - time.Sleep(1 * time.Second) // cause timeout - case 2: - return []byte(`{"message":"bad","code":123}`), http.StatusInternalServerError + for _, play := range playbook { + t.Run(play.name, func(t *testing.T) { + + ctx := context.Background() + // can be any spec and api interface + specId := "LAV1" + apiInterface := spectypes.APIInterfaceTendermintRPC + epoch := uint64(100) + requiredResponses := 1 + lavaChainID := "lava" + numProviders := 5 + + consumerListenAddress := addressGen.GetAddress() + pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{} + type providerData struct { + account sigs.Account + endpoint *lavasession.RPCProviderEndpoint + server *rpcprovider.RPCProviderServer + replySetter *ReplySetter + mockChainFetcher *MockChainFetcher } - return providers[i].replySetter.replyDataBuf, http.StatusOK - } - providers[i].replySetter.handler = handler - } + providers := []providerData{} - for i := 0; i <= 100; i++ { - client := http.Client{Timeout: 300 * time.Millisecond} - _, err := client.Get("http://" + consumerListenAddress + "/status") - require.NoError(t, err, i) + for i := 0; i < numProviders; i++ { + // providerListenAddress := "localhost:111" + strconv.Itoa(i) + account := 
sigs.GenerateDeterministicFloatingKey(randomizer) + providerDataI := providerData{account: account} + providers = append(providers, providerDataI) + } + consumerAccount := sigs.GenerateDeterministicFloatingKey(randomizer) + for i := 0; i < numProviders; i++ { + ctx := context.Background() + providerDataI := providers[i] + listenAddress := addressGen.GetAddress() + providers[i].server, providers[i].endpoint, providers[i].replySetter, providers[i].mockChainFetcher = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) + providers[i].replySetter.replyDataBuf = []byte(fmt.Sprintf(`{"reply": %d}`, i+1)) + } + for i := 0; i < numProviders; i++ { + pairingList[uint64(i)] = &lavasession.ConsumerSessionsWithProvider{ + PublicLavaAddress: providers[i].account.Addr.String(), + Endpoints: []*lavasession.Endpoint{ + { + NetworkAddress: providers[i].endpoint.NetworkAddress.Address, + Enabled: true, + Geolocation: 1, + }, + }, + Sessions: map[int64]*lavasession.SingleConsumerSession{}, + MaxComputeUnits: 10000, + UsedComputeUnits: 0, + PairingEpoch: epoch, + } + } + rpcconsumerServer := createRpcConsumer(t, ctx, specId, apiInterface, consumerAccount, consumerListenAddress, epoch, pairingList, requiredResponses, lavaChainID) + require.NotNil(t, rpcconsumerServer) + if play.scenario != 1 { + counter := map[int]int{} + for i := 0; i <= 1000; i++ { + client := http.Client{} + resp, err := client.Get("http://" + consumerListenAddress + "/status") + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + bodyBytes, err := io.ReadAll(resp.Body) + require.NoError(t, err) + resp.Body.Close() + mapi := map[string]int{} + err = json.Unmarshal(bodyBytes, &mapi) + require.NoError(t, err) + id, ok := mapi["reply"] + require.True(t, ok) + counter[id]++ + handler := func(req []byte, header http.Header) (data []byte, status int) { + time.Sleep(3 * time.Millisecond) // cause timeout for providers we got a reply for so others get chosen with a bigger likelihood + return providers[id].replySetter.replyDataBuf, http.StatusOK + } + providers[id-1].replySetter.handler = handler + } + + require.Len(t, counter, numProviders) // make sure to talk with all of them + } + if play.scenario != 0 { + // add a chance for node errors and timeouts + for i := 0; i < numProviders; i++ { + replySetter := providers[i].replySetter + // index := i + handler := func(req []byte, header http.Header) (data []byte, status int) { + randVal := rand.Intn(10) + switch randVal { + // case 1: + // if index%2 == 0 { + // time.Sleep(2 * time.Second) // cause timeout, but only possible on half the providers so there's always a provider that answers + // } + case 2, 3, 4: + return []byte(`{"message":"bad","code":123}`), http.StatusServiceUnavailable + case 5: + return []byte(`{"message":"bad","code":777}`), http.StatusTooManyRequests // cause protocol error + } + return replySetter.replyDataBuf, http.StatusOK + } + providers[i].replySetter.handler = handler + } + + seenError := false + statuses := map[int]struct{}{} + for i := 0; i <= 100; i++ { + client := http.Client{Timeout: 500 * time.Millisecond} + resp, err := client.Get("http://" + consumerListenAddress + "/status") + require.NoError(t, err, i) + if resp.StatusCode == http.StatusServiceUnavailable { + seenError = true + } + statuses[resp.StatusCode] = struct{}{} + require.NotEqual(t, resp.StatusCode, http.StatusTooManyRequests, i) // should never return too many requests, because it triggers 
a retry + } + require.True(t, seenError, statuses) + } + }) } } diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 9b9bdfff88..a9df52fe85 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -883,6 +883,9 @@ func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, Value: strconv.FormatUint(protocolErrors, 10), }) } + if relayResult.Reply == nil { + relayResult.Reply = &pairingtypes.RelayReply{} + } if relayResult.Reply.LatestBlock > 0 { metadataReply = append(metadataReply, pairingtypes.Metadata{ @@ -899,9 +902,7 @@ func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, Value: guidStr, }) } - if relayResult.Reply == nil { - relayResult.Reply = &pairingtypes.RelayReply{} - } + relayResult.Reply.Metadata = append(relayResult.Reply.Metadata, metadataReply...) } From a5291918d56c6f9c3bae157fb4cebb27c0b77fb4 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 13 Mar 2024 17:38:54 +0200 Subject: [PATCH 45/57] added timeout to the scenarios --- protocol/integration/protocol_test.go | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index 3567c60575..7098236dfe 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -418,14 +418,14 @@ func TestConsumerProviderWithProviders(t *testing.T) { // add a chance for node errors and timeouts for i := 0; i < numProviders; i++ { replySetter := providers[i].replySetter - // index := i + index := i handler := func(req []byte, header http.Header) (data []byte, status int) { randVal := rand.Intn(10) switch randVal { - // case 1: - // if index%2 == 0 { - // time.Sleep(2 * time.Second) // cause timeout, but only possible on half the providers so there's always a provider that answers - // } + case 1: + if index < (numProviders+1)/2 { + time.Sleep(2 * time.Second) // cause timeout, but only possible on half the providers so there's always a provider that answers + } case 2, 3, 4: return []byte(`{"message":"bad","code":123}`), http.StatusServiceUnavailable case 5: @@ -440,13 +440,24 @@ func TestConsumerProviderWithProviders(t *testing.T) { statuses := map[int]struct{}{} for i := 0; i <= 100; i++ { client := http.Client{Timeout: 500 * time.Millisecond} - resp, err := client.Get("http://" + consumerListenAddress + "/status") + req, err := http.NewRequest("GET", "http://"+consumerListenAddress+"/status", nil) + if err != nil { + // Handle error + panic(err) + } + + // Add custom headers to the request + req.Header.Add(common.RELAY_TIMEOUT_HEADER_NAME, "90ms") + + // Perform the request + resp, err := client.Do(req) require.NoError(t, err, i) if resp.StatusCode == http.StatusServiceUnavailable { seenError = true } statuses[resp.StatusCode] = struct{}{} require.NotEqual(t, resp.StatusCode, http.StatusTooManyRequests, i) // should never return too many requests, because it triggers a retry + resp.Body.Close() } require.True(t, seenError, statuses) } From 0ebfac951f30c374958d2e2537ca6401596394de Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Wed, 13 Mar 2024 17:39:18 +0200 Subject: [PATCH 46/57] lint --- protocol/integration/protocol_test.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index 7098236dfe..3d66e0ed7e 100644 --- a/protocol/integration/protocol_test.go +++ 
b/protocol/integration/protocol_test.go @@ -188,7 +188,6 @@ func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string } w.WriteHeader(status) fmt.Fprint(w, string(data)) - }) chainParser, chainRouter, chainFetcher, _, endpoint, err := chainlib.CreateChainLibMocks(ctx, specId, apiInterface, serverHandler, "../../", addons) require.NoError(t, err) @@ -320,7 +319,6 @@ func TestConsumerProviderBasic(t *testing.T) { } func TestConsumerProviderWithProviders(t *testing.T) { - playbook := []struct { name string scenario int @@ -336,7 +334,6 @@ func TestConsumerProviderWithProviders(t *testing.T) { } for _, play := range playbook { t.Run(play.name, func(t *testing.T) { - ctx := context.Background() // can be any spec and api interface specId := "LAV1" From 5abeea619c3d16642f2542b724ef2ae304c01da0 Mon Sep 17 00:00:00 2001 From: omerlavanet Date: Sat, 16 Mar 2024 17:28:14 +0200 Subject: [PATCH 47/57] add a new test for tx sending --- protocol/integration/protocol_test.go | 97 +++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 4 deletions(-) diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index 3d66e0ed7e..cfccbce52f 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -438,10 +438,7 @@ func TestConsumerProviderWithProviders(t *testing.T) { for i := 0; i <= 100; i++ { client := http.Client{Timeout: 500 * time.Millisecond} req, err := http.NewRequest("GET", "http://"+consumerListenAddress+"/status", nil) - if err != nil { - // Handle error - panic(err) - } + require.NoError(t, err) // Add custom headers to the request req.Header.Add(common.RELAY_TIMEOUT_HEADER_NAME, "90ms") @@ -461,3 +458,95 @@ func TestConsumerProviderWithProviders(t *testing.T) { }) } } + +func TestConsumerProviderTx(t *testing.T) { + playbook := []struct { + name string + }{ + { + name: "basic-tx", + }, + } + for _, play := range playbook { + t.Run(play.name, func(t *testing.T) { + ctx := context.Background() + // can be any spec and api interface + specId := "LAV1" + apiInterface := spectypes.APIInterfaceRest + epoch := uint64(100) + requiredResponses := 1 + lavaChainID := "lava" + numProviders := 5 + + consumerListenAddress := addressGen.GetAddress() + pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{} + type providerData struct { + account sigs.Account + endpoint *lavasession.RPCProviderEndpoint + server *rpcprovider.RPCProviderServer + replySetter *ReplySetter + mockChainFetcher *MockChainFetcher + } + providers := []providerData{} + + for i := 0; i < numProviders; i++ { + // providerListenAddress := "localhost:111" + strconv.Itoa(i) + account := sigs.GenerateDeterministicFloatingKey(randomizer) + providerDataI := providerData{account: account} + providers = append(providers, providerDataI) + } + consumerAccount := sigs.GenerateDeterministicFloatingKey(randomizer) + for i := 0; i < numProviders; i++ { + ctx := context.Background() + providerDataI := providers[i] + listenAddress := addressGen.GetAddress() + providers[i].server, providers[i].endpoint, providers[i].replySetter, providers[i].mockChainFetcher = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) + providers[i].replySetter.replyDataBuf = []byte(fmt.Sprintf(`{"result": %d}`, i+1)) + } + for i := 0; i < numProviders; i++ { + pairingList[uint64(i)] = &lavasession.ConsumerSessionsWithProvider{ + PublicLavaAddress: 
providers[i].account.Addr.String(), + Endpoints: []*lavasession.Endpoint{ + { + NetworkAddress: providers[i].endpoint.NetworkAddress.Address, + Enabled: true, + Geolocation: 1, + }, + }, + Sessions: map[int64]*lavasession.SingleConsumerSession{}, + MaxComputeUnits: 10000, + UsedComputeUnits: 0, + PairingEpoch: epoch, + } + } + rpcconsumerServer := createRpcConsumer(t, ctx, specId, apiInterface, consumerAccount, consumerListenAddress, epoch, pairingList, requiredResponses, lavaChainID) + require.NotNil(t, rpcconsumerServer) + + for i := 0; i < numProviders; i++ { + replySetter := providers[i].replySetter + index := i + handler := func(req []byte, header http.Header) (data []byte, status int) { + if index == 1 { + // only one provider responds correctly, but after a delay + time.Sleep(20 * time.Millisecond) + return replySetter.replyDataBuf, http.StatusOK + } else { + return []byte(`{"message":"bad","code":777}`), http.StatusInternalServerError + } + } + providers[i].replySetter.handler = handler + } + + client := http.Client{Timeout: 500 * time.Millisecond} + req, err := http.NewRequest(http.MethodPost, "http://"+consumerListenAddress+"/cosmos/tx/v1beta1/txs", nil) + require.NoError(t, err) + resp, err := client.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + bodyBytes, err := io.ReadAll(resp.Body) + require.NoError(t, err) + resp.Body.Close() + require.Equal(t, `{"result": 2}`, string(bodyBytes)) + }) + } +} From e881ef5edf74e79329e15ecbd7bfc4bd22e1bd39 Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Tue, 19 Mar 2024 19:12:07 +0100 Subject: [PATCH 48/57] sort logs better :) --- protocol/chainlib/grpc.go | 4 ++-- protocol/chainlib/jsonRPC.go | 2 +- protocol/chainlib/rest.go | 6 +++--- protocol/chainlib/tendermintRPC.go | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/protocol/chainlib/grpc.go b/protocol/chainlib/grpc.go index 5c56196620..379972521f 100644 --- a/protocol/chainlib/grpc.go +++ b/protocol/chainlib/grpc.go @@ -310,7 +310,7 @@ func (apil *GrpcChainListener) Serve(ctx context.Context, cmdFlags common.Consum grpcHeaders := convertToMetadataMapOfSlices(metadataValues) utils.LavaFormatDebug("in <<< GRPC Relay ", utils.LogAttr("GUID", ctx), - utils.LogAttr("method", method), + utils.LogAttr("_method", method), utils.LogAttr("headers", grpcHeaders), ) metricsData := metrics.NewRelayAnalytics(dappID, apil.endpoint.ChainID, apiInterface) @@ -520,7 +520,7 @@ func (cp *GrpcChainProxy) SendNodeMsg(ctx context.Context, ch chan interface{}, } if debug { utils.LavaFormatDebug("provider sending node message", - utils.Attribute{Key: "method", Value: nodeMessage.Path}, + utils.Attribute{Key: "_method", Value: nodeMessage.Path}, utils.Attribute{Key: "headers", Value: metadataMap}, utils.Attribute{Key: "apiInterface", Value: "grpc"}, ) diff --git a/protocol/chainlib/jsonRPC.go b/protocol/chainlib/jsonRPC.go index 8f2cb06c80..4cd3fba4e0 100644 --- a/protocol/chainlib/jsonRPC.go +++ b/protocol/chainlib/jsonRPC.go @@ -446,7 +446,7 @@ func (apil *JsonRPCChainListener) Serve(ctx context.Context, cmdFlags common.Con utils.LavaFormatDebug("in <<<", utils.LogAttr("GUID", ctx), utils.LogAttr("seed", msgSeed), - utils.LogAttr("msg", logFormattedMsg), + utils.LogAttr("_msg", logFormattedMsg), utils.LogAttr("dappID", dappID), utils.LogAttr("headers", headers), ) diff --git a/protocol/chainlib/rest.go b/protocol/chainlib/rest.go index c1414a89c6..4e3e755226 100644 --- a/protocol/chainlib/rest.go +++ b/protocol/chainlib/rest.go @@ -300,7 +300,7 @@ 
func (apil *RestChainListener) Serve(ctx context.Context, cmdFlags common.Consum analytics := metrics.NewRelayAnalytics(dappID, chainID, apiInterface) utils.LavaFormatDebug("in <<<", utils.LogAttr("GUID", ctx), - utils.LogAttr("path", path), + utils.LogAttr("_path", path), utils.LogAttr("dappID", dappID), utils.LogAttr("msgSeed", msgSeed), utils.LogAttr("headers", restHeaders), @@ -366,7 +366,7 @@ func (apil *RestChainListener) Serve(ctx context.Context, cmdFlags common.Consum defer cancel() // incase there's a problem make sure to cancel the connection utils.LavaFormatDebug("in <<<", utils.LogAttr("GUID", ctx), - utils.LogAttr("path", path), + utils.LogAttr("_path", path), utils.LogAttr("dappID", dappID), utils.LogAttr("msgSeed", msgSeed), utils.LogAttr("headers", restHeaders), @@ -496,7 +496,7 @@ func (rcp *RestChainProxy) SendNodeMsg(ctx context.Context, ch chan interface{}, if debug { utils.LavaFormatDebug("provider sending node message", - utils.Attribute{Key: "method", Value: nodeMessage.Path}, + utils.Attribute{Key: "_method", Value: nodeMessage.Path}, utils.Attribute{Key: "headers", Value: req.Header}, utils.Attribute{Key: "apiInterface", Value: "rest"}, ) diff --git a/protocol/chainlib/tendermintRPC.go b/protocol/chainlib/tendermintRPC.go index e6355b5ad1..a95f25c5a4 100644 --- a/protocol/chainlib/tendermintRPC.go +++ b/protocol/chainlib/tendermintRPC.go @@ -469,7 +469,7 @@ func (apil *TendermintRpcChainListener) Serve(ctx context.Context, cmdFlags comm utils.LavaFormatDebug("in <<<", utils.LogAttr("GUID", ctx), utils.LogAttr("seed", msgSeed), - utils.LogAttr("msg", logFormattedMsg), + utils.LogAttr("_msg", logFormattedMsg), utils.LogAttr("dappID", dappID), utils.LogAttr("headers", headers), ) @@ -529,7 +529,7 @@ func (apil *TendermintRpcChainListener) Serve(ctx context.Context, cmdFlags comm headers := convertToMetadataMap(metadataValues) utils.LavaFormatDebug("urirpc in <<<", utils.LogAttr("GUID", ctx), - utils.LogAttr("msg", path), + utils.LogAttr("_msg", path), utils.LogAttr("dappID", dappID), utils.LogAttr("headers", headers), ) From 599c92fe2e653216860ee2b046ce85c0f5a13c33 Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Wed, 20 Mar 2024 10:13:14 +0100 Subject: [PATCH 49/57] fix Caching error spam when missing cache --- ecosystem/cache/handlers.go | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/ecosystem/cache/handlers.go b/ecosystem/cache/handlers.go index ff87cbcb7a..a604894890 100644 --- a/ecosystem/cache/handlers.go +++ b/ecosystem/cache/handlers.go @@ -126,16 +126,17 @@ func (s *RelayerCacheServer) GetRelay(ctx context.Context, relayCacheGet *pairin }() // wait for all reads to complete before moving forward waitGroup.Wait() - - // validate that the response seen block is larger or equal to our expectations. - if cacheReply.SeenBlock < slices.Min([]int64{relayCacheGet.SeenBlock, relayCacheGet.RequestedBlock}) { // TODO unitest this. - // Error, our reply seen block is not larger than our expectations, meaning we got an old response - // this can happen only in the case relayCacheGet.SeenBlock < relayCacheGet.RequestedBlock - // by setting the err variable we will get a cache miss, and the relay will continue to the node. - err = utils.LavaFormatDebug("reply seen block is smaller than our expectations", - utils.LogAttr("cacheReply.SeenBlock", cacheReply.SeenBlock), - utils.LogAttr("seenBlock", relayCacheGet.SeenBlock), - ) + if err == nil { // in case we got a hit validate seen block of the reply. 
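A note on the seen-block guard being added in this hunk: the comparison is still marked "TODO unitest this." Below is a minimal, self-contained sketch of what such a test could look like. The staleCacheHit helper merely restates the condition used here (a cache hit is stale when the cached reply's seen block is behind the smaller of the request's seen block and its requested block); every identifier in the sketch is illustrative and is not part of the Lava codebase.

package cache_test

import "testing"

// staleCacheHit restates the guard from the hunk above as a pure function:
// a cache hit is considered stale when the cached reply's seen block is
// behind min(request seen block, requested block).
func staleCacheHit(replySeen, requestSeen, requestedBlock int64) bool {
	minExpected := requestSeen
	if requestedBlock < minExpected {
		minExpected = requestedBlock
	}
	return replySeen < minExpected
}

func TestStaleCacheHit(t *testing.T) {
	cases := []struct {
		name                              string
		replySeen, requestSeen, requested int64
		stale                             bool
	}{
		{"reply caught up", 100, 100, 100, false},
		{"requested block below seen block", 90, 100, 90, false},
		{"old reply behind both", 80, 100, 90, true},
	}
	for _, c := range cases {
		if got := staleCacheHit(c.replySeen, c.requestSeen, c.requested); got != c.stale {
			t.Errorf("%s: got %v, want %v", c.name, got, c.stale)
		}
	}
}

If wired against the real RelayerCacheServer, the same three cases would cover both the err == nil hit path and the forced cache miss that the new debug log reports.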
+ // validate that the response seen block is larger or equal to our expectations. + if cacheReply.SeenBlock < slices.Min([]int64{relayCacheGet.SeenBlock, relayCacheGet.RequestedBlock}) { // TODO unitest this. + // Error, our reply seen block is not larger than our expectations, meaning we got an old response + // this can happen only in the case relayCacheGet.SeenBlock < relayCacheGet.RequestedBlock + // by setting the err variable we will get a cache miss, and the relay will continue to the node. + err = utils.LavaFormatDebug("reply seen block is smaller than our expectations", + utils.LogAttr("cacheReply.SeenBlock", cacheReply.SeenBlock), + utils.LogAttr("seenBlock", relayCacheGet.SeenBlock), + ) + } } // set seen block. if relayCacheGet.SeenBlock > cacheReply.SeenBlock { From f1b321633fe3f6dc65f61e125509e529b3e6df8f Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Thu, 21 Mar 2024 14:26:58 +0100 Subject: [PATCH 50/57] adding nil protection for used Providers --- protocol/lavasession/used_providers.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/protocol/lavasession/used_providers.go b/protocol/lavasession/used_providers.go index 21d0e2a898..884ea605ed 100644 --- a/protocol/lavasession/used_providers.go +++ b/protocol/lavasession/used_providers.go @@ -34,18 +34,30 @@ type UsedProviders struct { } func (up *UsedProviders) CurrentlyUsed() int { + if up == nil { + utils.LavaFormatError("UsedProviders.CurrentlyUsed is nil, misuse detected", nil) + return -1 + } up.lock.RLock() defer up.lock.RUnlock() return len(up.providers) } func (up *UsedProviders) SessionsLatestBatch() int { + if up == nil { + utils.LavaFormatError("UsedProviders.SessionsLatestBatch is nil, misuse detected", nil) + return -1 + } up.lock.RLock() defer up.lock.RUnlock() return up.sessionsLatestBatch } func (up *UsedProviders) CurrentlyUsedAddresses() []string { + if up == nil { + utils.LavaFormatError("UsedProviders.CurrentlyUsedAddresses is nil, misuse detected", nil) + return []string{} + } up.lock.RLock() defer up.lock.RUnlock() addresses := []string{} @@ -56,6 +68,10 @@ func (up *UsedProviders) CurrentlyUsedAddresses() []string { } func (up *UsedProviders) UnwantedAddresses() []string { + if up == nil { + utils.LavaFormatError("UsedProviders.UnwantedAddresses is nil, misuse detected", nil) + return []string{} + } up.lock.RLock() defer up.lock.RUnlock() addresses := []string{} From 011ff9964037e602d2eba327f20eef511e5aec63 Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Thu, 21 Mar 2024 14:27:45 +0100 Subject: [PATCH 51/57] set default 0 --- protocol/lavasession/used_providers.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protocol/lavasession/used_providers.go b/protocol/lavasession/used_providers.go index 884ea605ed..55eff1ddd1 100644 --- a/protocol/lavasession/used_providers.go +++ b/protocol/lavasession/used_providers.go @@ -36,7 +36,7 @@ type UsedProviders struct { func (up *UsedProviders) CurrentlyUsed() int { if up == nil { utils.LavaFormatError("UsedProviders.CurrentlyUsed is nil, misuse detected", nil) - return -1 + return 0 } up.lock.RLock() defer up.lock.RUnlock() @@ -46,7 +46,7 @@ func (up *UsedProviders) CurrentlyUsed() int { func (up *UsedProviders) SessionsLatestBatch() int { if up == nil { utils.LavaFormatError("UsedProviders.SessionsLatestBatch is nil, misuse detected", nil) - return -1 + return 0 } up.lock.RLock() defer up.lock.RUnlock() From 02980676272c0ce12ffc19ba04be176482716158 Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Thu, 21 Mar 2024 
14:49:30 +0100 Subject: [PATCH 52/57] adding error logs and nil protection. --- protocol/rpcconsumer/relay_processor.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 98b08393a0..4a9f16b573 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -85,6 +85,10 @@ func (rp *RelayProcessor) String() string { } func (rp *RelayProcessor) GetUsedProviders() *lavasession.UsedProviders { + if rp == nil { + utils.LavaFormatError("RelayProcessor.GetUsedProviders is nil, misuse detected", nil) + return nil + } rp.lock.RLock() defer rp.lock.RUnlock() return rp.usedProviders @@ -262,6 +266,10 @@ func (rp *RelayProcessor) readExistingResponses() { // this function waits for the processing results, they are written by multiple go routines and read by this go routine // it then updates the responses in their respective place, node errors, protocol errors or success results func (rp *RelayProcessor) WaitForResults(ctx context.Context) error { + if rp == nil { + utils.LavaFormatError("RelayProcessor.WaitForResults is nil, misuse detected", nil) + return nil + } responsesCount := 0 for { select { @@ -344,6 +352,11 @@ func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSi // if strategy == quorum get majority of node responses // on error: we will return a placeholder relayResult, with a provider address and a status code func (rp *RelayProcessor) ProcessingResult() (returnedResult *common.RelayResult, processingError error) { + if rp == nil { + utils.LavaFormatError("RelayProcessor.ProcessingResult is nil, misuse detected", nil) + return nil, nil + } + // this must be here before the lock because this function locks allProvidersAddresses := rp.GetUsedProviders().UnwantedAddresses() From 582a1dd3ed24c251b090696cae475c59f80813b2 Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Thu, 21 Mar 2024 14:50:27 +0100 Subject: [PATCH 53/57] adding some checks to avoid nil deref --- protocol/rpcconsumer/relay_processor.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 4a9f16b573..ab91709641 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -267,8 +267,7 @@ func (rp *RelayProcessor) readExistingResponses() { // it then updates the responses in their respective place, node errors, protocol errors or success results func (rp *RelayProcessor) WaitForResults(ctx context.Context) error { if rp == nil { - utils.LavaFormatError("RelayProcessor.WaitForResults is nil, misuse detected", nil) - return nil + return utils.LavaFormatError("RelayProcessor.WaitForResults is nil, misuse detected", nil) } responsesCount := 0 for { @@ -353,8 +352,7 @@ func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSi // on error: we will return a placeholder relayResult, with a provider address and a status code func (rp *RelayProcessor) ProcessingResult() (returnedResult *common.RelayResult, processingError error) { if rp == nil { - utils.LavaFormatError("RelayProcessor.ProcessingResult is nil, misuse detected", nil) - return nil, nil + return nil, utils.LavaFormatError("RelayProcessor.ProcessingResult is nil, misuse detected", nil) } // this must be here before the lock because this function locks From 6fb97afdb2e415d0a56a3fab81fd25024e9aee1d Mon Sep 17 00:00:00 2001 From: Ran Mishael 
Date: Thu, 21 Mar 2024 19:08:17 +0100
Subject: [PATCH 54/57] allow "connection reset by peer" errors during emergency mode, since we shut down the node

---
 testutil/e2e/allowedErrorList.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/testutil/e2e/allowedErrorList.go b/testutil/e2e/allowedErrorList.go
index 43b64c9dce..df4c0f862b 100644
--- a/testutil/e2e/allowedErrorList.go
+++ b/testutil/e2e/allowedErrorList.go
@@ -12,7 +12,8 @@ var allowedErrors = map[string]string{
 }
 
 var allowedErrorsDuringEmergencyMode = map[string]string{
- "connection refused": "Connection to tendermint port sometimes can happen as we shut down the node and we try to fetch info during emergency mode",
+ "connection refused": "Connection to tendermint port sometimes can happen as we shut down the node and we try to fetch info during emergency mode",
+ "connection reset by peer": "Connection to tendermint port sometimes can happen as we shut down the node and we try to fetch info during emergency mode",
 }
 
 var allowedErrorsPaymentE2E = map[string]string{

From 16c3752db500bde7d3518aa52188d69b798f9f3d Mon Sep 17 00:00:00 2001
From: Ran Mishael
Date: Thu, 21 Mar 2024 19:16:09 +0100
Subject: [PATCH 55/57] adding debugging information for the next time the TS optimizer test fails

---
 .../lava-sdk/src/providerOptimizer/providerOptimizer.test.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts b/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts
index b1df9e6ce1..31d69a6050 100644
--- a/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts
+++ b/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts
@@ -157,6 +157,7 @@ describe("ProviderOptimizer", () => {
 perturbationPercentage
 );
 expect(returnedProviders).toHaveLength(1);
+ console.log("[Debugging] expect(returnedProviders[0]).toBe(providers[skipIndex]); Optimizer Issue", "returnedProviders", returnedProviders, "providers", providers, "skipIndex", skipIndex)
 expect(returnedProviders[0]).toBe(providers[skipIndex]);
 
 returnedProviders = providerOptimizer.chooseProvider(

From 19e2082150931105d6f94d19ed62df3af698beaf Mon Sep 17 00:00:00 2001
From: Ran Mishael
Date: Thu, 21 Mar 2024 19:18:44 +0100
Subject: [PATCH 56/57] eslint formatting for the new debug log
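For context on the allow-list extended in PATCH 54 above: the keys of allowedErrorsDuringEmergencyMode are error substrings that the E2E harness is expected to tolerate while the node is being shut down, and the values are only human-readable explanations. The matching code itself is not part of this series, so the helper below is just a sketch of the presumed shape; isAllowedError and its signature are assumptions for illustration, not project API.

package e2e

import "strings"

// isAllowedError reports whether a captured log line contains one of the
// allowed error substrings (the keys of maps such as
// allowedErrorsDuringEmergencyMode). Illustrative only: the real filtering
// lives elsewhere in the e2e test utilities.
func isAllowedError(logLine string, allowed map[string]string) bool {
	for substring := range allowed {
		if strings.Contains(logLine, substring) {
			return true
		}
	}
	return false
}

Under that assumption, a shutdown-time log line such as "read tcp 127.0.0.1:26657: connection reset by peer" would now be skipped during emergency mode instead of failing the run, which is what the new map entry is for.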
--- .../src/providerOptimizer/providerOptimizer.test.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts b/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts index 31d69a6050..faabeaee66 100644 --- a/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts +++ b/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts @@ -157,7 +157,15 @@ describe("ProviderOptimizer", () => { perturbationPercentage ); expect(returnedProviders).toHaveLength(1); - console.log("[Debugging] expect(returnedProviders[0]).toBe(providers[skipIndex]); Optimizer Issue", "returnedProviders", returnedProviders, "providers", providers, "skipIndex", skipIndex) + console.log( + "[Debugging] expect(returnedProviders[0]).toBe(providers[skipIndex]); Optimizer Issue", + "returnedProviders", + returnedProviders, + "providers", + providers, + "skipIndex", + skipIndex + ) expect(returnedProviders[0]).toBe(providers[skipIndex]); returnedProviders = providerOptimizer.chooseProvider( From d4f0801682ac8f291bb9561c9d3839d7c173955c Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Thu, 21 Mar 2024 19:31:57 +0100 Subject: [PATCH 57/57] eslint fix --- ecosystem/lava-sdk/package.json | 4 +- .../providerOptimizer.test.ts | 2 +- ecosystem/lava-sdk/yarn.lock | 182 ++++++++++-------- 3 files changed, 107 insertions(+), 81 deletions(-) diff --git a/ecosystem/lava-sdk/package.json b/ecosystem/lava-sdk/package.json index 6ffae762b6..8ad4f14f97 100644 --- a/ecosystem/lava-sdk/package.json +++ b/ecosystem/lava-sdk/package.json @@ -56,7 +56,7 @@ "bignumber.js": "^9.1.1", "chalk": "4.1.2", "commander": "^9.4.1", - "eslint": "^8.29.0", + "eslint": "^8.57.0", "eslint-config-prettier": "^8.5.0", "eslint-plugin-prettier": "^4.2.1", "google-protobuf": "^3.21.2", @@ -113,4 +113,4 @@ "node": ">=18", "npm": ">=6.12.0" } -} \ No newline at end of file +} diff --git a/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts b/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts index faabeaee66..a1868aeb9d 100644 --- a/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts +++ b/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts @@ -165,7 +165,7 @@ describe("ProviderOptimizer", () => { providers, "skipIndex", skipIndex - ) + ); expect(returnedProviders[0]).toBe(providers[skipIndex]); returnedProviders = providerOptimizer.chooseProvider( diff --git a/ecosystem/lava-sdk/yarn.lock b/ecosystem/lava-sdk/yarn.lock index 48ecce4077..14949e2f54 100644 --- a/ecosystem/lava-sdk/yarn.lock +++ b/ecosystem/lava-sdk/yarn.lock @@ -2,6 +2,11 @@ # yarn lockfile v1 +"@aashutoshrathi/word-wrap@^1.2.3": + version "1.2.6" + resolved "https://registry.yarnpkg.com/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz#bd9154aec9983f77b3a034ecaa015c2e4201f6cf" + integrity sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA== + "@ampproject/remapping@^2.1.0": version "2.2.0" resolved "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.2.0.tgz" @@ -551,21 +556,38 @@ resolved "https://registry.npmjs.org/@discoveryjs/json-ext/-/json-ext-0.5.7.tgz" integrity sha512-dBVuXR082gk3jsFp7Rd/JI4kytwGHecnCoTtXFb7DB6CNHp4rg5k1bhg0nWdLGLnOV71lmDzGQaLMy8iPLY0pw== -"@eslint/eslintrc@^1.3.3": - version "1.3.3" - resolved "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-1.3.3.tgz" - integrity 
sha512-uj3pT6Mg+3t39fvLrj8iuCIJ38zKO9FpGtJ4BBJebJhEwjoT+KLVNCcHT5QC9NGRIEi7fZ0ZR8YRb884auB4Lg== +"@eslint-community/eslint-utils@^4.2.0": + version "4.4.0" + resolved "https://registry.yarnpkg.com/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz#a23514e8fb9af1269d5f7788aa556798d61c6b59" + integrity sha512-1/sA4dwrzBAyeUoQ6oxahHKmrZvsnLCg4RfxW3ZFGGmQkSNQPFNLV9CUEFQP1x9EYXHTo5p6xdhZM1Ne9p/AfA== + dependencies: + eslint-visitor-keys "^3.3.0" + +"@eslint-community/regexpp@^4.6.1": + version "4.10.0" + resolved "https://registry.yarnpkg.com/@eslint-community/regexpp/-/regexpp-4.10.0.tgz#548f6de556857c8bb73bbee70c35dc82a2e74d63" + integrity sha512-Cu96Sd2By9mCNTx2iyKOmq10v22jUVQv0lQnlGNy16oE9589yE+QADPbrMGCkA51cKZSg3Pu/aTJVTGfL/qjUA== + +"@eslint/eslintrc@^2.1.4": + version "2.1.4" + resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-2.1.4.tgz#388a269f0f25c1b6adc317b5a2c55714894c70ad" + integrity sha512-269Z39MS6wVJtsoUl10L60WdkhJVdPG24Q4eZTH3nnF6lpvSShEK3wQjDX9JRWAUPvPh7COouPpU9IrqaZFvtQ== dependencies: ajv "^6.12.4" debug "^4.3.2" - espree "^9.4.0" - globals "^13.15.0" + espree "^9.6.0" + globals "^13.19.0" ignore "^5.2.0" import-fresh "^3.2.1" js-yaml "^4.1.0" minimatch "^3.1.2" strip-json-comments "^3.1.1" +"@eslint/js@8.57.0": + version "8.57.0" + resolved "https://registry.yarnpkg.com/@eslint/js/-/js-8.57.0.tgz#a5417ae8427873f1dd08b70b3574b453e67b5f7f" + integrity sha512-Ys+3g2TaW7gADOJzPt83SJtCDhMjndcDMFVQ/Tj9iA1BfJzFKD9mAUXT3OenpuPHbI6P/myECxRJrofUsDx/5g== + "@grpc/grpc-js@^1.7.1": version "1.7.1" resolved "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.7.1.tgz" @@ -585,13 +607,13 @@ protobufjs "^7.0.0" yargs "^16.2.0" -"@humanwhocodes/config-array@^0.11.6": - version "0.11.7" - resolved "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.7.tgz" - integrity sha512-kBbPWzN8oVMLb0hOUYXhmxggL/1cJE6ydvjDIGi9EnAGUyA7cLVKQg+d/Dsm+KZwx2czGHrCmMVLiyg8s5JPKw== +"@humanwhocodes/config-array@^0.11.14": + version "0.11.14" + resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.11.14.tgz#d78e481a039f7566ecc9660b4ea7fe6b1fec442b" + integrity sha512-3T8LkOmg45BV5FICb15QQMsyUSWrQ8AygVfC7ZG32zOalnqrilm018ZVCw0eapXux8FtA33q8PSRSstjee3jSg== dependencies: - "@humanwhocodes/object-schema" "^1.2.1" - debug "^4.1.1" + "@humanwhocodes/object-schema" "^2.0.2" + debug "^4.3.1" minimatch "^3.0.5" "@humanwhocodes/module-importer@^1.0.1": @@ -599,10 +621,10 @@ resolved "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz" integrity sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA== -"@humanwhocodes/object-schema@^1.2.1": - version "1.2.1" - resolved "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-1.2.1.tgz" - integrity sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA== +"@humanwhocodes/object-schema@^2.0.2": + version "2.0.2" + resolved "https://registry.yarnpkg.com/@humanwhocodes/object-schema/-/object-schema-2.0.2.tgz#d9fae00a2d5cb40f92cfe64b47ad749fbc38f917" + integrity sha512-6EwiSjwWYP7pTckG6I5eyFANjPhmPjUX9JRLUSfNPC7FX7zK9gyZAfUEaECL6ALTpGX5AjnBq3C9XmVWPitNpw== "@improbable-eng/grpc-web-node-http-transport@^0.15.0": version "0.15.0" @@ -2613,6 +2635,11 @@ "@typescript-eslint/types" "5.46.0" eslint-visitor-keys "^3.3.0" +"@ungap/structured-clone@^1.2.0": + version "1.2.0" + resolved 
"https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406" + integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ== + "@webassemblyjs/ast@1.11.1": version "1.11.1" resolved "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.11.1.tgz" @@ -2776,11 +2803,16 @@ acorn-walk@^8.1.1: resolved "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.2.0.tgz" integrity sha512-k+iyHEuPgSw6SbuDpGQM+06HQUa04DZ3o+F6CSzXMvvI5KMvnaEqXe+YVe555R9nn6GPt404fos4wcgpw12SDA== -acorn@^8.4.1, acorn@^8.5.0, acorn@^8.7.1, acorn@^8.8.0: +acorn@^8.4.1, acorn@^8.5.0, acorn@^8.7.1: version "8.8.1" resolved "https://registry.npmjs.org/acorn/-/acorn-8.8.1.tgz" integrity sha512-7zFpHzhnqYKrkYdUjF1HI1bzd0VygEGX8lFk4k5zVMqHEoES+P+7TKI+EvLO9WVMJ8eekdO0aDEK044xTXwPPA== +acorn@^8.9.0: + version "8.11.3" + resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.11.3.tgz#71e0b14e13a4ec160724b38fb7b0f233b1b81d7a" + integrity sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg== + adm-zip@^0.5.10: version "0.5.10" resolved "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.10.tgz" @@ -2791,7 +2823,7 @@ ajv-keywords@^3.5.2: resolved "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz" integrity sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ== -ajv@^6.10.0, ajv@^6.12.4, ajv@^6.12.5: +ajv@^6.12.4, ajv@^6.12.5: version "6.12.6" resolved "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz" integrity sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g== @@ -3337,7 +3369,7 @@ debug@^3.2.7: dependencies: ms "^2.1.1" -debug@^4.1.0, debug@^4.1.1, debug@^4.3.2, debug@^4.3.4: +debug@^4.1.0, debug@^4.1.1, debug@^4.3.1, debug@^4.3.2, debug@^4.3.4: version "4.3.4" resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz" integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== @@ -3595,10 +3627,10 @@ eslint-scope@5.1.1, eslint-scope@^5.1.1: esrecurse "^4.3.0" estraverse "^4.1.1" -eslint-scope@^7.1.1: - version "7.1.1" - resolved "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.1.1.tgz" - integrity sha512-QKQM/UXpIiHcLqJ5AOyIW7XZmzjkzQXYE54n1++wb0u9V/abW3l9uQnxX8Z5Xd18xyKIMTUAyQ0k1e8pz6LUrw== +eslint-scope@^7.2.2: + version "7.2.2" + resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-7.2.2.tgz#deb4f92563390f32006894af62a22dba1c46423f" + integrity sha512-dOt21O7lTMhDM+X9mB4GX+DZrZtCUJPL/wlcTqxyrx5IvO0IYtILdtrQGQp+8n5S0gwSVmOf9NQrjMOgfQZlIg== dependencies: esrecurse "^4.3.0" estraverse "^5.2.0" @@ -3620,69 +3652,73 @@ eslint-visitor-keys@^3.3.0: resolved "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.3.0.tgz" integrity sha512-mQ+suqKJVyeuwGYHAdjMFqjCyfl8+Ldnxuyp3ldiMBFKkvytrXUZWaiPCEav8qDHKty44bD+qV1IP4T+w+xXRA== -eslint@^8.29.0: - version "8.29.0" - resolved "https://registry.npmjs.org/eslint/-/eslint-8.29.0.tgz" - integrity sha512-isQ4EEiyUjZFbEKvEGJKKGBwXtvXX+zJbkVKCgTuB9t/+jUBcy8avhkEwWJecI15BkRkOYmvIM5ynbhRjEkoeg== - dependencies: - "@eslint/eslintrc" "^1.3.3" - "@humanwhocodes/config-array" "^0.11.6" +eslint-visitor-keys@^3.4.1, eslint-visitor-keys@^3.4.3: + version "3.4.3" + resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz#0cd72fe8550e3c2eae156a96a4dddcd1c8ac5800" + integrity 
sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag== + +eslint@^8.57.0: + version "8.57.0" + resolved "https://registry.yarnpkg.com/eslint/-/eslint-8.57.0.tgz#c786a6fd0e0b68941aaf624596fb987089195668" + integrity sha512-dZ6+mexnaTIbSBZWgou51U6OmzIhYM2VcNdtiTtI7qPNZm35Akpr0f6vtw3w1Kmn5PYo+tZVfh13WrhpS6oLqQ== + dependencies: + "@eslint-community/eslint-utils" "^4.2.0" + "@eslint-community/regexpp" "^4.6.1" + "@eslint/eslintrc" "^2.1.4" + "@eslint/js" "8.57.0" + "@humanwhocodes/config-array" "^0.11.14" "@humanwhocodes/module-importer" "^1.0.1" "@nodelib/fs.walk" "^1.2.8" - ajv "^6.10.0" + "@ungap/structured-clone" "^1.2.0" + ajv "^6.12.4" chalk "^4.0.0" cross-spawn "^7.0.2" debug "^4.3.2" doctrine "^3.0.0" escape-string-regexp "^4.0.0" - eslint-scope "^7.1.1" - eslint-utils "^3.0.0" - eslint-visitor-keys "^3.3.0" - espree "^9.4.0" - esquery "^1.4.0" + eslint-scope "^7.2.2" + eslint-visitor-keys "^3.4.3" + espree "^9.6.1" + esquery "^1.4.2" esutils "^2.0.2" fast-deep-equal "^3.1.3" file-entry-cache "^6.0.1" find-up "^5.0.0" glob-parent "^6.0.2" - globals "^13.15.0" - grapheme-splitter "^1.0.4" + globals "^13.19.0" + graphemer "^1.4.0" ignore "^5.2.0" - import-fresh "^3.0.0" imurmurhash "^0.1.4" is-glob "^4.0.0" is-path-inside "^3.0.3" - js-sdsl "^4.1.4" js-yaml "^4.1.0" json-stable-stringify-without-jsonify "^1.0.1" levn "^0.4.1" lodash.merge "^4.6.2" minimatch "^3.1.2" natural-compare "^1.4.0" - optionator "^0.9.1" - regexpp "^3.2.0" + optionator "^0.9.3" strip-ansi "^6.0.1" - strip-json-comments "^3.1.0" text-table "^0.2.0" -espree@^9.4.0: - version "9.4.1" - resolved "https://registry.npmjs.org/espree/-/espree-9.4.1.tgz" - integrity sha512-XwctdmTO6SIvCzd9810yyNzIrOrqNYV9Koizx4C/mRhf9uq0o4yHoCEU/670pOxOL/MSraektvSAji79kX90Vg== +espree@^9.6.0, espree@^9.6.1: + version "9.6.1" + resolved "https://registry.yarnpkg.com/espree/-/espree-9.6.1.tgz#a2a17b8e434690a5432f2f8018ce71d331a48c6f" + integrity sha512-oruZaFkjorTpF32kDSI5/75ViwGeZginGGy2NoOSg3Q9bnwlnmDm4HLnkl0RE3n+njDXR037aY1+x58Z/zFdwQ== dependencies: - acorn "^8.8.0" + acorn "^8.9.0" acorn-jsx "^5.3.2" - eslint-visitor-keys "^3.3.0" + eslint-visitor-keys "^3.4.1" esprima@^4.0.0: version "4.0.1" resolved "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz" integrity sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A== -esquery@^1.4.0: - version "1.4.0" - resolved "https://registry.npmjs.org/esquery/-/esquery-1.4.0.tgz" - integrity sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w== +esquery@^1.4.2: + version "1.5.0" + resolved "https://registry.yarnpkg.com/esquery/-/esquery-1.5.0.tgz#6ce17738de8577694edd7361c57182ac8cb0db0b" + integrity sha512-YQLXUplAwJgCydQ78IMJywZCceoqk1oH01OERdSAJc/7U2AylwjhSCLDEtqwg811idIS/9fIU5GjG73IgjKMVg== dependencies: estraverse "^5.1.0" @@ -4021,10 +4057,10 @@ globals@^11.1.0: resolved "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz" integrity sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA== -globals@^13.15.0: - version "13.18.0" - resolved "https://registry.npmjs.org/globals/-/globals-13.18.0.tgz" - integrity sha512-/mR4KI8Ps2spmoc0Ulu9L7agOF0du1CZNQ3dke8yItYlyKNmGrkONemBbd6V8UTc1Wgcqn21t3WYB7dbRmh6/A== +globals@^13.19.0: + version "13.24.0" + resolved "https://registry.yarnpkg.com/globals/-/globals-13.24.0.tgz#8432a19d78ce0c1e833949c36adb345400bb1171" + integrity 
sha512-AhO5QUcj8llrbG09iWhPU2B204J1xnPeL8kQmVorSsy+Sjj1sk8gIyh6cUocGmH4L0UuhAJy+hJMRA4mgA4mFQ== dependencies: type-fest "^0.20.2" @@ -4080,10 +4116,10 @@ graceful-fs@^4.1.10, graceful-fs@^4.1.2, graceful-fs@^4.2.4, graceful-fs@^4.2.9: resolved "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.10.tgz" integrity sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA== -grapheme-splitter@^1.0.4: - version "1.0.4" - resolved "https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz" - integrity sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ== +graphemer@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/graphemer/-/graphemer-1.4.0.tgz#fb2f1d55e0e3a1849aeffc90c4fa0dd53a0e66c6" + integrity sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag== grpc-web@^1.4.2: version "1.4.2" @@ -4229,7 +4265,7 @@ ignore@^5.2.0: resolved "https://registry.npmjs.org/ignore/-/ignore-5.2.1.tgz" integrity sha512-d2qQLzTJ9WxQftPAuEQpSPmKqzxePjzVbpAVv62AQ64NTL+wR4JkrVqR/LqFsFEUsHDAiId52mJteHDFuDkElA== -import-fresh@^3.0.0, import-fresh@^3.2.1: +import-fresh@^3.2.1: version "3.3.0" resolved "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz" integrity sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw== @@ -4805,11 +4841,6 @@ jest@^29.3.1: import-local "^3.0.2" jest-cli "^29.7.0" -js-sdsl@^4.1.4: - version "4.2.0" - resolved "https://registry.npmjs.org/js-sdsl/-/js-sdsl-4.2.0.tgz" - integrity sha512-dyBIzQBDkCqCu+0upx25Y2jGdbTGxE9fshMsCdK0ViOongpV+n5tXRcZY9v7CaVQ79AGS9KA1KHtojxiM7aXSQ== - js-sha3@^0.8.0: version "0.8.0" resolved "https://registry.npmjs.org/js-sha3/-/js-sha3-0.8.0.tgz" @@ -5186,17 +5217,17 @@ opener@^1.5.1: resolved "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz" integrity sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A== -optionator@^0.9.1: - version "0.9.1" - resolved "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz" - integrity sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw== +optionator@^0.9.3: + version "0.9.3" + resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.9.3.tgz#007397d44ed1872fdc6ed31360190f81814e2c64" + integrity sha512-JjCoypp+jKn1ttEFExxhetCKeJt9zhAgAve5FXHixTvFDW/5aEktX9bufBKLRRMdU7bNtpLfcGu94B3cdEJgjg== dependencies: + "@aashutoshrathi/word-wrap" "^1.2.3" deep-is "^0.1.3" fast-levenshtein "^2.0.6" levn "^0.4.1" prelude-ls "^1.2.1" type-check "^0.4.0" - word-wrap "^1.2.3" p-cancelable@^0.4.0: version "0.4.1" @@ -5871,7 +5902,7 @@ strip-final-newline@^2.0.0: resolved "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-2.0.0.tgz" integrity sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA== -strip-json-comments@^3.1.0, strip-json-comments@^3.1.1: +strip-json-comments@^3.1.1: version "3.1.1" resolved "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz" integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig== @@ -6284,11 +6315,6 @@ wildcard@^2.0.0: resolved "https://registry.npmjs.org/wildcard/-/wildcard-2.0.0.tgz" integrity sha512-JcKqAHLPxcdb9KM49dufGXn2x3ssnfjbcaQdLlfZsL9rH9wgDQjUtDxbo8NE0F6SFvydeu1VhZe7hZuHsB2/pw== -word-wrap@^1.2.3: - version "1.2.3" - resolved 
"https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz" - integrity sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ== - wrap-ansi@^7.0.0: version "7.0.0" resolved "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz"