diff --git a/ecosystem/cache/handlers.go b/ecosystem/cache/handlers.go index ff87cbcb7a..a604894890 100644 --- a/ecosystem/cache/handlers.go +++ b/ecosystem/cache/handlers.go @@ -126,16 +126,17 @@ func (s *RelayerCacheServer) GetRelay(ctx context.Context, relayCacheGet *pairin }() // wait for all reads to complete before moving forward waitGroup.Wait() - - // validate that the response seen block is larger or equal to our expectations. - if cacheReply.SeenBlock < slices.Min([]int64{relayCacheGet.SeenBlock, relayCacheGet.RequestedBlock}) { // TODO unitest this. - // Error, our reply seen block is not larger than our expectations, meaning we got an old response - // this can happen only in the case relayCacheGet.SeenBlock < relayCacheGet.RequestedBlock - // by setting the err variable we will get a cache miss, and the relay will continue to the node. - err = utils.LavaFormatDebug("reply seen block is smaller than our expectations", - utils.LogAttr("cacheReply.SeenBlock", cacheReply.SeenBlock), - utils.LogAttr("seenBlock", relayCacheGet.SeenBlock), - ) + if err == nil { // in case we got a cache hit, validate the seen block of the reply. + // validate that the response's seen block is larger than or equal to our expectations. + if cacheReply.SeenBlock < slices.Min([]int64{relayCacheGet.SeenBlock, relayCacheGet.RequestedBlock}) { // TODO: add a unit test for this. + // Error: the reply's seen block is smaller than we expect, meaning we got an old response. + // this can happen only when relayCacheGet.SeenBlock < relayCacheGet.RequestedBlock. + // setting err results in a cache miss, so the relay continues to the node. + err = utils.LavaFormatDebug("reply seen block is smaller than our expectations", + utils.LogAttr("cacheReply.SeenBlock", cacheReply.SeenBlock), + utils.LogAttr("seenBlock", relayCacheGet.SeenBlock), + ) + } } // set seen block.
if relayCacheGet.SeenBlock > cacheReply.SeenBlock { diff --git a/ecosystem/lava-sdk/package.json b/ecosystem/lava-sdk/package.json index 6ffae762b6..8ad4f14f97 100644 --- a/ecosystem/lava-sdk/package.json +++ b/ecosystem/lava-sdk/package.json @@ -56,7 +56,7 @@ "bignumber.js": "^9.1.1", "chalk": "4.1.2", "commander": "^9.4.1", - "eslint": "^8.29.0", + "eslint": "^8.57.0", "eslint-config-prettier": "^8.5.0", "eslint-plugin-prettier": "^4.2.1", "google-protobuf": "^3.21.2", @@ -113,4 +113,4 @@ "node": ">=18", "npm": ">=6.12.0" } -} \ No newline at end of file +} diff --git a/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts b/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts index b1df9e6ce1..a1868aeb9d 100644 --- a/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts +++ b/ecosystem/lava-sdk/src/providerOptimizer/providerOptimizer.test.ts @@ -157,6 +157,15 @@ describe("ProviderOptimizer", () => { perturbationPercentage ); expect(returnedProviders).toHaveLength(1); + console.log( + "[Debugging] expect(returnedProviders[0]).toBe(providers[skipIndex]); Optimizer Issue", + "returnedProviders", + returnedProviders, + "providers", + providers, + "skipIndex", + skipIndex + ); expect(returnedProviders[0]).toBe(providers[skipIndex]); returnedProviders = providerOptimizer.chooseProvider( diff --git a/ecosystem/lava-sdk/yarn.lock b/ecosystem/lava-sdk/yarn.lock index 48ecce4077..14949e2f54 100644 --- a/ecosystem/lava-sdk/yarn.lock +++ b/ecosystem/lava-sdk/yarn.lock @@ -2,6 +2,11 @@ # yarn lockfile v1 +"@aashutoshrathi/word-wrap@^1.2.3": + version "1.2.6" + resolved "https://registry.yarnpkg.com/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz#bd9154aec9983f77b3a034ecaa015c2e4201f6cf" + integrity sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA== + "@ampproject/remapping@^2.1.0": version "2.2.0" resolved "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.2.0.tgz" @@ -551,21 +556,38 @@ resolved "https://registry.npmjs.org/@discoveryjs/json-ext/-/json-ext-0.5.7.tgz" integrity sha512-dBVuXR082gk3jsFp7Rd/JI4kytwGHecnCoTtXFb7DB6CNHp4rg5k1bhg0nWdLGLnOV71lmDzGQaLMy8iPLY0pw== -"@eslint/eslintrc@^1.3.3": - version "1.3.3" - resolved "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-1.3.3.tgz" - integrity sha512-uj3pT6Mg+3t39fvLrj8iuCIJ38zKO9FpGtJ4BBJebJhEwjoT+KLVNCcHT5QC9NGRIEi7fZ0ZR8YRb884auB4Lg== +"@eslint-community/eslint-utils@^4.2.0": + version "4.4.0" + resolved "https://registry.yarnpkg.com/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz#a23514e8fb9af1269d5f7788aa556798d61c6b59" + integrity sha512-1/sA4dwrzBAyeUoQ6oxahHKmrZvsnLCg4RfxW3ZFGGmQkSNQPFNLV9CUEFQP1x9EYXHTo5p6xdhZM1Ne9p/AfA== + dependencies: + eslint-visitor-keys "^3.3.0" + +"@eslint-community/regexpp@^4.6.1": + version "4.10.0" + resolved "https://registry.yarnpkg.com/@eslint-community/regexpp/-/regexpp-4.10.0.tgz#548f6de556857c8bb73bbee70c35dc82a2e74d63" + integrity sha512-Cu96Sd2By9mCNTx2iyKOmq10v22jUVQv0lQnlGNy16oE9589yE+QADPbrMGCkA51cKZSg3Pu/aTJVTGfL/qjUA== + +"@eslint/eslintrc@^2.1.4": + version "2.1.4" + resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-2.1.4.tgz#388a269f0f25c1b6adc317b5a2c55714894c70ad" + integrity sha512-269Z39MS6wVJtsoUl10L60WdkhJVdPG24Q4eZTH3nnF6lpvSShEK3wQjDX9JRWAUPvPh7COouPpU9IrqaZFvtQ== dependencies: ajv "^6.12.4" debug "^4.3.2" - espree "^9.4.0" - globals "^13.15.0" + espree "^9.6.0" + globals "^13.19.0" ignore "^5.2.0" import-fresh "^3.2.1" js-yaml "^4.1.0" minimatch "^3.1.2" 
strip-json-comments "^3.1.1" +"@eslint/js@8.57.0": + version "8.57.0" + resolved "https://registry.yarnpkg.com/@eslint/js/-/js-8.57.0.tgz#a5417ae8427873f1dd08b70b3574b453e67b5f7f" + integrity sha512-Ys+3g2TaW7gADOJzPt83SJtCDhMjndcDMFVQ/Tj9iA1BfJzFKD9mAUXT3OenpuPHbI6P/myECxRJrofUsDx/5g== + "@grpc/grpc-js@^1.7.1": version "1.7.1" resolved "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.7.1.tgz" @@ -585,13 +607,13 @@ protobufjs "^7.0.0" yargs "^16.2.0" -"@humanwhocodes/config-array@^0.11.6": - version "0.11.7" - resolved "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.7.tgz" - integrity sha512-kBbPWzN8oVMLb0hOUYXhmxggL/1cJE6ydvjDIGi9EnAGUyA7cLVKQg+d/Dsm+KZwx2czGHrCmMVLiyg8s5JPKw== +"@humanwhocodes/config-array@^0.11.14": + version "0.11.14" + resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.11.14.tgz#d78e481a039f7566ecc9660b4ea7fe6b1fec442b" + integrity sha512-3T8LkOmg45BV5FICb15QQMsyUSWrQ8AygVfC7ZG32zOalnqrilm018ZVCw0eapXux8FtA33q8PSRSstjee3jSg== dependencies: - "@humanwhocodes/object-schema" "^1.2.1" - debug "^4.1.1" + "@humanwhocodes/object-schema" "^2.0.2" + debug "^4.3.1" minimatch "^3.0.5" "@humanwhocodes/module-importer@^1.0.1": @@ -599,10 +621,10 @@ resolved "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz" integrity sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA== -"@humanwhocodes/object-schema@^1.2.1": - version "1.2.1" - resolved "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-1.2.1.tgz" - integrity sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA== +"@humanwhocodes/object-schema@^2.0.2": + version "2.0.2" + resolved "https://registry.yarnpkg.com/@humanwhocodes/object-schema/-/object-schema-2.0.2.tgz#d9fae00a2d5cb40f92cfe64b47ad749fbc38f917" + integrity sha512-6EwiSjwWYP7pTckG6I5eyFANjPhmPjUX9JRLUSfNPC7FX7zK9gyZAfUEaECL6ALTpGX5AjnBq3C9XmVWPitNpw== "@improbable-eng/grpc-web-node-http-transport@^0.15.0": version "0.15.0" @@ -2613,6 +2635,11 @@ "@typescript-eslint/types" "5.46.0" eslint-visitor-keys "^3.3.0" +"@ungap/structured-clone@^1.2.0": + version "1.2.0" + resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406" + integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ== + "@webassemblyjs/ast@1.11.1": version "1.11.1" resolved "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.11.1.tgz" @@ -2776,11 +2803,16 @@ acorn-walk@^8.1.1: resolved "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.2.0.tgz" integrity sha512-k+iyHEuPgSw6SbuDpGQM+06HQUa04DZ3o+F6CSzXMvvI5KMvnaEqXe+YVe555R9nn6GPt404fos4wcgpw12SDA== -acorn@^8.4.1, acorn@^8.5.0, acorn@^8.7.1, acorn@^8.8.0: +acorn@^8.4.1, acorn@^8.5.0, acorn@^8.7.1: version "8.8.1" resolved "https://registry.npmjs.org/acorn/-/acorn-8.8.1.tgz" integrity sha512-7zFpHzhnqYKrkYdUjF1HI1bzd0VygEGX8lFk4k5zVMqHEoES+P+7TKI+EvLO9WVMJ8eekdO0aDEK044xTXwPPA== +acorn@^8.9.0: + version "8.11.3" + resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.11.3.tgz#71e0b14e13a4ec160724b38fb7b0f233b1b81d7a" + integrity sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg== + adm-zip@^0.5.10: version "0.5.10" resolved "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.10.tgz" @@ -2791,7 +2823,7 @@ ajv-keywords@^3.5.2: resolved 
"https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz" integrity sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ== -ajv@^6.10.0, ajv@^6.12.4, ajv@^6.12.5: +ajv@^6.12.4, ajv@^6.12.5: version "6.12.6" resolved "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz" integrity sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g== @@ -3337,7 +3369,7 @@ debug@^3.2.7: dependencies: ms "^2.1.1" -debug@^4.1.0, debug@^4.1.1, debug@^4.3.2, debug@^4.3.4: +debug@^4.1.0, debug@^4.1.1, debug@^4.3.1, debug@^4.3.2, debug@^4.3.4: version "4.3.4" resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz" integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== @@ -3595,10 +3627,10 @@ eslint-scope@5.1.1, eslint-scope@^5.1.1: esrecurse "^4.3.0" estraverse "^4.1.1" -eslint-scope@^7.1.1: - version "7.1.1" - resolved "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.1.1.tgz" - integrity sha512-QKQM/UXpIiHcLqJ5AOyIW7XZmzjkzQXYE54n1++wb0u9V/abW3l9uQnxX8Z5Xd18xyKIMTUAyQ0k1e8pz6LUrw== +eslint-scope@^7.2.2: + version "7.2.2" + resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-7.2.2.tgz#deb4f92563390f32006894af62a22dba1c46423f" + integrity sha512-dOt21O7lTMhDM+X9mB4GX+DZrZtCUJPL/wlcTqxyrx5IvO0IYtILdtrQGQp+8n5S0gwSVmOf9NQrjMOgfQZlIg== dependencies: esrecurse "^4.3.0" estraverse "^5.2.0" @@ -3620,69 +3652,73 @@ eslint-visitor-keys@^3.3.0: resolved "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.3.0.tgz" integrity sha512-mQ+suqKJVyeuwGYHAdjMFqjCyfl8+Ldnxuyp3ldiMBFKkvytrXUZWaiPCEav8qDHKty44bD+qV1IP4T+w+xXRA== -eslint@^8.29.0: - version "8.29.0" - resolved "https://registry.npmjs.org/eslint/-/eslint-8.29.0.tgz" - integrity sha512-isQ4EEiyUjZFbEKvEGJKKGBwXtvXX+zJbkVKCgTuB9t/+jUBcy8avhkEwWJecI15BkRkOYmvIM5ynbhRjEkoeg== - dependencies: - "@eslint/eslintrc" "^1.3.3" - "@humanwhocodes/config-array" "^0.11.6" +eslint-visitor-keys@^3.4.1, eslint-visitor-keys@^3.4.3: + version "3.4.3" + resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz#0cd72fe8550e3c2eae156a96a4dddcd1c8ac5800" + integrity sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag== + +eslint@^8.57.0: + version "8.57.0" + resolved "https://registry.yarnpkg.com/eslint/-/eslint-8.57.0.tgz#c786a6fd0e0b68941aaf624596fb987089195668" + integrity sha512-dZ6+mexnaTIbSBZWgou51U6OmzIhYM2VcNdtiTtI7qPNZm35Akpr0f6vtw3w1Kmn5PYo+tZVfh13WrhpS6oLqQ== + dependencies: + "@eslint-community/eslint-utils" "^4.2.0" + "@eslint-community/regexpp" "^4.6.1" + "@eslint/eslintrc" "^2.1.4" + "@eslint/js" "8.57.0" + "@humanwhocodes/config-array" "^0.11.14" "@humanwhocodes/module-importer" "^1.0.1" "@nodelib/fs.walk" "^1.2.8" - ajv "^6.10.0" + "@ungap/structured-clone" "^1.2.0" + ajv "^6.12.4" chalk "^4.0.0" cross-spawn "^7.0.2" debug "^4.3.2" doctrine "^3.0.0" escape-string-regexp "^4.0.0" - eslint-scope "^7.1.1" - eslint-utils "^3.0.0" - eslint-visitor-keys "^3.3.0" - espree "^9.4.0" - esquery "^1.4.0" + eslint-scope "^7.2.2" + eslint-visitor-keys "^3.4.3" + espree "^9.6.1" + esquery "^1.4.2" esutils "^2.0.2" fast-deep-equal "^3.1.3" file-entry-cache "^6.0.1" find-up "^5.0.0" glob-parent "^6.0.2" - globals "^13.15.0" - grapheme-splitter "^1.0.4" + globals "^13.19.0" + graphemer "^1.4.0" ignore "^5.2.0" - import-fresh "^3.0.0" imurmurhash "^0.1.4" is-glob "^4.0.0" is-path-inside "^3.0.3" - js-sdsl "^4.1.4" js-yaml "^4.1.0" 
json-stable-stringify-without-jsonify "^1.0.1" levn "^0.4.1" lodash.merge "^4.6.2" minimatch "^3.1.2" natural-compare "^1.4.0" - optionator "^0.9.1" - regexpp "^3.2.0" + optionator "^0.9.3" strip-ansi "^6.0.1" - strip-json-comments "^3.1.0" text-table "^0.2.0" -espree@^9.4.0: - version "9.4.1" - resolved "https://registry.npmjs.org/espree/-/espree-9.4.1.tgz" - integrity sha512-XwctdmTO6SIvCzd9810yyNzIrOrqNYV9Koizx4C/mRhf9uq0o4yHoCEU/670pOxOL/MSraektvSAji79kX90Vg== +espree@^9.6.0, espree@^9.6.1: + version "9.6.1" + resolved "https://registry.yarnpkg.com/espree/-/espree-9.6.1.tgz#a2a17b8e434690a5432f2f8018ce71d331a48c6f" + integrity sha512-oruZaFkjorTpF32kDSI5/75ViwGeZginGGy2NoOSg3Q9bnwlnmDm4HLnkl0RE3n+njDXR037aY1+x58Z/zFdwQ== dependencies: - acorn "^8.8.0" + acorn "^8.9.0" acorn-jsx "^5.3.2" - eslint-visitor-keys "^3.3.0" + eslint-visitor-keys "^3.4.1" esprima@^4.0.0: version "4.0.1" resolved "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz" integrity sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A== -esquery@^1.4.0: - version "1.4.0" - resolved "https://registry.npmjs.org/esquery/-/esquery-1.4.0.tgz" - integrity sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w== +esquery@^1.4.2: + version "1.5.0" + resolved "https://registry.yarnpkg.com/esquery/-/esquery-1.5.0.tgz#6ce17738de8577694edd7361c57182ac8cb0db0b" + integrity sha512-YQLXUplAwJgCydQ78IMJywZCceoqk1oH01OERdSAJc/7U2AylwjhSCLDEtqwg811idIS/9fIU5GjG73IgjKMVg== dependencies: estraverse "^5.1.0" @@ -4021,10 +4057,10 @@ globals@^11.1.0: resolved "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz" integrity sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA== -globals@^13.15.0: - version "13.18.0" - resolved "https://registry.npmjs.org/globals/-/globals-13.18.0.tgz" - integrity sha512-/mR4KI8Ps2spmoc0Ulu9L7agOF0du1CZNQ3dke8yItYlyKNmGrkONemBbd6V8UTc1Wgcqn21t3WYB7dbRmh6/A== +globals@^13.19.0: + version "13.24.0" + resolved "https://registry.yarnpkg.com/globals/-/globals-13.24.0.tgz#8432a19d78ce0c1e833949c36adb345400bb1171" + integrity sha512-AhO5QUcj8llrbG09iWhPU2B204J1xnPeL8kQmVorSsy+Sjj1sk8gIyh6cUocGmH4L0UuhAJy+hJMRA4mgA4mFQ== dependencies: type-fest "^0.20.2" @@ -4080,10 +4116,10 @@ graceful-fs@^4.1.10, graceful-fs@^4.1.2, graceful-fs@^4.2.4, graceful-fs@^4.2.9: resolved "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.10.tgz" integrity sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA== -grapheme-splitter@^1.0.4: - version "1.0.4" - resolved "https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz" - integrity sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ== +graphemer@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/graphemer/-/graphemer-1.4.0.tgz#fb2f1d55e0e3a1849aeffc90c4fa0dd53a0e66c6" + integrity sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag== grpc-web@^1.4.2: version "1.4.2" @@ -4229,7 +4265,7 @@ ignore@^5.2.0: resolved "https://registry.npmjs.org/ignore/-/ignore-5.2.1.tgz" integrity sha512-d2qQLzTJ9WxQftPAuEQpSPmKqzxePjzVbpAVv62AQ64NTL+wR4JkrVqR/LqFsFEUsHDAiId52mJteHDFuDkElA== -import-fresh@^3.0.0, import-fresh@^3.2.1: +import-fresh@^3.2.1: version "3.3.0" resolved "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz" integrity 
sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw== @@ -4805,11 +4841,6 @@ jest@^29.3.1: import-local "^3.0.2" jest-cli "^29.7.0" -js-sdsl@^4.1.4: - version "4.2.0" - resolved "https://registry.npmjs.org/js-sdsl/-/js-sdsl-4.2.0.tgz" - integrity sha512-dyBIzQBDkCqCu+0upx25Y2jGdbTGxE9fshMsCdK0ViOongpV+n5tXRcZY9v7CaVQ79AGS9KA1KHtojxiM7aXSQ== - js-sha3@^0.8.0: version "0.8.0" resolved "https://registry.npmjs.org/js-sha3/-/js-sha3-0.8.0.tgz" @@ -5186,17 +5217,17 @@ opener@^1.5.1: resolved "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz" integrity sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A== -optionator@^0.9.1: - version "0.9.1" - resolved "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz" - integrity sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw== +optionator@^0.9.3: + version "0.9.3" + resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.9.3.tgz#007397d44ed1872fdc6ed31360190f81814e2c64" + integrity sha512-JjCoypp+jKn1ttEFExxhetCKeJt9zhAgAve5FXHixTvFDW/5aEktX9bufBKLRRMdU7bNtpLfcGu94B3cdEJgjg== dependencies: + "@aashutoshrathi/word-wrap" "^1.2.3" deep-is "^0.1.3" fast-levenshtein "^2.0.6" levn "^0.4.1" prelude-ls "^1.2.1" type-check "^0.4.0" - word-wrap "^1.2.3" p-cancelable@^0.4.0: version "0.4.1" @@ -5871,7 +5902,7 @@ strip-final-newline@^2.0.0: resolved "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-2.0.0.tgz" integrity sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA== -strip-json-comments@^3.1.0, strip-json-comments@^3.1.1: +strip-json-comments@^3.1.1: version "3.1.1" resolved "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz" integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig== @@ -6284,11 +6315,6 @@ wildcard@^2.0.0: resolved "https://registry.npmjs.org/wildcard/-/wildcard-2.0.0.tgz" integrity sha512-JcKqAHLPxcdb9KM49dufGXn2x3ssnfjbcaQdLlfZsL9rH9wgDQjUtDxbo8NE0F6SFvydeu1VhZe7hZuHsB2/pw== -word-wrap@^1.2.3: - version "1.2.3" - resolved "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz" - integrity sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ== - wrap-ansi@^7.0.0: version "7.0.0" resolved "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz" diff --git a/protocol/chainlib/chain_message_queries.go b/protocol/chainlib/chain_message_queries.go index 0f964152d4..a8e453cdf1 100644 --- a/protocol/chainlib/chain_message_queries.go +++ b/protocol/chainlib/chain_message_queries.go @@ -3,25 +3,25 @@ package chainlib import "github.com/lavanet/lava/protocol/common" func ShouldSendToAllProviders(chainMessage ChainMessage) bool { - return chainMessage.GetApi().Category.Stateful == common.CONSISTENCY_SELECT_ALLPROVIDERS + return chainMessage.GetApi().Category.Stateful == common.CONSISTENCY_SELECT_ALL_PROVIDERS } -func GetAddon(chainMessage ChainMessage) string { +func GetAddon(chainMessage ChainMessageForSend) string { return chainMessage.GetApiCollection().CollectionData.AddOn } -func IsSubscription(chainMessage ChainMessage) bool { +func IsSubscription(chainMessage ChainMessageForSend) bool { return chainMessage.GetApi().Category.Subscription } -func IsHangingApi(chainMessage ChainMessage) bool { +func IsHangingApi(chainMessage ChainMessageForSend) bool { return chainMessage.GetApi().Category.HangingApi } -func 
GetComputeUnits(chainMessage ChainMessage) uint64 { +func GetComputeUnits(chainMessage ChainMessageForSend) uint64 { return chainMessage.GetApi().ComputeUnits } -func GetStateful(chainMessage ChainMessage) uint32 { +func GetStateful(chainMessage ChainMessageForSend) uint32 { return chainMessage.GetApi().Category.Stateful } diff --git a/protocol/chainlib/chainlib.go b/protocol/chainlib/chainlib.go index f0f0d395a1..5be294391c 100644 --- a/protocol/chainlib/chainlib.go +++ b/protocol/chainlib/chainlib.go @@ -85,6 +85,7 @@ type ChainMessage interface { } type ChainMessageForSend interface { + TimeoutOverride(...time.Duration) time.Duration GetApi() *spectypes.Api GetRPCMessage() rpcInterfaceMessages.GenericMessage GetApiCollection() *spectypes.ApiCollection diff --git a/protocol/chainlib/common.go b/protocol/chainlib/common.go index 34efe3202f..6624180fb2 100644 --- a/protocol/chainlib/common.go +++ b/protocol/chainlib/common.go @@ -1,6 +1,7 @@ package chainlib import ( + "context" "encoding/json" "fmt" "net" @@ -86,6 +87,13 @@ func (bcp *BaseChainProxy) GetChainProxyInformation() (common.NodeUrl, string) { return bcp.NodeUrl, bcp.ChainID } +func (bcp *BaseChainProxy) CapTimeoutForSend(ctx context.Context, chainMessage ChainMessageForSend) (context.Context, context.CancelFunc) { + relayTimeout := GetRelayTimeout(chainMessage, bcp.averageBlockTime) + processingTimeout := common.GetTimeoutForProcessing(relayTimeout, GetTimeoutInfo(chainMessage)) + connectCtx, cancel := bcp.NodeUrl.LowerContextTimeout(ctx, processingTimeout) + return connectCtx, cancel +} + func extractDappIDFromFiberContext(c *fiber.Ctx) (dappID string) { // Read the dappID from the headers dappID = c.Get("dapp-id") @@ -299,21 +307,21 @@ func CompareRequestedBlockInBatch(firstRequestedBlock int64, second int64) (late return returnBigger(firstRequestedBlock, second) } -func GetRelayTimeout(chainMessage ChainMessage, chainParser ChainParser, timeouts int) time.Duration { +func GetRelayTimeout(chainMessage ChainMessageForSend, averageBlockTime time.Duration) time.Duration { if chainMessage.TimeoutOverride() != 0 { return chainMessage.TimeoutOverride() } // Calculate extra RelayTimeout extraRelayTimeout := time.Duration(0) if IsHangingApi(chainMessage) { - _, extraRelayTimeout, _, _ = chainParser.ChainBlockStats() + extraRelayTimeout = averageBlockTime * 2 } relayTimeAddition := common.GetTimePerCu(GetComputeUnits(chainMessage)) if chainMessage.GetApi().TimeoutMs > 0 { relayTimeAddition = time.Millisecond * time.Duration(chainMessage.GetApi().TimeoutMs) } // Set relay timout, increase it every time we fail a relay on timeout - return extraRelayTimeout + time.Duration(timeouts+1)*relayTimeAddition + common.AverageWorldLatency + return extraRelayTimeout + relayTimeAddition + common.AverageWorldLatency } // setup a common preflight and cors configuration allowing wild cards and preflight caching. 
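Illustrative sketch (not part of the patch): the hunk above reworks GetRelayTimeout to take the average block time directly instead of a chain parser and a retry counter, and CapTimeoutForSend then caps the node call with the processing timeout built from GetTimeoutInfo and common.GetTimeoutForProcessing (both added later in this diff). The standalone Go program below mirrors that composition; the constants, including the per-compute-unit allowance, are assumed stand-ins for the real values in protocol/common.

package main

import (
	"fmt"
	"time"
)

// Assumed constants standing in for the ones defined in protocol/common.
const (
	averageWorldLatency = 300 * time.Millisecond
	defaultTimeout      = 20 * time.Second
	defaultTimeoutLong  = 3 * time.Minute
	perCUAllowance      = 100 * time.Millisecond // assumed per-compute-unit time allowance
)

// relayTimeout mirrors the reworked GetRelayTimeout: the retry-count multiplier is gone,
// and hanging APIs add twice the average block time instead of querying the chain parser.
func relayTimeout(computeUnits uint64, hanging bool, averageBlockTime time.Duration) time.Duration {
	extra := time.Duration(0)
	if hanging {
		extra = 2 * averageBlockTime
	}
	return extra + time.Duration(computeUnits)*perCUAllowance + averageWorldLatency
}

// processingTimeout mirrors GetTimeoutForProcessing: use the long default for hanging,
// heavy (CU > 100), or stateful relays, and never go below the relay timeout itself.
func processingTimeout(relay time.Duration, computeUnits uint64, hanging, stateful bool) time.Duration {
	timeout := defaultTimeout
	if hanging || computeUnits > 100 || stateful {
		timeout = defaultTimeoutLong
	}
	if relay > timeout {
		timeout = relay
	}
	return timeout
}

func main() {
	avgBlockTime := 15 * time.Second
	relay := relayTimeout(10, false, avgBlockTime)
	fmt.Println("relay timeout:", relay)
	fmt.Println("processing timeout:", processingTimeout(relay, 10, false, false))
}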
@@ -416,3 +424,11 @@ func (rd *RefererData) SendReferer(refererMatchString string, chainId string, ms rd.ReferrerClient.AppendReferrer(metrics.NewReferrerRequest(refererMatchString, chainId, msg, referer, origin, userAgent)) return nil } + +func GetTimeoutInfo(chainMessage ChainMessageForSend) common.TimeoutInfo { + return common.TimeoutInfo{ + CU: chainMessage.GetApi().ComputeUnits, + Hanging: IsHangingApi(chainMessage), + Stateful: GetStateful(chainMessage), + } +} diff --git a/protocol/chainlib/common_test_utils.go b/protocol/chainlib/common_test_utils.go index 3861ece1fd..d18e81d49e 100644 --- a/protocol/chainlib/common_test_utils.go +++ b/protocol/chainlib/common_test_utils.go @@ -86,11 +86,11 @@ func generateCombinations(arr []string) [][]string { // generates a chain parser, a chain fetcher messages based on it // apiInterface can either be an ApiInterface string as in spectypes.ApiInterfaceXXX or a number for an index in the apiCollections -func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface string, serverCallback http.HandlerFunc, getToTopMostPath string, services []string) (cpar ChainParser, crout ChainRouter, cfetc chaintracker.ChainFetcher, closeServer func(), errRet error) { +func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface string, serverCallback http.HandlerFunc, getToTopMostPath string, services []string) (cpar ChainParser, crout ChainRouter, cfetc chaintracker.ChainFetcher, closeServer func(), endpointRet *lavasession.RPCProviderEndpoint, errRet error) { closeServer = nil spec, err := keepertest.GetASpec(specIndex, getToTopMostPath, nil, nil) if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, err } index, err := strconv.Atoi(apiInterface) if err == nil && index < len(spec.ApiCollections) { @@ -98,7 +98,7 @@ func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface str } chainParser, err := NewChainParser(apiInterface) if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, err } var chainRouter ChainRouter chainParser.SetSpec(spec) @@ -111,15 +111,15 @@ func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface str } addons, extensions, err := chainParser.SeparateAddonsExtensions(services) if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, err } if apiInterface == spectypes.APIInterfaceGrpc { // Start a new gRPC server using the buffered connection grpcServer := grpc.NewServer() - lis, err := net.Listen("tcp", "localhost:0") + lis, err := net.Listen("tcp", "127.0.0.1:0") if err != nil { - return nil, nil, nil, closeServer, err + return nil, nil, nil, closeServer, nil, err } endpoint.NodeUrls = append(endpoint.NodeUrls, common.NodeUrl{Url: lis.Addr().String(), Addons: addons}) allCombinations := generateCombinations(extensions) @@ -138,7 +138,7 @@ func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface str time.Sleep(10 * time.Millisecond) chainRouter, err = GetChainRouter(ctx, 1, endpoint, chainParser) if err != nil { - return nil, nil, nil, closeServer, err + return nil, nil, nil, closeServer, nil, err } } else { mockServer := httptest.NewServer(serverCallback) @@ -146,11 +146,11 @@ func CreateChainLibMocks(ctx context.Context, specIndex string, apiInterface str endpoint.NodeUrls = append(endpoint.NodeUrls, common.NodeUrl{Url: mockServer.URL, Addons: addons}) chainRouter, err = GetChainRouter(ctx, 1, endpoint, chainParser) if err != nil { - return nil, nil, nil, closeServer, 
err + return nil, nil, nil, closeServer, nil, err } } chainFetcher := NewChainFetcher(ctx, &ChainFetcherOptions{chainRouter, chainParser, endpoint, nil}) - return chainParser, chainRouter, chainFetcher, closeServer, err + return chainParser, chainRouter, chainFetcher, closeServer, endpoint, err } type TestStruct struct { diff --git a/protocol/chainlib/grpc.go b/protocol/chainlib/grpc.go index 8c27df6dfa..379972521f 100644 --- a/protocol/chainlib/grpc.go +++ b/protocol/chainlib/grpc.go @@ -310,7 +310,7 @@ func (apil *GrpcChainListener) Serve(ctx context.Context, cmdFlags common.Consum grpcHeaders := convertToMetadataMapOfSlices(metadataValues) utils.LavaFormatDebug("in <<< GRPC Relay ", utils.LogAttr("GUID", ctx), - utils.LogAttr("method", method), + utils.LogAttr("_method", method), utils.LogAttr("headers", grpcHeaders), ) metricsData := metrics.NewRelayAnalytics(dappID, apil.endpoint.ChainID, apiInterface) @@ -520,14 +520,14 @@ func (cp *GrpcChainProxy) SendNodeMsg(ctx context.Context, ch chan interface{}, } if debug { utils.LavaFormatDebug("provider sending node message", - utils.Attribute{Key: "method", Value: nodeMessage.Path}, + utils.Attribute{Key: "_method", Value: nodeMessage.Path}, utils.Attribute{Key: "headers", Value: metadataMap}, utils.Attribute{Key: "apiInterface", Value: "grpc"}, ) } var respHeaders metadata.MD response := msgFactory.NewMessage(methodDescriptor.GetOutputType()) - connectCtx, cancel := cp.NodeUrl.LowerContextTimeout(ctx, chainMessage, cp.averageBlockTime) + connectCtx, cancel := cp.CapTimeoutForSend(ctx, chainMessage) defer cancel() err = conn.Invoke(connectCtx, "/"+nodeMessage.Path, msg, response, grpc.Header(&respHeaders)) if err != nil { diff --git a/protocol/chainlib/grpc_test.go b/protocol/chainlib/grpc_test.go index 508bcaa96e..d5404435c7 100644 --- a/protocol/chainlib/grpc_test.go +++ b/protocol/chainlib/grpc_test.go @@ -142,7 +142,7 @@ func TestGrpcChainProxy(t *testing.T) { // Handle the incoming request and provide the desired response wasCalled = true }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceGrpc, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceGrpc, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -169,7 +169,7 @@ func TestParsingRequestedBlocksHeadersGrpc(t *testing.T) { w.WriteHeader(244591) } }) - chainParser, chainRouter, _, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceGrpc, serverHandler, "../../", nil) + chainParser, chainRouter, _, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceGrpc, serverHandler, "../../", nil) require.NoError(t, err) defer func() { if closeServer != nil { @@ -237,7 +237,7 @@ func TestSettingBlocksHeadersGrpc(t *testing.T) { w.WriteHeader(244591) } }) - chainParser, chainRouter, _, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceGrpc, serverHandler, "../../", nil) + chainParser, chainRouter, _, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceGrpc, serverHandler, "../../", nil) require.NoError(t, err) defer func() { if closeServer != nil { diff --git a/protocol/chainlib/jsonRPC.go b/protocol/chainlib/jsonRPC.go index 13863ba950..4cd3fba4e0 100644 --- a/protocol/chainlib/jsonRPC.go +++ b/protocol/chainlib/jsonRPC.go @@ -446,7 +446,7 @@ func (apil 
*JsonRPCChainListener) Serve(ctx context.Context, cmdFlags common.Con utils.LavaFormatDebug("in <<<", utils.LogAttr("GUID", ctx), utils.LogAttr("seed", msgSeed), - utils.LogAttr("msg", logFormattedMsg), + utils.LogAttr("_msg", logFormattedMsg), utils.LogAttr("dappID", dappID), utils.LogAttr("headers", headers), ) @@ -596,7 +596,7 @@ func (cp *JrpcChainProxy) sendBatchMessage(ctx context.Context, nodeMessage *rpc } } // set context with timeout - connectCtx, cancel := cp.NodeUrl.LowerContextTimeout(ctx, chainMessage, cp.averageBlockTime) + connectCtx, cancel := cp.CapTimeoutForSend(ctx, chainMessage) defer cancel() cp.NodeUrl.SetIpForwardingIfNecessary(ctx, rpc.SetHeader) @@ -668,7 +668,7 @@ func (cp *JrpcChainProxy) SendNodeMsg(ctx context.Context, ch chan interface{}, } else { // we use the minimum timeout between the two, spec or context. to prevent the provider from hanging // we don't use the context alone so the provider won't be hanging forever by an attack - connectCtx, cancel := cp.NodeUrl.LowerContextTimeout(ctx, chainMessage, cp.averageBlockTime) + connectCtx, cancel := cp.CapTimeoutForSend(ctx, chainMessage) defer cancel() cp.NodeUrl.SetIpForwardingIfNecessary(ctx, rpc.SetHeader) diff --git a/protocol/chainlib/jsonRPC_test.go b/protocol/chainlib/jsonRPC_test.go index 8d13774a1a..e2bac3eaeb 100644 --- a/protocol/chainlib/jsonRPC_test.go +++ b/protocol/chainlib/jsonRPC_test.go @@ -140,7 +140,7 @@ func TestJsonRpcChainProxy(t *testing.T) { fmt.Fprint(w, `{"jsonrpc":"2.0","id":1,"result":"0x10a7a08"}`) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -164,7 +164,7 @@ func TestAddonAndVerifications(t *testing.T) { fmt.Fprint(w, `{"jsonrpc":"2.0","id":1,"result":"0xf9ccdff90234a064"}`) }) - chainParser, chainRouter, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", []string{"debug"}) + chainParser, chainRouter, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", []string{"debug"}) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainRouter) @@ -197,7 +197,7 @@ func TestExtensions(t *testing.T) { }) specname := "ETH1" - chainParser, chainRouter, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, specname, spectypes.APIInterfaceJsonRPC, serverHandle, "../../", []string{"archive"}) + chainParser, chainRouter, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, specname, spectypes.APIInterfaceJsonRPC, serverHandle, "../../", []string{"archive"}) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainRouter) @@ -279,7 +279,7 @@ func TestJsonRpcBatchCall(t *testing.T) { fmt.Fprint(w, response) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -320,7 +320,7 @@ func TestJsonRpcBatchCallSameID(t 
*testing.T) { fmt.Fprint(w, response) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "ETH1", spectypes.APIInterfaceJsonRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) diff --git a/protocol/chainlib/rest.go b/protocol/chainlib/rest.go index 6602e24cab..4e3e755226 100644 --- a/protocol/chainlib/rest.go +++ b/protocol/chainlib/rest.go @@ -300,7 +300,7 @@ func (apil *RestChainListener) Serve(ctx context.Context, cmdFlags common.Consum analytics := metrics.NewRelayAnalytics(dappID, chainID, apiInterface) utils.LavaFormatDebug("in <<<", utils.LogAttr("GUID", ctx), - utils.LogAttr("path", path), + utils.LogAttr("_path", path), utils.LogAttr("dappID", dappID), utils.LogAttr("msgSeed", msgSeed), utils.LogAttr("headers", restHeaders), @@ -366,7 +366,7 @@ func (apil *RestChainListener) Serve(ctx context.Context, cmdFlags common.Consum defer cancel() // incase there's a problem make sure to cancel the connection utils.LavaFormatDebug("in <<<", utils.LogAttr("GUID", ctx), - utils.LogAttr("path", path), + utils.LogAttr("_path", path), utils.LogAttr("dappID", dappID), utils.LogAttr("msgSeed", msgSeed), utils.LogAttr("headers", restHeaders), @@ -473,7 +473,7 @@ func (rcp *RestChainProxy) SendNodeMsg(ctx context.Context, ch chan interface{}, urlPath := rcp.NodeUrl.Url + nodeMessage.Path // set context with timeout - connectCtx, cancel := rcp.NodeUrl.LowerContextTimeout(ctx, chainMessage, rcp.averageBlockTime) + connectCtx, cancel := rcp.CapTimeoutForSend(ctx, chainMessage) defer cancel() req, err := http.NewRequestWithContext(connectCtx, connectionTypeSlected, rcp.NodeUrl.AuthConfig.AddAuthPath(urlPath), msgBuffer) @@ -496,7 +496,7 @@ func (rcp *RestChainProxy) SendNodeMsg(ctx context.Context, ch chan interface{}, if debug { utils.LavaFormatDebug("provider sending node message", - utils.Attribute{Key: "method", Value: nodeMessage.Path}, + utils.Attribute{Key: "_method", Value: nodeMessage.Path}, utils.Attribute{Key: "headers", Value: req.Header}, utils.Attribute{Key: "apiInterface", Value: "rest"}, ) diff --git a/protocol/chainlib/rest_test.go b/protocol/chainlib/rest_test.go index 5baf9b4a31..6e578f2ae3 100644 --- a/protocol/chainlib/rest_test.go +++ b/protocol/chainlib/rest_test.go @@ -135,7 +135,7 @@ func TestRestChainProxy(t *testing.T) { w.WriteHeader(http.StatusOK) fmt.Fprint(w, `{"block": { "header": {"height": "244591"}}}`) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceRest, serverHandler, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -166,7 +166,7 @@ func TestParsingRequestedBlocksHeadersRest(t *testing.T) { fmt.Fprint(w, `{"block": { "header": {"height": "244591"}}}`) } }) - chainParser, chainRouter, _, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, chainRouter, _, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceRest, serverHandler, "../../", nil) require.NoError(t, err) defer func() { if closeServer != nil { @@ -236,7 +236,7 @@ func 
TestSettingRequestedBlocksHeadersRest(t *testing.T) { } fmt.Fprint(w, `{"block": { "header": {"height": "244591"}}}`) }) - chainParser, chainRouter, _, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, chainRouter, _, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceRest, serverHandler, "../../", nil) require.NoError(t, err) defer func() { if closeServer != nil { diff --git a/protocol/chainlib/tendermintRPC.go b/protocol/chainlib/tendermintRPC.go index 3e54c8af25..a95f25c5a4 100644 --- a/protocol/chainlib/tendermintRPC.go +++ b/protocol/chainlib/tendermintRPC.go @@ -469,7 +469,7 @@ func (apil *TendermintRpcChainListener) Serve(ctx context.Context, cmdFlags comm utils.LavaFormatDebug("in <<<", utils.LogAttr("GUID", ctx), utils.LogAttr("seed", msgSeed), - utils.LogAttr("msg", logFormattedMsg), + utils.LogAttr("_msg", logFormattedMsg), utils.LogAttr("dappID", dappID), utils.LogAttr("headers", headers), ) @@ -529,7 +529,7 @@ func (apil *TendermintRpcChainListener) Serve(ctx context.Context, cmdFlags comm headers := convertToMetadataMap(metadataValues) utils.LavaFormatDebug("urirpc in <<<", utils.LogAttr("GUID", ctx), - utils.LogAttr("msg", path), + utils.LogAttr("_msg", path), utils.LogAttr("dappID", dappID), utils.LogAttr("headers", headers), ) @@ -674,7 +674,7 @@ func (cp *tendermintRpcChainProxy) SendURI(ctx context.Context, nodeMessage *rpc url := cp.httpNodeUrl.Url + "/" + nodeMessage.Path // set context with timeout - connectCtx, cancel := cp.NodeUrl.LowerContextTimeout(ctx, chainMessage, cp.averageBlockTime) + connectCtx, cancel := cp.CapTimeoutForSend(ctx, chainMessage) defer cancel() // create a new http request @@ -776,7 +776,7 @@ func (cp *tendermintRpcChainProxy) SendRPC(ctx context.Context, nodeMessage *rpc sub, rpcMessage, err = rpc.Subscribe(context.Background(), nodeMessage.ID, nodeMessage.Method, ch, nodeMessage.Params) } else { // set context with timeout - connectCtx, cancel := cp.NodeUrl.LowerContextTimeout(ctx, chainMessage, cp.averageBlockTime) + connectCtx, cancel := cp.CapTimeoutForSend(ctx, chainMessage) defer cancel() cp.NodeUrl.SetIpForwardingIfNecessary(ctx, rpc.SetHeader) diff --git a/protocol/chainlib/tendermintRPC_test.go b/protocol/chainlib/tendermintRPC_test.go index 61acb31989..a48752b4bc 100644 --- a/protocol/chainlib/tendermintRPC_test.go +++ b/protocol/chainlib/tendermintRPC_test.go @@ -149,7 +149,7 @@ func TestTendermintRpcChainProxy(t *testing.T) { }`) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -180,7 +180,7 @@ func TestTendermintRpcBatchCall(t *testing.T) { fmt.Fprint(w, response) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -222,7 +222,7 @@ func TestTendermintRpcBatchCallWithSameID(t *testing.T) { fmt.Fprint(w, nodeResponse) 
}) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) @@ -256,7 +256,7 @@ func TestTendermintURIRPC(t *testing.T) { }`) }) - chainParser, chainProxy, chainFetcher, closeServer, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := CreateChainLibMocks(ctx, "LAV1", spectypes.APIInterfaceTendermintRPC, serverHandle, "../../", nil) require.NoError(t, err) require.NotNil(t, chainParser) require.NotNil(t, chainProxy) diff --git a/protocol/common/collections.go b/protocol/common/collections.go index ad688973a8..7e75f5028d 100644 --- a/protocol/common/collections.go +++ b/protocol/common/collections.go @@ -5,8 +5,8 @@ import ( ) const ( - CONSISTENCY_SELECT_ALLPROVIDERS = 1 - NOSTATE = 0 + CONSISTENCY_SELECT_ALL_PROVIDERS = 1 + NO_STATE = 0 ) func GetExtensionNames(extensionCollection []*spectypes.Extension) (extensions []string) { diff --git a/protocol/common/endpoints.go b/protocol/common/endpoints.go index 02f965ed51..ab133cebf9 100644 --- a/protocol/common/endpoints.go +++ b/protocol/common/endpoints.go @@ -22,6 +22,7 @@ const ( IP_FORWARDING_HEADER_NAME = "X-Forwarded-For" PROVIDER_ADDRESS_HEADER_NAME = "Lava-Provider-Address" RETRY_COUNT_HEADER_NAME = "Lava-Retries" + PROVIDER_LATEST_BLOCK_HEADER_NAME = "Provider-Latest-Block" GUID_HEADER_NAME = "Lava-Guid" // these headers need to be lowercase BLOCK_PROVIDERS_ADDRESSES_HEADER_NAME = "lava-providers-block" @@ -89,27 +90,15 @@ func (url *NodeUrl) LowerContextTimeoutWithDuration(ctx context.Context, timeout return CapContextTimeout(ctx, timeout+url.Timeout) } -func (url *NodeUrl) LowerContextTimeout(ctx context.Context, chainMessage ChainMessageGetApiInterface, averageBlockTime time.Duration) (context.Context, context.CancelFunc) { - var timeout time.Duration - specOverwriteTimeout := chainMessage.GetApi().TimeoutMs - if specOverwriteTimeout > 0 { - timeout = time.Millisecond * time.Duration(specOverwriteTimeout) - } else { - timeout = LocalNodeTimePerCu(chainMessage.GetApi().ComputeUnits) - } - - // check if this API is hanging (waiting for block confirmation) - if chainMessage.GetApi().Category.HangingApi { - timeout += averageBlockTime - } +func (url *NodeUrl) LowerContextTimeout(ctx context.Context, processingTimeout time.Duration) (context.Context, context.CancelFunc) { // allowing the consumer's context to increase the timeout by up to x2 // this allows the consumer to get extra timeout than the spec up to a threshold so // the provider wont be attacked by infinite context timeout - timeout *= MAXIMUM_ALLOWED_TIMEOUT_EXTEND_MULTIPLIER_BY_THE_CONSUMER + processingTimeout *= MAXIMUM_ALLOWED_TIMEOUT_EXTEND_MULTIPLIER_BY_THE_CONSUMER if url == nil || url.Timeout <= 0 { - return CapContextTimeout(ctx, timeout) + return CapContextTimeout(ctx, processingTimeout) } - return CapContextTimeout(ctx, timeout+url.Timeout) + return CapContextTimeout(ctx, processingTimeout+url.Timeout) } type AuthConfig struct { @@ -221,6 +210,7 @@ type RelayResult struct { Finalized bool ConflictHandler ConflictHandlerInterface StatusCode int + Quorum int } func (rr *RelayResult) GetReplyServer() 
*pairingtypes.Relayer_RelaySubscribeClient { diff --git a/protocol/common/timeout.go b/protocol/common/timeout.go index 1827dd78bd..3b6e6d4708 100644 --- a/protocol/common/timeout.go +++ b/protocol/common/timeout.go @@ -16,6 +16,8 @@ const ( DataReliabilityTimeoutIncrease = 5 * time.Second AverageWorldLatency = 300 * time.Millisecond CommunicateWithLocalLavaNodeTimeout = (3 * time.Second) + AverageWorldLatency + DefaultTimeout = 20 * time.Second + DefaultTimeoutLong = 3 * time.Minute CacheTimeout = 50 * time.Millisecond ) @@ -61,3 +63,20 @@ func IsTimeout(errArg error) bool { } return false } + +type TimeoutInfo struct { + CU uint64 + Hanging bool + Stateful uint32 +} + +func GetTimeoutForProcessing(relayTimeout time.Duration, timeoutInfo TimeoutInfo) time.Duration { + ctxTimeout := DefaultTimeout + if timeoutInfo.Hanging || timeoutInfo.CU > 100 || timeoutInfo.Stateful == CONSISTENCY_SELECT_ALL_PROVIDERS { + ctxTimeout = DefaultTimeoutLong + } + if relayTimeout > ctxTimeout { + ctxTimeout = relayTimeout + } + return ctxTimeout +} diff --git a/protocol/integration/mocks.go b/protocol/integration/mocks.go new file mode 100644 index 0000000000..1b8dae06f7 --- /dev/null +++ b/protocol/integration/mocks.go @@ -0,0 +1,261 @@ +package integration_test + +import ( + "context" + "fmt" + "net/http" + "strconv" + "sync" + "time" + + "github.com/lavanet/lava/protocol/chaintracker" + "github.com/lavanet/lava/protocol/common" + "github.com/lavanet/lava/protocol/lavaprotocol" + "github.com/lavanet/lava/protocol/lavasession" + "github.com/lavanet/lava/protocol/rpcprovider/reliabilitymanager" + "github.com/lavanet/lava/protocol/statetracker/updaters" + "github.com/lavanet/lava/utils" + conflicttypes "github.com/lavanet/lava/x/conflict/types" + pairingtypes "github.com/lavanet/lava/x/pairing/types" + plantypes "github.com/lavanet/lava/x/plans/types" + protocoltypes "github.com/lavanet/lava/x/protocol/types" +) + +type mockConsumerStateTracker struct{} + +func (m *mockConsumerStateTracker) RegisterForVersionUpdates(ctx context.Context, version *protocoltypes.Version, versionValidator updaters.VersionValidationInf) { +} + +func (m *mockConsumerStateTracker) RegisterConsumerSessionManagerForPairingUpdates(ctx context.Context, consumerSessionManager *lavasession.ConsumerSessionManager) { +} + +func (m *mockConsumerStateTracker) RegisterForSpecUpdates(ctx context.Context, specUpdatable updaters.SpecUpdatable, endpoint lavasession.RPCEndpoint) error { + return nil +} + +func (m *mockConsumerStateTracker) RegisterFinalizationConsensusForUpdates(context.Context, *lavaprotocol.FinalizationConsensus) { +} + +func (m *mockConsumerStateTracker) RegisterForDowntimeParamsUpdates(ctx context.Context, downtimeParamsUpdatable updaters.DowntimeParamsUpdatable) error { + return nil +} + +func (m *mockConsumerStateTracker) TxConflictDetection(ctx context.Context, finalizationConflict *conflicttypes.FinalizationConflict, responseConflict *conflicttypes.ResponseConflict, sameProviderConflict *conflicttypes.FinalizationConflict, conflictHandler common.ConflictHandlerInterface) error { + return nil +} + +func (m *mockConsumerStateTracker) GetConsumerPolicy(ctx context.Context, consumerAddress, chainID string) (*plantypes.Policy, error) { + return &plantypes.Policy{ + ChainPolicies: []plantypes.ChainPolicy{}, + GeolocationProfile: 1, + TotalCuLimit: 10000, + EpochCuLimit: 1000, + MaxProvidersToPair: 5, + SelectedProvidersMode: 0, + SelectedProviders: []string{}, + }, nil +} + +func (m *mockConsumerStateTracker) 
GetProtocolVersion(ctx context.Context) (*updaters.ProtocolVersionResponse, error) { + return nil, fmt.Errorf("banana") +} + +func (m *mockConsumerStateTracker) GetLatestVirtualEpoch() uint64 { + return 0 +} + +type ReplySetter struct { + status int + replyDataBuf []byte + handler func([]byte, http.Header) ([]byte, int) +} + +type mockProviderStateTracker struct { + consumerAddressForPairing string + averageBlockTime time.Duration +} + +func (m *mockProviderStateTracker) RegisterForVersionUpdates(ctx context.Context, version *protocoltypes.Version, versionValidator updaters.VersionValidationInf) { +} + +func (m *mockProviderStateTracker) RegisterForSpecUpdates(ctx context.Context, specUpdatable updaters.SpecUpdatable, endpoint lavasession.RPCEndpoint) error { + return nil +} + +func (m *mockProviderStateTracker) RegisterForSpecVerifications(ctx context.Context, specVerifier updaters.SpecVerifier, chainId string) error { + return nil +} + +func (m *mockProviderStateTracker) RegisterReliabilityManagerForVoteUpdates(ctx context.Context, voteUpdatable updaters.VoteUpdatable, endpointP *lavasession.RPCProviderEndpoint) { +} + +func (m *mockProviderStateTracker) RegisterForEpochUpdates(ctx context.Context, epochUpdatable updaters.EpochUpdatable) { +} + +func (m *mockProviderStateTracker) RegisterForDowntimeParamsUpdates(ctx context.Context, downtimeParamsUpdatable updaters.DowntimeParamsUpdatable) error { + return nil +} + +func (m *mockProviderStateTracker) TxRelayPayment(ctx context.Context, relayRequests []*pairingtypes.RelaySession, description string, latestBlocks []*pairingtypes.LatestBlockReport) error { + return nil +} + +func (m *mockProviderStateTracker) SendVoteReveal(voteID string, vote *reliabilitymanager.VoteData) error { + return nil +} + +func (m *mockProviderStateTracker) SendVoteCommitment(voteID string, vote *reliabilitymanager.VoteData) error { + return nil +} + +func (m *mockProviderStateTracker) LatestBlock() int64 { + return 1000 +} + +func (m *mockProviderStateTracker) GetMaxCuForUser(ctx context.Context, consumerAddress, chainID string, epocu uint64) (maxCu uint64, err error) { + return 10000, nil +} + +func (m *mockProviderStateTracker) VerifyPairing(ctx context.Context, consumerAddress, providerAddress string, epoch uint64, chainID string) (valid bool, total int64, projectId string, err error) { + return true, 10000, m.consumerAddressForPairing, nil +} + +func (m *mockProviderStateTracker) GetEpochSize(ctx context.Context) (uint64, error) { + return 30, nil +} + +func (m *mockProviderStateTracker) EarliestBlockInMemory(ctx context.Context) (uint64, error) { + return 100, nil +} + +func (m *mockProviderStateTracker) RegisterPaymentUpdatableForPayments(ctx context.Context, paymentUpdatable updaters.PaymentUpdatable) { +} + +func (m *mockProviderStateTracker) GetRecommendedEpochNumToCollectPayment(ctx context.Context) (uint64, error) { + return 1000, nil +} + +func (m *mockProviderStateTracker) GetEpochSizeMultipliedByRecommendedEpochNumToCollectPayment(ctx context.Context) (uint64, error) { + return 30000, nil +} + +func (m *mockProviderStateTracker) GetProtocolVersion(ctx context.Context) (*updaters.ProtocolVersionResponse, error) { + return &updaters.ProtocolVersionResponse{ + Version: &protocoltypes.Version{}, + BlockNumber: "", + }, nil +} + +func (m *mockProviderStateTracker) GetVirtualEpoch(epoch uint64) uint64 { + return 0 +} + +func (m *mockProviderStateTracker) GetAverageBlockTime() time.Duration { + return m.averageBlockTime +} + +type MockChainFetcher struct { 
+ latestBlock int64 + blockHashes []*chaintracker.BlockStore + mutex sync.Mutex + fork string + callBack func() +} + +func (mcf *MockChainFetcher) FetchEndpoint() lavasession.RPCProviderEndpoint { + return lavasession.RPCProviderEndpoint{} +} + +func (mcf *MockChainFetcher) FetchLatestBlockNum(ctx context.Context) (int64, error) { + mcf.mutex.Lock() + defer mcf.mutex.Unlock() + if mcf.callBack != nil { + mcf.callBack() + } + return mcf.latestBlock, nil +} + +func (mcf *MockChainFetcher) FetchBlockHashByNum(ctx context.Context, blockNum int64) (string, error) { + mcf.mutex.Lock() + defer mcf.mutex.Unlock() + for _, blockStore := range mcf.blockHashes { + if blockStore.Block == blockNum { + return blockStore.Hash, nil + } + } + return "", fmt.Errorf("invalid block num requested %d, latestBlockSaved: %d, MockChainFetcher blockHashes: %+v", blockNum, mcf.latestBlock, mcf.blockHashes) +} + +func (mcf *MockChainFetcher) FetchChainID(ctx context.Context) (string, string, error) { + return "", "", utils.LavaFormatError("FetchChainID not supported for lava chain fetcher", nil) +} + +func (mcf *MockChainFetcher) hashKey(latestBlock int64) string { + return "stubHash-" + strconv.FormatInt(latestBlock, 10) + mcf.fork +} + +func (mcf *MockChainFetcher) IsCorrectHash(hash string, hashBlock int64) bool { + return hash == mcf.hashKey(hashBlock) +} + +func (mcf *MockChainFetcher) AdvanceBlock() int64 { + mcf.mutex.Lock() + defer mcf.mutex.Unlock() + mcf.latestBlock += 1 + newHash := mcf.hashKey(mcf.latestBlock) + mcf.blockHashes = append(mcf.blockHashes[1:], &chaintracker.BlockStore{Block: mcf.latestBlock, Hash: newHash}) + return mcf.latestBlock +} + +func (mcf *MockChainFetcher) SetBlock(latestBlock int64) { + mcf.latestBlock = latestBlock + newHash := mcf.hashKey(mcf.latestBlock) + mcf.blockHashes = append(mcf.blockHashes, &chaintracker.BlockStore{Block: latestBlock, Hash: newHash}) +} + +func (mcf *MockChainFetcher) Fork(fork string) { + mcf.mutex.Lock() + defer mcf.mutex.Unlock() + if mcf.fork == fork { + // nothing to do + return + } + mcf.fork = fork + for _, blockStore := range mcf.blockHashes { + blockStore.Hash = mcf.hashKey(blockStore.Block) + } +} + +func (mcf *MockChainFetcher) Shrink(newSize int) { + mcf.mutex.Lock() + defer mcf.mutex.Unlock() + currentSize := len(mcf.blockHashes) + if currentSize <= newSize { + return + } + newHashes := make([]*chaintracker.BlockStore, newSize) + copy(newHashes, mcf.blockHashes[currentSize-newSize:]) + mcf.blockHashes = newHashes // keep only the newest blocks +} + +func NewMockChainFetcher(startBlock, blocksToSave int64, callback func()) *MockChainFetcher { + mockChainFetcher := MockChainFetcher{callBack: callback} + for i := int64(0); i < blocksToSave; i++ { + mockChainFetcher.SetBlock(startBlock + i) + } + return &mockChainFetcher +} + +type uniqueAddresGenerator struct { + seed int + lock sync.Mutex +} + +func (ug *uniqueAddresGenerator) GetAddress() string { + ug.lock.Lock() + defer ug.lock.Unlock() + ug.seed++ + if ug.seed < 100 { + return "localhost:111" + strconv.Itoa(ug.seed) + } + return "localhost:11" + strconv.Itoa(ug.seed) +} diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go new file mode 100644 index 0000000000..cfccbce52f --- /dev/null +++ b/protocol/integration/protocol_test.go @@ -0,0 +1,552 @@ +package integration_test + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "testing" + "time" + + "github.com/lavanet/lava/protocol/chainlib" + "github.com/lavanet/lava/protocol/chaintracker" + "github.com/lavanet/lava/protocol/common" + 
"github.com/lavanet/lava/protocol/lavaprotocol" + "github.com/lavanet/lava/protocol/lavasession" + "github.com/lavanet/lava/protocol/metrics" + "github.com/lavanet/lava/protocol/provideroptimizer" + "github.com/lavanet/lava/protocol/rpcconsumer" + "github.com/lavanet/lava/protocol/rpcprovider" + "github.com/lavanet/lava/protocol/rpcprovider/reliabilitymanager" + "github.com/lavanet/lava/protocol/rpcprovider/rewardserver" + "github.com/lavanet/lava/utils" + "github.com/lavanet/lava/utils/rand" + "github.com/lavanet/lava/utils/sigs" + "github.com/stretchr/testify/require" + "google.golang.org/grpc/connectivity" + + spectypes "github.com/lavanet/lava/x/spec/types" +) + +var ( + seed int64 + randomizer *sigs.ZeroReader + addressGen uniqueAddresGenerator +) + +func TestMain(m *testing.M) { + // This code will run once before any test cases are executed. + seed = time.Now().Unix() + rand.SetSpecificSeed(seed) + addressGen = uniqueAddresGenerator{} + randomizer = sigs.NewZeroReader(seed) + lavasession.AllowInsecureConnectionToProviders = true + // Run the actual tests + exitCode := m.Run() + if exitCode != 0 { + utils.LavaFormatDebug("failed tests seed", utils.Attribute{Key: "seed", Value: seed}) + } + os.Exit(exitCode) +} + +func isGrpcServerUp(url string) bool { + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*50) + defer cancel() + conn, err := lavasession.ConnectgRPCClient(context.Background(), url, true) + if err != nil { + return false + } + defer conn.Close() + for { + state := conn.GetState() + if state == connectivity.Ready { + return true + } else if state == connectivity.TransientFailure || state == connectivity.Shutdown { + return false + } + + select { + case <-time.After(10 * time.Millisecond): + // Check the connection state again after a short delay + case <-ctx.Done(): + // The context has timed out + return false + } + } +} + +func checkGrpcServerStatusWithTimeout(url string, totalTimeout time.Duration) bool { + startTime := time.Now() + + for time.Since(startTime) < totalTimeout { + if isGrpcServerUp(url) { + return true + } + time.Sleep(20 * time.Millisecond) + } + + return false +} + +func isServerUp(url string) bool { + client := http.Client{ + Timeout: 20 * time.Millisecond, + } + + resp, err := client.Get(url) + if err != nil { + return false + } + + defer resp.Body.Close() + + return resp.ContentLength > 0 +} + +func checkServerStatusWithTimeout(url string, totalTimeout time.Duration) bool { + startTime := time.Now() + + for time.Since(startTime) < totalTimeout { + if isServerUp(url) { + return true + } + time.Sleep(20 * time.Millisecond) + } + + return false +} + +func createInMemoryRewardDb(specs []string) (*rewardserver.RewardDB, error) { + rewardDB := rewardserver.NewRewardDB() + for _, spec := range specs { + db := rewardserver.NewMemoryDB(spec) + err := rewardDB.AddDB(db) + if err != nil { + return nil, err + } + } + return rewardDB, nil +} + +func createRpcConsumer(t *testing.T, ctx context.Context, specId string, apiInterface string, account sigs.Account, consumerListenAddress string, epoch uint64, pairingList map[uint64]*lavasession.ConsumerSessionsWithProvider, requiredResponses int, lavaChainID string) *rpcconsumer.RPCConsumerServer { + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + chainParser, _, chainFetcher, _, _, err := chainlib.CreateChainLibMocks(ctx, specId, apiInterface, serverHandler, 
"../../", nil) + require.NoError(t, err) + require.NotNil(t, chainParser) + require.NotNil(t, chainFetcher) + + rpcConsumerServer := &rpcconsumer.RPCConsumerServer{} + rpcEndpoint := &lavasession.RPCEndpoint{ + NetworkAddress: consumerListenAddress, + ChainID: specId, + ApiInterface: apiInterface, + TLSEnabled: false, + HealthCheckPath: "", + Geolocation: 1, + } + consumerStateTracker := &mockConsumerStateTracker{} + finalizationConsensus := lavaprotocol.NewFinalizationConsensus(rpcEndpoint.ChainID) + _, averageBlockTime, _, _ := chainParser.ChainBlockStats() + baseLatency := common.AverageWorldLatency / 2 + optimizer := provideroptimizer.NewProviderOptimizer(provideroptimizer.STRATEGY_BALANCED, averageBlockTime, baseLatency, 2) + consumerSessionManager := lavasession.NewConsumerSessionManager(rpcEndpoint, optimizer, nil, nil) + consumerSessionManager.UpdateAllProviders(epoch, pairingList) + + consumerConsistency := rpcconsumer.NewConsumerConsistency(specId) + consumerCmdFlags := common.ConsumerCmdFlags{} + rpcsonumerLogs, err := metrics.NewRPCConsumerLogs(nil, nil) + require.NoError(t, err) + err = rpcConsumerServer.ServeRPCRequests(ctx, rpcEndpoint, consumerStateTracker, chainParser, finalizationConsensus, consumerSessionManager, requiredResponses, account.SK, lavaChainID, nil, rpcsonumerLogs, account.Addr, consumerConsistency, nil, consumerCmdFlags, false, nil, nil) + require.NoError(t, err) + // wait for consumer server to be up + consumerUp := checkServerStatusWithTimeout("http://"+consumerListenAddress, time.Millisecond*61) + require.True(t, consumerUp) + + return rpcConsumerServer +} + +func createRpcProvider(t *testing.T, ctx context.Context, consumerAddress string, specId string, apiInterface string, listenAddress string, account sigs.Account, lavaChainID string, addons []string) (*rpcprovider.RPCProviderServer, *lavasession.RPCProviderEndpoint, *ReplySetter, *MockChainFetcher) { + replySetter := ReplySetter{ + status: http.StatusOK, + replyDataBuf: []byte(`{"reply": "REPLY-STUB"}`), + handler: nil, + } + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + + status := replySetter.status + data := replySetter.replyDataBuf + if replySetter.handler != nil { + data = make([]byte, r.ContentLength) + r.Body.Read(data) + data, status = replySetter.handler(data, r.Header) + } + w.WriteHeader(status) + fmt.Fprint(w, string(data)) + }) + chainParser, chainRouter, chainFetcher, _, endpoint, err := chainlib.CreateChainLibMocks(ctx, specId, apiInterface, serverHandler, "../../", addons) + require.NoError(t, err) + require.NotNil(t, chainParser) + require.NotNil(t, chainFetcher) + require.NotNil(t, chainRouter) + endpoint.NetworkAddress.Address = listenAddress + + rpcProviderServer := &rpcprovider.RPCProviderServer{} + rpcProviderEndpoint := &lavasession.RPCProviderEndpoint{ + NetworkAddress: lavasession.NetworkAddressData{ + Address: endpoint.NetworkAddress.Address, + KeyPem: "", + CertPem: "", + DisableTLS: false, + }, + ChainID: specId, + ApiInterface: apiInterface, + Geolocation: 1, + NodeUrls: []common.NodeUrl{ + { + Url: endpoint.NodeUrls[0].Url, + InternalPath: "", + AuthConfig: common.AuthConfig{}, + IpForwarding: false, + Timeout: 0, + Addons: addons, + SkipVerifications: []string{}, + }, + }, + } + rewardDB, err := createInMemoryRewardDb([]string{specId}) + require.NoError(t, err) + _, averageBlockTime, blocksToFinalization, blocksInFinalizationData := chainParser.ChainBlockStats() + 
mockProviderStateTracker := mockProviderStateTracker{consumerAddressForPairing: consumerAddress, averageBlockTime: averageBlockTime} + rws := rewardserver.NewRewardServer(&mockProviderStateTracker, nil, rewardDB, "badger_test", 1, 10, nil) + + blockMemorySize, err := mockProviderStateTracker.GetEpochSizeMultipliedByRecommendedEpochNumToCollectPayment(ctx) + require.NoError(t, err) + providerSessionManager := lavasession.NewProviderSessionManager(rpcProviderEndpoint, blockMemorySize) + providerPolicy := rpcprovider.GetAllAddonsAndExtensionsFromNodeUrlSlice(rpcProviderEndpoint.NodeUrls) + chainParser.SetPolicy(providerPolicy, specId, apiInterface) + + blocksToSaveChainTracker := uint64(blocksToFinalization + blocksInFinalizationData) + chainTrackerConfig := chaintracker.ChainTrackerConfig{ + BlocksToSave: blocksToSaveChainTracker, + AverageBlockTime: averageBlockTime, + ServerBlockMemory: rpcprovider.ChainTrackerDefaultMemory + blocksToSaveChainTracker, + NewLatestCallback: nil, + ConsistencyCallback: nil, + Pmetrics: nil, + } + mockChainFetcher := NewMockChainFetcher(1000, 10, nil) + chainTracker, err := chaintracker.NewChainTracker(ctx, mockChainFetcher, chainTrackerConfig) + require.NoError(t, err) + reliabilityManager := reliabilitymanager.NewReliabilityManager(chainTracker, &mockProviderStateTracker, account.Addr.String(), chainRouter, chainParser) + rpcProviderServer.ServeRPCRequests(ctx, rpcProviderEndpoint, chainParser, rws, providerSessionManager, reliabilityManager, account.SK, nil, chainRouter, &mockProviderStateTracker, account.Addr, lavaChainID, rpcprovider.DEFAULT_ALLOWED_MISSING_CU, nil, nil) + listener := rpcprovider.NewProviderListener(ctx, rpcProviderEndpoint.NetworkAddress, "/health") + err = listener.RegisterReceiver(rpcProviderServer, rpcProviderEndpoint) + require.NoError(t, err) + chainParser.Activate() + chainTracker.RegisterForBlockTimeUpdates(chainParser) + providerUp := checkGrpcServerStatusWithTimeout(rpcProviderEndpoint.NetworkAddress.Address, time.Millisecond*261) + require.True(t, providerUp) + return rpcProviderServer, endpoint, &replySetter, mockChainFetcher +} + +func TestConsumerProviderBasic(t *testing.T) { + ctx := context.Background() + // can be any spec and api interface + specId := "LAV1" + apiInterface := spectypes.APIInterfaceTendermintRPC + epoch := uint64(100) + requiredResponses := 1 + lavaChainID := "lava" + + numProviders := 1 + + consumerListenAddress := addressGen.GetAddress() + pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{} + type providerData struct { + account sigs.Account + endpoint *lavasession.RPCProviderEndpoint + server *rpcprovider.RPCProviderServer + replySetter *ReplySetter + mockChainFetcher *MockChainFetcher + } + providers := []providerData{} + + for i := 0; i < numProviders; i++ { + // providerListenAddress := "localhost:111" + strconv.Itoa(i) + account := sigs.GenerateDeterministicFloatingKey(randomizer) + providerDataI := providerData{account: account} + providers = append(providers, providerDataI) + } + consumerAccount := sigs.GenerateDeterministicFloatingKey(randomizer) + for i := 0; i < numProviders; i++ { + ctx := context.Background() + providerDataI := providers[i] + listenAddress := addressGen.GetAddress() + providers[i].server, providers[i].endpoint, providers[i].replySetter, providers[i].mockChainFetcher = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) + } + for i := 0; i < numProviders; i++ { + 
pairingList[uint64(i)] = &lavasession.ConsumerSessionsWithProvider{ + PublicLavaAddress: providers[i].account.Addr.String(), + Endpoints: []*lavasession.Endpoint{ + { + NetworkAddress: providers[i].endpoint.NetworkAddress.Address, + Enabled: true, + Geolocation: 1, + }, + }, + Sessions: map[int64]*lavasession.SingleConsumerSession{}, + MaxComputeUnits: 10000, + UsedComputeUnits: 0, + PairingEpoch: epoch, + } + } + rpcconsumerServer := createRpcConsumer(t, ctx, specId, apiInterface, consumerAccount, consumerListenAddress, epoch, pairingList, requiredResponses, lavaChainID) + require.NotNil(t, rpcconsumerServer) + client := http.Client{} + resp, err := client.Get("http://" + consumerListenAddress + "/status") + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + bodyBytes, err := io.ReadAll(resp.Body) + require.NoError(t, err) + require.Equal(t, providers[0].replySetter.replyDataBuf, bodyBytes) + resp.Body.Close() +} + +func TestConsumerProviderWithProviders(t *testing.T) { + playbook := []struct { + name string + scenario int + }{ + { + name: "basic-success", + scenario: 0, + }, + { + name: "with errors", + scenario: 1, + }, + } + for _, play := range playbook { + t.Run(play.name, func(t *testing.T) { + ctx := context.Background() + // can be any spec and api interface + specId := "LAV1" + apiInterface := spectypes.APIInterfaceTendermintRPC + epoch := uint64(100) + requiredResponses := 1 + lavaChainID := "lava" + numProviders := 5 + + consumerListenAddress := addressGen.GetAddress() + pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{} + type providerData struct { + account sigs.Account + endpoint *lavasession.RPCProviderEndpoint + server *rpcprovider.RPCProviderServer + replySetter *ReplySetter + mockChainFetcher *MockChainFetcher + } + providers := []providerData{} + + for i := 0; i < numProviders; i++ { + // providerListenAddress := "localhost:111" + strconv.Itoa(i) + account := sigs.GenerateDeterministicFloatingKey(randomizer) + providerDataI := providerData{account: account} + providers = append(providers, providerDataI) + } + consumerAccount := sigs.GenerateDeterministicFloatingKey(randomizer) + for i := 0; i < numProviders; i++ { + ctx := context.Background() + providerDataI := providers[i] + listenAddress := addressGen.GetAddress() + providers[i].server, providers[i].endpoint, providers[i].replySetter, providers[i].mockChainFetcher = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) + providers[i].replySetter.replyDataBuf = []byte(fmt.Sprintf(`{"reply": %d}`, i+1)) + } + for i := 0; i < numProviders; i++ { + pairingList[uint64(i)] = &lavasession.ConsumerSessionsWithProvider{ + PublicLavaAddress: providers[i].account.Addr.String(), + Endpoints: []*lavasession.Endpoint{ + { + NetworkAddress: providers[i].endpoint.NetworkAddress.Address, + Enabled: true, + Geolocation: 1, + }, + }, + Sessions: map[int64]*lavasession.SingleConsumerSession{}, + MaxComputeUnits: 10000, + UsedComputeUnits: 0, + PairingEpoch: epoch, + } + } + rpcconsumerServer := createRpcConsumer(t, ctx, specId, apiInterface, consumerAccount, consumerListenAddress, epoch, pairingList, requiredResponses, lavaChainID) + require.NotNil(t, rpcconsumerServer) + if play.scenario != 1 { + counter := map[int]int{} + for i := 0; i <= 1000; i++ { + client := http.Client{} + resp, err := client.Get("http://" + consumerListenAddress + "/status") + require.NoError(t, err) + require.Equal(t, 
http.StatusOK, resp.StatusCode)
+					bodyBytes, err := io.ReadAll(resp.Body)
+					require.NoError(t, err)
+					resp.Body.Close()
+					mapi := map[string]int{}
+					err = json.Unmarshal(bodyBytes, &mapi)
+					require.NoError(t, err)
+					id, ok := mapi["reply"]
+					require.True(t, ok)
+					counter[id]++
+					handler := func(req []byte, header http.Header) (data []byte, status int) {
+						// slow down the provider we already got a reply from, so other providers get chosen with a higher likelihood
+						time.Sleep(3 * time.Millisecond)
+						return providers[id-1].replySetter.replyDataBuf, http.StatusOK // id is 1-based, the matching provider index is id-1
+					}
+					providers[id-1].replySetter.handler = handler
+				}
+
+				require.Len(t, counter, numProviders) // make sure we talked with all the providers
+			}
+			if play.scenario != 0 {
+				// add a chance for node errors and timeouts
+				for i := 0; i < numProviders; i++ {
+					replySetter := providers[i].replySetter
+					index := i
+					handler := func(req []byte, header http.Header) (data []byte, status int) {
+						randVal := rand.Intn(10)
+						switch randVal {
+						case 1:
+							if index < (numProviders+1)/2 {
+								time.Sleep(2 * time.Second) // cause a timeout, but only on half the providers so there is always a provider that answers
+							}
+						case 2, 3, 4:
+							return []byte(`{"message":"bad","code":123}`), http.StatusServiceUnavailable
+						case 5:
+							return []byte(`{"message":"bad","code":777}`), http.StatusTooManyRequests // cause a protocol error
+						}
+						return replySetter.replyDataBuf, http.StatusOK
+					}
+					providers[i].replySetter.handler = handler
+				}
+
+				seenError := false
+				statuses := map[int]struct{}{}
+				for i := 0; i <= 100; i++ {
+					client := http.Client{Timeout: 500 * time.Millisecond}
+					req, err := http.NewRequest("GET", "http://"+consumerListenAddress+"/status", nil)
+					require.NoError(t, err)
+
+					// Add custom headers to the request
+					req.Header.Add(common.RELAY_TIMEOUT_HEADER_NAME, "90ms")
+
+					// Perform the request
+					resp, err := client.Do(req)
+					require.NoError(t, err, i)
+					if resp.StatusCode == http.StatusServiceUnavailable {
+						seenError = true
+					}
+					statuses[resp.StatusCode] = struct{}{}
+					require.NotEqual(t, resp.StatusCode, http.StatusTooManyRequests, i) // should never return too many requests, because it triggers a retry
+					resp.Body.Close()
+				}
+				require.True(t, seenError, statuses)
+			}
+		})
+	}
+}
+
+func TestConsumerProviderTx(t *testing.T) {
+	playbook := []struct {
+		name string
+	}{
+		{
+			name: "basic-tx",
+		},
+	}
+	for _, play := range playbook {
+		t.Run(play.name, func(t *testing.T) {
+			ctx := context.Background()
+			// can be any spec and api interface
+			specId := "LAV1"
+			apiInterface := spectypes.APIInterfaceRest
+			epoch := uint64(100)
+			requiredResponses := 1
+			lavaChainID := "lava"
+			numProviders := 5
+
+			consumerListenAddress := addressGen.GetAddress()
+			pairingList := map[uint64]*lavasession.ConsumerSessionsWithProvider{}
+			type providerData struct {
+				account          sigs.Account
+				endpoint         *lavasession.RPCProviderEndpoint
+				server           *rpcprovider.RPCProviderServer
+				replySetter      *ReplySetter
+				mockChainFetcher *MockChainFetcher
+			}
+			providers := []providerData{}
+
+			for i := 0; i < numProviders; i++ {
+				// providerListenAddress := "localhost:111" + strconv.Itoa(i)
+				account := sigs.GenerateDeterministicFloatingKey(randomizer)
+				providerDataI := providerData{account: account}
+				providers = append(providers, providerDataI)
+			}
+			consumerAccount := sigs.GenerateDeterministicFloatingKey(randomizer)
+			for i := 0; i < numProviders; i++ {
+				ctx := context.Background()
+				providerDataI := providers[i]
+				listenAddress := addressGen.GetAddress()
+				providers[i].server,
providers[i].endpoint, providers[i].replySetter, providers[i].mockChainFetcher = createRpcProvider(t, ctx, consumerAccount.Addr.String(), specId, apiInterface, listenAddress, providerDataI.account, lavaChainID, []string(nil)) + providers[i].replySetter.replyDataBuf = []byte(fmt.Sprintf(`{"result": %d}`, i+1)) + } + for i := 0; i < numProviders; i++ { + pairingList[uint64(i)] = &lavasession.ConsumerSessionsWithProvider{ + PublicLavaAddress: providers[i].account.Addr.String(), + Endpoints: []*lavasession.Endpoint{ + { + NetworkAddress: providers[i].endpoint.NetworkAddress.Address, + Enabled: true, + Geolocation: 1, + }, + }, + Sessions: map[int64]*lavasession.SingleConsumerSession{}, + MaxComputeUnits: 10000, + UsedComputeUnits: 0, + PairingEpoch: epoch, + } + } + rpcconsumerServer := createRpcConsumer(t, ctx, specId, apiInterface, consumerAccount, consumerListenAddress, epoch, pairingList, requiredResponses, lavaChainID) + require.NotNil(t, rpcconsumerServer) + + for i := 0; i < numProviders; i++ { + replySetter := providers[i].replySetter + index := i + handler := func(req []byte, header http.Header) (data []byte, status int) { + if index == 1 { + // only one provider responds correctly, but after a delay + time.Sleep(20 * time.Millisecond) + return replySetter.replyDataBuf, http.StatusOK + } else { + return []byte(`{"message":"bad","code":777}`), http.StatusInternalServerError + } + } + providers[i].replySetter.handler = handler + } + + client := http.Client{Timeout: 500 * time.Millisecond} + req, err := http.NewRequest(http.MethodPost, "http://"+consumerListenAddress+"/cosmos/tx/v1beta1/txs", nil) + require.NoError(t, err) + resp, err := client.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + bodyBytes, err := io.ReadAll(resp.Body) + require.NoError(t, err) + resp.Body.Close() + require.Equal(t, `{"result": 2}`, string(bodyBytes)) + }) + } +} diff --git a/protocol/lavaprotocol/finalization_consensus_test.go b/protocol/lavaprotocol/finalization_consensus_test.go index 8eaa581e18..2b93fdc86f 100644 --- a/protocol/lavaprotocol/finalization_consensus_test.go +++ b/protocol/lavaprotocol/finalization_consensus_test.go @@ -74,7 +74,7 @@ func TestConsensusHashesInsertion(t *testing.T) { chainsToTest := []string{"APT1", "LAV1", "ETH1"} for _, chainID := range chainsToTest { ctx := context.Background() - chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, chainID, "0", func(http.ResponseWriter, *http.Request) {}, "../../", nil) + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, chainID, "0", func(http.ResponseWriter, *http.Request) {}, "../../", nil) if closeServer != nil { defer closeServer() } @@ -163,7 +163,7 @@ func TestQoS(t *testing.T) { for _, chainID := range chainsToTest { t.Run(chainID, func(t *testing.T) { ctx := context.Background() - chainParser, _, _, closeServer, err := chainlib.CreateChainLibMocks(ctx, chainID, "0", func(http.ResponseWriter, *http.Request) {}, "../../", nil) + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, chainID, "0", func(http.ResponseWriter, *http.Request) {}, "../../", nil) if closeServer != nil { defer closeServer() } diff --git a/protocol/lavaprotocol/request_builder.go b/protocol/lavaprotocol/request_builder.go index 45c32aae36..078c4f4075 100644 --- a/protocol/lavaprotocol/request_builder.go +++ b/protocol/lavaprotocol/request_builder.go @@ -165,7 +165,7 @@ func compareRelaysFindConflict(ctx context.Context, reply1 pairingtypes.RelayRep 
secondAsString := string(reply2.Data) _, idxDiff := findFirstDifferentChar(firstAsString, secondAsString) if idxDiff > 0 && idxDiff+100 < len(firstAsString) && idxDiff+100 < len(secondAsString) { - utils.LavaFormatDebug("different in responses detected", utils.Attribute{Key: "index", Value: idxDiff}, utils.Attribute{Key: "first_diff", Value: firstAsString[idxDiff : idxDiff+100]}, utils.Attribute{Key: "second_diff", Value: secondAsString[idxDiff : idxDiff+100]}) + utils.LavaFormatDebug("difference in responses detected", utils.Attribute{Key: "index", Value: idxDiff}, utils.Attribute{Key: "first_diff", Value: firstAsString[idxDiff : idxDiff+100]}, utils.Attribute{Key: "second_diff", Value: secondAsString[idxDiff : idxDiff+100]}) } } return true, responseConflict diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index 6e993f7438..10ff5bcd39 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -314,18 +314,23 @@ func (csm *ConsumerSessionManager) validatePairingListNotEmpty(addon string, ext // GetSessions will return a ConsumerSession, given cu needed for that session. // The user can also request specific providers to not be included in the search for a session. -func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForSession uint64, initUnwantedProviders map[string]struct{}, requestedBlock int64, addon string, extensions []*spectypes.Extension, stateful uint32, virtualEpoch uint64) ( +func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForSession uint64, usedProviders UsedProvidersInf, requestedBlock int64, addon string, extensions []*spectypes.Extension, stateful uint32, virtualEpoch uint64) ( consumerSessionMap ConsumerSessionsMap, errRet error, ) { + // set usedProviders if they were chosen for this relay + timeoutCtx, cancel := context.WithTimeout(ctx, time.Second) + defer cancel() + canSelect := usedProviders.TryLockSelection(timeoutCtx) + if !canSelect { + return nil, utils.LavaFormatError("failed getting sessions from used Providers", nil, utils.LogAttr("usedProviders", usedProviders), utils.LogAttr("endpoint", csm.rpcEndpoint)) + } + defer func() { usedProviders.AddUsed(consumerSessionMap, errRet) }() + initUnwantedProviders := usedProviders.GetUnwantedProvidersToSend() + extensionNames := common.GetExtensionNames(extensions) // if pairing list is empty we reset the state. numberOfResets := csm.validatePairingListNotEmpty(addon, extensionNames) - // verify initUnwantedProviders is not nil - if initUnwantedProviders == nil { - initUnwantedProviders = make(map[string]struct{}) - } - // providers that we don't try to connect this iteration. tempIgnoredProviders := &ignoredProviders{ providers: initUnwantedProviders, @@ -407,15 +412,14 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS if MaxComputeUnitsExceededError.Is(err) { tempIgnoredProviders.providers[providerAddress] = struct{}{} // We must unlock the consumer session before continuing. - consumerSession.lock.Unlock() + consumerSession.Free(nil) continue } else { utils.LavaFormatFatal("Unsupported Error", err) } } else { // consumer session is locked and valid, we need to set the relayNumber and the relay cu. before returning. - consumerSession.LatestRelayCu = cuNeededForSession // set latestRelayCu - consumerSession.RelayNum += RelayNumberIncrement // increase relayNum + // Successfully created/got a consumerSession. 
if debug { utils.LavaFormatDebug("Consumer get session", @@ -440,10 +444,7 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS sessionInfo.QoSSummeryResult = consumerSession.getQosComputedResultOrZero() sessions[providerAddress] = sessionInfo - if consumerSession.RelayNum > 1 { - // we only set excellence for sessions with more than one successful relays, this guarantees data within the epoch exists - consumerSession.QoSInfo.LastExcellenceQoSReport = csm.providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress) - } + consumerSession.SetUsageForSession(cuNeededForSession, csm.providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress), usedProviders) // We successfully added provider, we should ignore it if we need to fetch new tempIgnoredProviders.providers[providerAddress] = struct{}{} @@ -491,7 +492,7 @@ func (csm *ConsumerSessionManager) getValidProviderAddresses(ignoredProvidersLis } } var providers []string - if stateful == common.CONSISTENCY_SELECT_ALLPROVIDERS && csm.providerOptimizer.Strategy() != provideroptimizer.STRATEGY_COST { + if stateful == common.CONSISTENCY_SELECT_ALL_PROVIDERS && csm.providerOptimizer.Strategy() != provideroptimizer.STRATEGY_COST { providers = GetAllProviders(validAddresses, ignoredProvidersList) } else { providers = csm.providerOptimizer.ChooseProvider(validAddresses, ignoredProvidersList, cu, requestedBlock, OptimizerPerturbation) @@ -640,41 +641,10 @@ func (csm *ConsumerSessionManager) blockProvider(address string, reportProvider return nil } -// Verify the consumerSession is locked when getting to this function, if its not locked throw an error -func (csm *ConsumerSessionManager) verifyLock(consumerSession *SingleConsumerSession) error { - if consumerSession.lock.TryLock() { // verify. - // if we managed to lock throw an error for misuse. - defer consumerSession.lock.Unlock() - // if failed to lock we should block session as it seems like a very rare case. - consumerSession.BlockListed = true // block this session from future usages - utils.LavaFormatError("Verify Lock failed on session Failure, blocking session", nil, utils.LogAttr("consumerSession", consumerSession)) - return LockMisUseDetectedError - } - return nil -} - -// A Session can be created but unused if consumer found the response in the cache. -// So we need to unlock the session and decrease the cu that were applied -func (csm *ConsumerSessionManager) OnSessionUnUsed(consumerSession *SingleConsumerSession) error { - if err := csm.verifyLock(consumerSession); err != nil { - return sdkerrors.Wrapf(err, "OnSessionUnUsed, consumerSession.lock must be locked before accessing this method, additional info:") - } - cuToDecrease := consumerSession.LatestRelayCu - consumerSession.LatestRelayCu = 0 // making sure no one uses it in a wrong way - parentConsumerSessionsWithProvider := consumerSession.Parent // must read this pointer before unlocking - // finished with consumerSession here can unlock. - consumerSession.lock.Unlock() // we unlock before we change anything in the parent ConsumerSessionsWithProvider - err := parentConsumerSessionsWithProvider.decreaseUsedComputeUnits(cuToDecrease) // change the cu in parent - if err != nil { - return err - } - return nil -} - // Report session failure, mark it as blocked from future usages, report if timeout happened. func (csm *ConsumerSessionManager) OnSessionFailure(consumerSession *SingleConsumerSession, errorReceived error) error { // consumerSession must be locked when getting here. 
- if err := csm.verifyLock(consumerSession); err != nil { + if err := consumerSession.VerifyLock(); err != nil { return sdkerrors.Wrapf(err, "OnSessionFailure, consumerSession.lock must be locked before accessing this method, additional info:") } @@ -718,7 +688,7 @@ func (csm *ConsumerSessionManager) OnSessionFailure(consumerSession *SingleConsu parentConsumerSessionsWithProvider := consumerSession.Parent // must read this pointer before unlocking csm.updateMetricsManager(consumerSession) // finished with consumerSession here can unlock. - consumerSession.lock.Unlock() // we unlock before we change anything in the parent ConsumerSessionsWithProvider + consumerSession.Free(errorReceived) // we unlock before we change anything in the parent ConsumerSessionsWithProvider err := parentConsumerSessionsWithProvider.decreaseUsedComputeUnits(cuToDecrease) // change the cu in parent if err != nil { @@ -738,35 +708,6 @@ func (csm *ConsumerSessionManager) OnSessionFailure(consumerSession *SingleConsu return nil } -// On a successful DataReliability session we don't need to increase and update any field, we just need to unlock the session. -func (csm *ConsumerSessionManager) OnDataReliabilitySessionDone(consumerSession *SingleConsumerSession, - latestServicedBlock int64, - specComputeUnits uint64, - currentLatency time.Duration, - expectedLatency time.Duration, - expectedBH int64, - numOfProviders int, - providersCount uint64, -) error { - if err := csm.verifyLock(consumerSession); err != nil { - return sdkerrors.Wrapf(err, "OnDataReliabilitySessionDone, consumerSession.lock must be locked before accessing this method") - } - - defer consumerSession.lock.Unlock() // we need to be locked here, if we didn't get it locked we try lock anyway - consumerSession.ConsecutiveErrors = []error{} - consumerSession.LatestBlock = latestServicedBlock // update latest serviced block - if expectedBH-latestServicedBlock > 1000 { - utils.LavaFormatWarning("identified block gap", nil, - utils.Attribute{Key: "expectedBH", Value: expectedBH}, - utils.Attribute{Key: "latestServicedBlock", Value: latestServicedBlock}, - utils.Attribute{Key: "session_id", Value: consumerSession.SessionId}, - utils.Attribute{Key: "provider_address", Value: consumerSession.Parent.PublicLavaAddress}, - ) - } - consumerSession.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, int64(providersCount)) - return nil -} - // On a successful session this function will update all necessary fields in the consumerSession. and unlock it when it finishes func (csm *ConsumerSessionManager) OnSessionDone( consumerSession *SingleConsumerSession, @@ -780,11 +721,11 @@ func (csm *ConsumerSessionManager) OnSessionDone( isHangingApi bool, ) error { // release locks, update CU, relaynum etc.. - if err := csm.verifyLock(consumerSession); err != nil { + if err := consumerSession.VerifyLock(); err != nil { return sdkerrors.Wrapf(err, "OnSessionDone, consumerSession.lock must be locked before accessing this method") } - defer consumerSession.lock.Unlock() // we need to be locked here, if we didn't get it locked we try lock anyway + defer consumerSession.Free(nil) // we need to be locked here, if we didn't get it locked we try lock anyway consumerSession.CuSum += consumerSession.LatestRelayCu // add CuSum to current cu usage. 
consumerSession.LatestRelayCu = 0 // reset cu just in case consumerSession.ConsecutiveErrors = []error{} @@ -835,110 +776,18 @@ func (csm *ConsumerSessionManager) GetReportedProviders(epoch uint64) []*pairing return csm.reportedProviders.GetReportedProviders() } -// Data Reliability Section: - // Atomically read csm.pairingAddressesLength for data reliability. func (csm *ConsumerSessionManager) GetAtomicPairingAddressesLength() uint64 { return atomic.LoadUint64(&csm.pairingAddressesLength) } -func (csm *ConsumerSessionManager) getDataReliabilityProviderIndex(unAllowedAddress string, index uint64) (cswp *ConsumerSessionsWithProvider, providerAddress string, epoch uint64, err error) { - csm.lock.RLock() - defer csm.lock.RUnlock() - currentEpoch := csm.atomicReadCurrentEpoch() - pairingAddressesLength := csm.GetAtomicPairingAddressesLength() - if index >= pairingAddressesLength { - utils.LavaFormatInfo(DataReliabilityIndexOutOfRangeError.Error(), utils.Attribute{Key: "index", Value: index}, utils.Attribute{Key: "pairingAddressesLength", Value: pairingAddressesLength}) - return nil, "", currentEpoch, DataReliabilityIndexOutOfRangeError - } - providerAddress = csm.pairingAddresses[index] - if providerAddress == unAllowedAddress { - return nil, "", currentEpoch, DataReliabilityIndexRequestedIsOriginalProviderError - } - // if address is valid return the ConsumerSessionsWithProvider - return csm.pairing[providerAddress], providerAddress, currentEpoch, nil -} - -func (csm *ConsumerSessionManager) fetchEndpointFromConsumerSessionsWithProviderWithRetry(ctx context.Context, consumerSessionsWithProvider *ConsumerSessionsWithProvider, sessionEpoch uint64) (endpoint *Endpoint, err error) { - var connected bool - var providerAddress string - for idx := 0; idx < MaxConsecutiveConnectionAttempts; idx++ { // try to connect to the endpoint 3 times - connected, endpoint, providerAddress, err = consumerSessionsWithProvider.fetchEndpointConnectionFromConsumerSessionWithProvider(ctx) - if err != nil { - // verify err is AllProviderEndpointsDisabled and report. - if AllProviderEndpointsDisabledError.Is(err) { - err = csm.blockProvider(providerAddress, true, sessionEpoch, MaxConsecutiveConnectionAttempts, 0, csm.GenerateReconnectCallback(consumerSessionsWithProvider)) // reporting and blocking provider this epoch - if err != nil { - if !EpochMismatchError.Is(err) { - // only acceptable error is EpochMismatchError so if different, throw fatal - utils.LavaFormatFatal("Unsupported Error", err) - } - } - break // all endpoints are disabled, no reason to continue with this provider. - } else { - utils.LavaFormatFatal("Unsupported Error", err) - } - } - if connected { - // if we are connected we can stop trying and return the endpoint - break - } else { - continue - } - } - if !connected { // if we are not connected at the end - // failed to get an endpoint connection from that provider. return an error. 
- return nil, utils.LavaFormatError("Not Connected", FailedToConnectToEndPointForDataReliabilityError, utils.Attribute{Key: "provider", Value: providerAddress}) - } - return endpoint, nil -} - -// Get a Data Reliability Session -func (csm *ConsumerSessionManager) GetDataReliabilitySession(ctx context.Context, originalProviderAddress string, index int64, sessionEpoch uint64) (singleConsumerSession *SingleConsumerSession, providerAddress string, epoch uint64, err error) { - consumerSessionWithProvider, providerAddress, currentEpoch, err := csm.getDataReliabilityProviderIndex(originalProviderAddress, uint64(index)) - if err != nil { - return nil, "", 0, err - } - if sessionEpoch != currentEpoch { // validate we are in the same epoch. - return nil, "", currentEpoch, DataReliabilityEpochMismatchError - } - - // after choosing a provider, try to see if it already has an existing data reliability session. - consumerSession, pairingEpoch, err := consumerSessionWithProvider.verifyDataReliabilitySessionWasNotAlreadyCreated() - if NoDataReliabilitySessionWasCreatedError.Is(err) { // need to create a new data reliability session - // We can get an endpoint now and create a data reliability session. - endpoint, err := csm.fetchEndpointFromConsumerSessionsWithProviderWithRetry(ctx, consumerSessionWithProvider, currentEpoch) - if err != nil { - return nil, "", currentEpoch, err - } - - // get data reliability session from endpoint - consumerSession, pairingEpoch, err = consumerSessionWithProvider.getDataReliabilitySingleConsumerSession(endpoint) - if err != nil { - return nil, "", currentEpoch, err - } - } else if err != nil { - return nil, "", currentEpoch, err - } - - if currentEpoch != pairingEpoch { // validate they are the same, if not print an error and set currentEpoch to pairingEpoch. - utils.LavaFormatError("currentEpoch and pairingEpoch mismatch", nil, utils.Attribute{Key: "sessionEpoch", Value: currentEpoch}, utils.Attribute{Key: "pairingEpoch", Value: pairingEpoch}) - currentEpoch = pairingEpoch - } - - // DR consumer session is locked, we can increment data reliability relay number. - consumerSession.RelayNum += 1 - - return consumerSession, providerAddress, currentEpoch, nil -} - // On a successful Subscribe relay func (csm *ConsumerSessionManager) OnSessionDoneIncreaseCUOnly(consumerSession *SingleConsumerSession) error { - if err := csm.verifyLock(consumerSession); err != nil { + if err := consumerSession.VerifyLock(); err != nil { return sdkerrors.Wrapf(err, "OnSessionDoneIncreaseRelayAndCu consumerSession.lock must be locked before accessing this method") } - defer consumerSession.lock.Unlock() // we need to be locked here, if we didn't get it locked we try lock anyway + defer consumerSession.Free(nil) // we need to be locked here, if we didn't get it locked we try lock anyway consumerSession.CuSum += consumerSession.LatestRelayCu // add CuSum to current cu usage. consumerSession.LatestRelayCu = 0 // reset cu just in case consumerSession.ConsecutiveErrors = []error{} diff --git a/protocol/lavasession/consumer_session_manager_test.go b/protocol/lavasession/consumer_session_manager_test.go index 3b58e84e18..965f693263 100644 --- a/protocol/lavasession/consumer_session_manager_test.go +++ b/protocol/lavasession/consumer_session_manager_test.go @@ -139,7 +139,7 @@ func TestHappyFlow(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. 
require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -161,7 +161,7 @@ func TestHappyFlowVirtualEpoch(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1), nil, servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session + css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1), NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, virtualEpoch) // get a session require.NoError(t, err) for _, cs := range css { @@ -185,7 +185,7 @@ func TestVirtualEpochWithFailure(t *testing.T) { err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - _, err = csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1)+10, nil, servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session + _, err = csm.GetSessions(ctx, maxCuForVirtualEpoch*(virtualEpoch+1)+10, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, virtualEpoch) // get a session require.Error(t, err) } @@ -195,8 +195,8 @@ func TestPairingReset(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - csm.validAddresses = []string{} // set valid addresses to zero - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + csm.validAddresses = []string{} // set valid addresses to zero + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) require.Equal(t, len(csm.validAddresses), len(csm.pairingAddresses)) @@ -225,7 +225,7 @@ func TestPairingResetWithFailures(t *testing.T) { if len(csm.validAddresses) == 0 { // wait for all pairings to be blocked. break } - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -234,7 +234,7 @@ func TestPairingResetWithFailures(t *testing.T) { } } require.Equal(t, len(csm.validAddresses), 0) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) require.Equal(t, len(csm.validAddresses), len(csm.pairingAddresses)) @@ -259,7 +259,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { if len(csm.validAddresses) == 0 { // wait for all pairings to be blocked. 
break } - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session for _, cs := range css { err = csm.OnSessionFailure(cs.Session, nil) @@ -271,7 +271,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { } } require.Equal(t, len(csm.validAddresses), 0) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) require.Equal(t, len(csm.validAddresses), len(csm.pairingAddresses)) @@ -283,7 +283,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { } } - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -318,7 +318,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { sessionList := make([]session, numberOfAllowedSessionsPerConsumer) sessionListData := make([]SessTestData, numberOfAllowedSessionsPerConsumer) for i := 0; i < numberOfAllowedSessionsPerConsumer; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { // get a session @@ -354,7 +354,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { } for i := 0; i < numberOfAllowedSessionsPerConsumer; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { // get a session @@ -387,7 +387,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { } func successfulSession(ctx context.Context, csm *ConsumerSessionManager, t *testing.T, p int, ch chan int) { - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -400,7 +400,7 @@ func successfulSession(ctx context.Context, csm *ConsumerSessionManager, t *test } func failedSession(ctx context.Context, csm *ConsumerSessionManager, t *testing.T, p int, ch chan int) { - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -518,7 +518,7 @@ func TestSessionFailureAndGetReportedProviders(t *testing.T) { pairingList := createPairingList("", true) err 
:= csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -552,7 +552,7 @@ func TestSessionFailureEpochMisMatch(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { @@ -573,7 +573,7 @@ func TestAllProvidersEndpointsDisabled(t *testing.T) { pairingList := createPairingList("", false) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) - cs, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + cs, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.Nil(t, cs) require.Error(t, err) } @@ -613,7 +613,7 @@ func TestGetSession(t *testing.T) { pairingList := createPairingList("", true) err := csm.UpdateAllProviders(firstEpochHeight, pairingList) require.NoError(t, err) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) require.NoError(t, err) for _, cs := range css { @@ -659,7 +659,7 @@ func TestPairingWithAddons(t *testing.T) { // block all providers initialProvidersLen := len(csm.getValidAddresses(addon, nil)) for i := 0; i < initialProvidersLen; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, addon, nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.NO_STATE, 0) // get a session require.NoError(t, err, i) for _, cs := range css { err = csm.OnSessionFailure(cs.Session, ReportAndBlockProviderError) @@ -671,7 +671,7 @@ func TestPairingWithAddons(t *testing.T) { if addon != "" { require.NotEqual(t, csm.getValidAddresses(addon, nil), csm.getValidAddresses("", nil)) } - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, addon, nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) @@ -734,7 +734,7 @@ func TestPairingWithExtensions(t *testing.T) { } initialProvidersLen := len(csm.getValidAddresses(extensionOpt.addon, extensionOpt.extensions)) for i := 0; i < initialProvidersLen; i++ { - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, 
servicedBlockNumber, extensionOpt.addon, extensionsList, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NO_STATE, 0) // get a session require.NoError(t, err, i) for _, cs := range css { err = csm.OnSessionFailure(cs.Session, ReportAndBlockProviderError) @@ -746,7 +746,7 @@ func TestPairingWithExtensions(t *testing.T) { if len(extensionOpt.extensions) > 0 || extensionOpt.addon != "" { require.NotEqual(t, csm.getValidAddresses(extensionOpt.addon, extensionOpt.extensions), csm.getValidAddresses("", nil)) } - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, extensionOpt.addon, extensionsList, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) @@ -762,7 +762,7 @@ func TestNoPairingsError(t *testing.T) { err := csm.UpdateAllProviders(firstEpochHeight, pairingList) // update the providers. require.NoError(t, err) time.Sleep(5 * time.Millisecond) // let probes finish - _, err = csm.getValidProviderAddresses(map[string]struct{}{}, 10, 100, "invalid", nil, common.NOSTATE) + _, err = csm.getValidProviderAddresses(map[string]struct{}{}, 10, 100, "invalid", nil, common.NO_STATE) require.Error(t, err) require.True(t, PairingListEmptyError.Is(err)) } @@ -781,15 +781,16 @@ func TestPairingWithStateful(t *testing.T) { providerAddresses := csm.getValidAddresses(addon, nil) allProviders := len(providerAddresses) require.Equal(t, 10, allProviders) - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALL_PROVIDERS, 0) // get a session require.NoError(t, err) require.Equal(t, allProviders, len(css)) for _, cs := range css { err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) require.NoError(t, err) } - unwantedProvider := map[string]struct{}{providerAddresses[0]: {}} - css, err = csm.GetSessions(ctx, cuForFirstRequest, unwantedProvider, servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALLPROVIDERS, 0) // get a session + usedProviders := NewUsedProviders(nil) + usedProviders.RemoveUsed(providerAddresses[0], nil) + css, err = csm.GetSessions(ctx, cuForFirstRequest, usedProviders, servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALL_PROVIDERS, 0) // get a session require.NoError(t, err) require.Equal(t, allProviders-1, len(css)) }) diff --git a/protocol/lavasession/consumer_types.go b/protocol/lavasession/consumer_types.go index 3de0bafa62..18a578844f 100644 --- a/protocol/lavasession/consumer_types.go +++ b/protocol/lavasession/consumer_types.go @@ -2,8 +2,6 @@ package lavasession import ( "context" - "math" - "sort" "strconv" "sync" "sync/atomic" @@ -23,6 +21,13 @@ const AllowInsecureConnectionToProvidersFlag = 
"allow-insecure-provider-dialing" var AllowInsecureConnectionToProviders = false +type UsedProvidersInf interface { + RemoveUsed(providerAddress string, err error) + TryLockSelection(context.Context) bool + AddUsed(ConsumerSessionsMap, error) + GetUnwantedProvidersToSend() map[string]struct{} +} + type SessionInfo struct { Session *SingleConsumerSession StakeSize sdk.Coin @@ -57,21 +62,6 @@ type QoSReport struct { AnsweredRelays uint64 } -type SingleConsumerSession struct { - CuSum uint64 - LatestRelayCu uint64 // set by GetSessions cuNeededForSession - QoSInfo QoSReport - SessionId int64 - Parent *ConsumerSessionsWithProvider - lock utils.LavaMutex - RelayNum uint64 - LatestBlock int64 - Endpoint *Endpoint - BlockListed bool // if session lost sync we blacklist it. - ConsecutiveErrors []error - errorsCount uint64 -} - type DataReliabilitySession struct { SingleConsumerSession *SingleConsumerSession Epoch uint64 @@ -202,46 +192,6 @@ func (cswp *ConsumerSessionsWithProvider) atomicReadUsedComputeUnits() uint64 { return atomic.LoadUint64(&cswp.UsedComputeUnits) } -// verify data reliability session exists or not -func (cswp *ConsumerSessionsWithProvider) verifyDataReliabilitySessionWasNotAlreadyCreated() (singleConsumerSession *SingleConsumerSession, pairingEpoch uint64, err error) { - cswp.Lock.RLock() - defer cswp.Lock.RUnlock() - if dataReliabilitySession, ok := cswp.Sessions[DataReliabilitySessionId]; ok { // check if we already have a data reliability session. - // validate our relay number reached the data reliability relay number limit - if dataReliabilitySession.RelayNum >= DataReliabilityRelayNumber { - return nil, cswp.PairingEpoch, DataReliabilityAlreadySentThisEpochError - } - dataReliabilitySession.lock.Lock() // lock before returning. - return dataReliabilitySession, cswp.PairingEpoch, nil - } - return nil, cswp.PairingEpoch, NoDataReliabilitySessionWasCreatedError -} - -// get a data reliability session from an endpoint -func (cswp *ConsumerSessionsWithProvider) getDataReliabilitySingleConsumerSession(endpoint *Endpoint) (singleConsumerSession *SingleConsumerSession, pairingEpoch uint64, err error) { - cswp.Lock.Lock() - defer cswp.Lock.Unlock() - // we re validate the data reliability session now that we are locked. - if dataReliabilitySession, ok := cswp.Sessions[DataReliabilitySessionId]; ok { // check if we already have a data reliability session. - if dataReliabilitySession.RelayNum >= DataReliabilityRelayNumber { - return nil, cswp.PairingEpoch, DataReliabilityAlreadySentThisEpochError - } - // we already have the dr session. so return it. - return dataReliabilitySession, cswp.PairingEpoch, nil - } - - singleDataReliabilitySession := &SingleConsumerSession{ - SessionId: DataReliabilitySessionId, - Parent: cswp, - Endpoint: endpoint, - RelayNum: 0, - } - singleDataReliabilitySession.lock.Lock() // we must lock the session so other requests wont get it. - - cswp.Sessions[singleDataReliabilitySession.SessionId] = singleDataReliabilitySession // applying the session to the pool of sessions. 
- return singleDataReliabilitySession, cswp.PairingEpoch, nil -} - func (cswp *ConsumerSessionsWithProvider) GetPairingEpoch() uint64 { return atomic.LoadUint64(&cswp.PairingEpoch) } @@ -345,16 +295,13 @@ func (cswp *ConsumerSessionsWithProvider) GetConsumerSessionInstanceFromEndpoint if numberOfBlockedSessions >= maximumBlockedSessionsAllowed { return nil, 0, MaximumNumberOfBlockListedSessionsError } - - if session.lock.TryLock() { - if session.BlockListed { // this session cannot be used. - numberOfBlockedSessions += 1 // increase the number of blocked sessions so we can block this provider is too many are blocklisted - session.lock.Unlock() - continue - } - // if we locked the session its available to use, otherwise someone else is already using it + blocked, ok := session.TryUseSession() + if ok { return session, cswp.PairingEpoch, nil } + if blocked { + numberOfBlockedSessions += 1 // increase the number of blocked sessions so we can block this provider is too many are blocklisted + } } // No Sessions available, create a new session or return an error upon maximum sessions allowed if len(cswp.Sessions) > MaxSessionsAllowedPerProvider { @@ -371,7 +318,7 @@ func (cswp *ConsumerSessionsWithProvider) GetConsumerSessionInstanceFromEndpoint Parent: cswp, Endpoint: endpoint, } - consumerSession.lock.Lock() // we must lock the session so other requests wont get it. + consumerSession.TryUseSession() // we must lock the session so other requests wont get it. cswp.Sessions[consumerSession.SessionId] = consumerSession // applying the session to the pool of sessions. return consumerSession, cswp.PairingEpoch, nil @@ -458,86 +405,8 @@ func (cswp *ConsumerSessionsWithProvider) fetchEndpointConnectionFromConsumerSes return connected, endpointPtr, cswp.PublicLavaAddress, nil } -// returns the expected latency to a threshold. -func (cs *SingleConsumerSession) CalculateExpectedLatency(timeoutGivenToRelay time.Duration) time.Duration { - expectedLatency := (timeoutGivenToRelay / 2) - return expectedLatency -} - -// cs should be locked here to use this method, returns the computed qos or zero if last qos is nil or failed to compute. 
-func (cs *SingleConsumerSession) getQosComputedResultOrZero() sdk.Dec { - if cs.QoSInfo.LastExcellenceQoSReport != nil { - qosComputed, errComputing := cs.QoSInfo.LastExcellenceQoSReport.ComputeQoSExcellence() - if errComputing == nil { // if we failed to compute the qos will be 0 so this provider wont be picked to return the error in case we get it - return qosComputed - } - utils.LavaFormatError("Failed computing QoS used for error parsing", errComputing, utils.LogAttr("Report", cs.QoSInfo.LastExcellenceQoSReport)) - } - return sdk.ZeroDec() -} - -func (cs *SingleConsumerSession) CalculateQoS(latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) { - // Add current Session QoS - cs.QoSInfo.TotalRelays++ // increase total relays - cs.QoSInfo.AnsweredRelays++ // increase answered relays - - if cs.QoSInfo.LastQoSReport == nil { - cs.QoSInfo.LastQoSReport = &pairingtypes.QualityOfServiceReport{} - } - - downtimePercentage, scaledAvailabilityScore := CalculateAvailabilityScore(&cs.QoSInfo) - cs.QoSInfo.LastQoSReport.Availability = scaledAvailabilityScore - if sdk.OneDec().GT(cs.QoSInfo.LastQoSReport.Availability) { - utils.LavaFormatInfo("QoS Availability report", utils.Attribute{Key: "Availability", Value: cs.QoSInfo.LastQoSReport.Availability}, utils.Attribute{Key: "down percent", Value: downtimePercentage}) - } - - latencyScore := sdk.MinDec(sdk.OneDec(), sdk.NewDecFromInt(sdk.NewInt(int64(expectedLatency))).Quo(sdk.NewDecFromInt(sdk.NewInt(int64(latency))))) - - insertSorted := func(list []sdk.Dec, value sdk.Dec) []sdk.Dec { - index := sort.Search(len(list), func(i int) bool { - return list[i].GTE(value) - }) - if len(list) == index { // nil or empty slice or after last element - return append(list, value) - } - list = append(list[:index+1], list[index:]...) 
// index < len(a) - list[index] = value - return list - } - cs.QoSInfo.LatencyScoreList = insertSorted(cs.QoSInfo.LatencyScoreList, latencyScore) - cs.QoSInfo.LastQoSReport.Latency = cs.QoSInfo.LatencyScoreList[int(float64(len(cs.QoSInfo.LatencyScoreList))*PercentileToCalculateLatency)] - - // checking if we have enough information to calculate the sync score for the providers, if we haven't talked - // with enough providers we don't have enough information and we will wait to have more information before setting the sync score - shouldCalculateSyncScore := int64(numOfProviders) > int64(math.Ceil(float64(servicersToCount)*MinProvidersForSync)) - if shouldCalculateSyncScore { // - if blockHeightDiff <= 0 { // if the diff is bigger than 0 than the block is too old (blockHeightDiff = expected - allowedLag - blockHeight) and we don't give him the score - cs.QoSInfo.SyncScoreSum++ - } - cs.QoSInfo.TotalSyncScore++ - cs.QoSInfo.LastQoSReport.Sync = sdk.NewDec(cs.QoSInfo.SyncScoreSum).QuoInt64(cs.QoSInfo.TotalSyncScore) - if sdk.OneDec().GT(cs.QoSInfo.LastQoSReport.Sync) { - utils.LavaFormatDebug("QoS Sync report", - utils.Attribute{Key: "Sync", Value: cs.QoSInfo.LastQoSReport.Sync}, - utils.Attribute{Key: "block diff", Value: blockHeightDiff}, - utils.Attribute{Key: "sync score", Value: strconv.FormatInt(cs.QoSInfo.SyncScoreSum, 10) + "/" + strconv.FormatInt(cs.QoSInfo.TotalSyncScore, 10)}, - utils.Attribute{Key: "session_id", Value: cs.SessionId}, - utils.Attribute{Key: "provider", Value: cs.Parent.PublicLavaAddress}, - ) - } - } else { - // we prefer to give them a score of 1 when there is no other data, since otherwise we damage their payments - cs.QoSInfo.LastQoSReport.Sync = sdk.NewDec(1) - } -} - func CalculateAvailabilityScore(qosReport *QoSReport) (downtimePercentageRet, scaledAvailabilityScoreRet sdk.Dec) { downtimePercentage := sdk.NewDecWithPrec(int64(qosReport.TotalRelays-qosReport.AnsweredRelays), 0).Quo(sdk.NewDecWithPrec(int64(qosReport.TotalRelays), 0)) scaledAvailabilityScore := sdk.MaxDec(sdk.ZeroDec(), AvailabilityPercentage.Sub(downtimePercentage).Quo(AvailabilityPercentage)) return downtimePercentage, scaledAvailabilityScore } - -// validate if this is a data reliability session -func (scs *SingleConsumerSession) IsDataReliabilitySession() bool { - return scs.SessionId <= DataReliabilitySessionId -} diff --git a/protocol/lavasession/end_to_end_lavasession_test.go b/protocol/lavasession/end_to_end_lavasession_test.go index 9d7b081fea..abc42fb087 100644 --- a/protocol/lavasession/end_to_end_lavasession_test.go +++ b/protocol/lavasession/end_to_end_lavasession_test.go @@ -28,7 +28,7 @@ func TestHappyFlowE2EEmergency(t *testing.T) { successfulRelays++ for i := 0; i < len(consumerVirtualEpochs); i++ { - css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, nil, servicedBlockNumber, "", nil, common.NOSTATE, consumerVirtualEpochs[i]) // get a session + css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, consumerVirtualEpochs[i]) // get a session require.NoError(t, err) for _, cs := range css { @@ -92,7 +92,7 @@ func TestHappyFlowE2EEmergency(t *testing.T) { func TestHappyFlowEmergencyInConsumer(t *testing.T) { csm, psm, ctx := prepareSessionsWithFirstRelay(t, maxCuForVirtualEpoch) - css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, nil, servicedBlockNumber, "", nil, common.NOSTATE, virtualEpoch) // get a session + css, err := csm.GetSessions(ctx, maxCuForVirtualEpoch, NewUsedProviders(nil), 
servicedBlockNumber, "", nil, common.NO_STATE, virtualEpoch) // get a session require.NoError(t, err) for _, cs := range css { @@ -157,7 +157,7 @@ func prepareSessionsWithFirstRelay(t *testing.T, cuForFirstRequest uint64) (*Con err := csm.UpdateAllProviders(epoch1, cswpList) // update the providers. require.NoError(t, err) // get single consumer session - css, err := csm.GetSessions(ctx, cuForFirstRequest, nil, servicedBlockNumber, "", nil, common.NOSTATE, 0) // get a session + css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, "", nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go new file mode 100644 index 0000000000..561720c40d --- /dev/null +++ b/protocol/lavasession/single_consumer_session.go @@ -0,0 +1,150 @@ +package lavasession + +import ( + "math" + "sort" + "strconv" + "time" + + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/lavanet/lava/utils" + pairingtypes "github.com/lavanet/lava/x/pairing/types" +) + +type SingleConsumerSession struct { + CuSum uint64 + LatestRelayCu uint64 // set by GetSessions cuNeededForSession + QoSInfo QoSReport + SessionId int64 + Parent *ConsumerSessionsWithProvider + lock utils.LavaMutex + RelayNum uint64 + LatestBlock int64 + Endpoint *Endpoint + BlockListed bool // if session lost sync we blacklist it. + ConsecutiveErrors []error + errorsCount uint64 + relayProcessor UsedProvidersInf +} + +// returns the expected latency to a threshold. +func (cs *SingleConsumerSession) CalculateExpectedLatency(timeoutGivenToRelay time.Duration) time.Duration { + expectedLatency := (timeoutGivenToRelay / 2) + return expectedLatency +} + +// cs should be locked here to use this method, returns the computed qos or zero if last qos is nil or failed to compute. 
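+// a zero score means this session's error is the least preferred when choosing which provider error to surface to the user (illustrative note, inferred from the comment below).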
+func (cs *SingleConsumerSession) getQosComputedResultOrZero() sdk.Dec { + if cs.QoSInfo.LastExcellenceQoSReport != nil { + qosComputed, errComputing := cs.QoSInfo.LastExcellenceQoSReport.ComputeQoSExcellence() + if errComputing == nil { // if we failed to compute the qos will be 0 so this provider wont be picked to return the error in case we get it + return qosComputed + } + utils.LavaFormatError("Failed computing QoS used for error parsing", errComputing, utils.LogAttr("Report", cs.QoSInfo.LastExcellenceQoSReport)) + } + return sdk.ZeroDec() +} + +func (cs *SingleConsumerSession) CalculateQoS(latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) { + // Add current Session QoS + cs.QoSInfo.TotalRelays++ // increase total relays + cs.QoSInfo.AnsweredRelays++ // increase answered relays + + if cs.QoSInfo.LastQoSReport == nil { + cs.QoSInfo.LastQoSReport = &pairingtypes.QualityOfServiceReport{} + } + + downtimePercentage, scaledAvailabilityScore := CalculateAvailabilityScore(&cs.QoSInfo) + cs.QoSInfo.LastQoSReport.Availability = scaledAvailabilityScore + if sdk.OneDec().GT(cs.QoSInfo.LastQoSReport.Availability) { + utils.LavaFormatInfo("QoS Availability report", utils.Attribute{Key: "Availability", Value: cs.QoSInfo.LastQoSReport.Availability}, utils.Attribute{Key: "down percent", Value: downtimePercentage}) + } + + latencyScore := sdk.MinDec(sdk.OneDec(), sdk.NewDecFromInt(sdk.NewInt(int64(expectedLatency))).Quo(sdk.NewDecFromInt(sdk.NewInt(int64(latency))))) + + insertSorted := func(list []sdk.Dec, value sdk.Dec) []sdk.Dec { + index := sort.Search(len(list), func(i int) bool { + return list[i].GTE(value) + }) + if len(list) == index { // nil or empty slice or after last element + return append(list, value) + } + list = append(list[:index+1], list[index:]...) 
// index < len(a)
+		list[index] = value
+		return list
+	}
+	cs.QoSInfo.LatencyScoreList = insertSorted(cs.QoSInfo.LatencyScoreList, latencyScore)
+	cs.QoSInfo.LastQoSReport.Latency = cs.QoSInfo.LatencyScoreList[int(float64(len(cs.QoSInfo.LatencyScoreList))*PercentileToCalculateLatency)]
+
+	// check whether we have enough information to calculate the sync score for the providers; if we haven't talked
+	// with enough providers yet we don't have enough information, so we wait for more data before setting the sync score
+	shouldCalculateSyncScore := int64(numOfProviders) > int64(math.Ceil(float64(servicersToCount)*MinProvidersForSync))
+	if shouldCalculateSyncScore {
+		if blockHeightDiff <= 0 { // if the diff is greater than 0 the block is too old (blockHeightDiff = expected - allowedLag - blockHeight) and the provider doesn't get the sync point
+			cs.QoSInfo.SyncScoreSum++
+		}
+		cs.QoSInfo.TotalSyncScore++
+		cs.QoSInfo.LastQoSReport.Sync = sdk.NewDec(cs.QoSInfo.SyncScoreSum).QuoInt64(cs.QoSInfo.TotalSyncScore)
+		if sdk.OneDec().GT(cs.QoSInfo.LastQoSReport.Sync) {
+			utils.LavaFormatDebug("QoS Sync report",
+				utils.Attribute{Key: "Sync", Value: cs.QoSInfo.LastQoSReport.Sync},
+				utils.Attribute{Key: "block diff", Value: blockHeightDiff},
+				utils.Attribute{Key: "sync score", Value: strconv.FormatInt(cs.QoSInfo.SyncScoreSum, 10) + "/" + strconv.FormatInt(cs.QoSInfo.TotalSyncScore, 10)},
+				utils.Attribute{Key: "session_id", Value: cs.SessionId},
+				utils.Attribute{Key: "provider", Value: cs.Parent.PublicLavaAddress},
+			)
+		}
+	} else {
+		// we prefer to give them a score of 1 when there is no other data, since otherwise we damage their payments
+		cs.QoSInfo.LastQoSReport.Sync = sdk.NewDec(1)
+	}
+}
+
+func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, qoSExcellenceReport *pairingtypes.QualityOfServiceReport, usedProviders UsedProvidersInf) error {
+	scs.LatestRelayCu = cuNeededForSession // set latestRelayCu
+	scs.RelayNum += RelayNumberIncrement   // increase relayNum
+	if scs.RelayNum > 1 {
+		// we only set excellence for sessions with more than one successful relay; this guarantees data within the epoch exists
+		scs.QoSInfo.LastExcellenceQoSReport = qoSExcellenceReport
+	}
+	scs.relayProcessor = usedProviders
+	return nil
+}
+
+func (scs *SingleConsumerSession) Free(err error) {
+	if scs.relayProcessor != nil {
+		scs.relayProcessor.RemoveUsed(scs.Parent.PublicLavaAddress, err)
+		scs.relayProcessor = nil
+	}
+	scs.lock.Unlock()
+}
+
+func (session *SingleConsumerSession) TryUseSession() (blocked bool, ok bool) {
+	if session.lock.TryLock() {
+		if session.BlockListed { // this session cannot be used.
+			session.lock.Unlock()
+			return true, false
+		}
+		if session.relayProcessor != nil {
+			utils.LavaFormatError("session misuse detected, usedProviders isn't nil, missing Free call, blocking", nil, utils.LogAttr("session", session.SessionId))
+			session.BlockListed = true
+			session.lock.Unlock()
+			return true, false
+		}
+		return false, true
+	}
+	return false, false
+}
+
+// Verify the consumerSession is locked when entering this function; if it's not locked, return an error
+func (consumerSession *SingleConsumerSession) VerifyLock() error {
+	if consumerSession.lock.TryLock() { // verify.
+		// if we managed to lock, the caller misused the session; return an error.
+		defer consumerSession.Free(nil)
+		// since the expected lock was not held, block the session, as this seems like a very rare case.
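+		// note: the deferred Free(nil) above releases the lock acquired by TryLock and detaches the session from its usedProviders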
+ consumerSession.BlockListed = true // block this session from future usages + utils.LavaFormatError("Verify Lock failed on session Failure, blocking session", nil, utils.LogAttr("consumerSession", consumerSession)) + return LockMisUseDetectedError + } + return nil +} diff --git a/protocol/lavasession/used_providers.go b/protocol/lavasession/used_providers.go new file mode 100644 index 0000000000..55eff1ddd1 --- /dev/null +++ b/protocol/lavasession/used_providers.go @@ -0,0 +1,191 @@ +package lavasession + +import ( + "context" + "strings" + "sync" + "time" + + "github.com/lavanet/lava/protocol/common" + "github.com/lavanet/lava/utils" +) + +func NewUsedProviders(directiveHeaders map[string]string) *UsedProviders { + unwantedProviders := map[string]struct{}{} + if len(directiveHeaders) > 0 { + blockedProviders, ok := directiveHeaders[common.BLOCK_PROVIDERS_ADDRESSES_HEADER_NAME] + if ok { + providerAddressesToBlock := strings.Split(blockedProviders, ",") + for _, providerAddress := range providerAddressesToBlock { + unwantedProviders[providerAddress] = struct{}{} + } + } + } + return &UsedProviders{providers: map[string]struct{}{}, unwantedProviders: unwantedProviders, blockOnSyncLoss: map[string]struct{}{}} +} + +type UsedProviders struct { + lock sync.RWMutex + providers map[string]struct{} + selecting bool + unwantedProviders map[string]struct{} + blockOnSyncLoss map[string]struct{} + sessionsLatestBatch int +} + +func (up *UsedProviders) CurrentlyUsed() int { + if up == nil { + utils.LavaFormatError("UsedProviders.CurrentlyUsed is nil, misuse detected", nil) + return 0 + } + up.lock.RLock() + defer up.lock.RUnlock() + return len(up.providers) +} + +func (up *UsedProviders) SessionsLatestBatch() int { + if up == nil { + utils.LavaFormatError("UsedProviders.SessionsLatestBatch is nil, misuse detected", nil) + return 0 + } + up.lock.RLock() + defer up.lock.RUnlock() + return up.sessionsLatestBatch +} + +func (up *UsedProviders) CurrentlyUsedAddresses() []string { + if up == nil { + utils.LavaFormatError("UsedProviders.CurrentlyUsedAddresses is nil, misuse detected", nil) + return []string{} + } + up.lock.RLock() + defer up.lock.RUnlock() + addresses := []string{} + for addr := range up.providers { + addresses = append(addresses, addr) + } + return addresses +} + +func (up *UsedProviders) UnwantedAddresses() []string { + if up == nil { + utils.LavaFormatError("UsedProviders.UnwantedAddresses is nil, misuse detected", nil) + return []string{} + } + up.lock.RLock() + defer up.lock.RUnlock() + addresses := []string{} + for addr := range up.unwantedProviders { + addresses = append(addresses, addr) + } + return addresses +} + +func (up *UsedProviders) RemoveUsed(provider string, err error) { + if up == nil { + return + } + up.lock.Lock() + defer up.lock.Unlock() + if err != nil { + if shouldRetryWithThisError(err) { + _, ok := up.blockOnSyncLoss[provider] + if !ok && IsSessionSyncLoss(err) { + up.blockOnSyncLoss[provider] = struct{}{} + utils.LavaFormatWarning("Identified SyncLoss in provider, allowing retry", err, utils.Attribute{Key: "address", Value: provider}) + } else { + up.setUnwanted(provider) + } + } else { + up.setUnwanted(provider) + } + } else { + // we got a valid response from this provider, no reason to keep using it + up.setUnwanted(provider) + } + delete(up.providers, provider) +} + +func (up *UsedProviders) ClearUnwanted() { + if up == nil { + return + } + up.lock.Lock() + defer up.lock.Unlock() + // this is nil safe + up.unwantedProviders = map[string]struct{}{} +} + +func 
(up *UsedProviders) AddUsed(sessions ConsumerSessionsMap, err error) { + if up == nil { + return + } + up.lock.Lock() + defer up.lock.Unlock() + // this is nil safe + if len(sessions) > 0 && err == nil { + up.sessionsLatestBatch = 0 + for provider := range sessions { // the key for ConsumerSessionsMap is the provider public address + up.providers[provider] = struct{}{} + up.sessionsLatestBatch++ + } + } + up.selecting = false +} + +func (up *UsedProviders) setUnwanted(provider string) { + if up == nil { + return + } + up.unwantedProviders[provider] = struct{}{} +} + +func (up *UsedProviders) TryLockSelection(ctx context.Context) bool { + if up == nil { + return true + } + for { + select { + case <-ctx.Done(): + return false + default: + canSelect := up.tryLockSelection() + if canSelect { + return true + } + time.Sleep(10 * time.Millisecond) + } + } +} + +func (up *UsedProviders) tryLockSelection() bool { + up.lock.Lock() + defer up.lock.Unlock() + if !up.selecting { + up.selecting = true + return true + } + return false +} + +func (up *UsedProviders) GetUnwantedProvidersToSend() map[string]struct{} { + if up == nil { + return map[string]struct{}{} + } + up.lock.RLock() + defer up.lock.RUnlock() + unwantedProvidersToSend := map[string]struct{}{} + // block the currently used providers + for provider := range up.providers { + unwantedProvidersToSend[provider] = struct{}{} + } + // block providers that we have a response for + for provider := range up.unwantedProviders { + unwantedProvidersToSend[provider] = struct{}{} + } + return unwantedProvidersToSend +} + +func shouldRetryWithThisError(err error) bool { + return IsSessionSyncLoss(err) +} diff --git a/protocol/lavasession/used_providers_test.go b/protocol/lavasession/used_providers_test.go new file mode 100644 index 0000000000..042394b4e5 --- /dev/null +++ b/protocol/lavasession/used_providers_test.go @@ -0,0 +1,99 @@ +package lavasession + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/gogo/status" + "github.com/stretchr/testify/require" + "google.golang.org/grpc/codes" +) + +func TestUsedProviders(t *testing.T) { + t.Run("basic", func(t *testing.T) { + usedProviders := NewUsedProviders(nil) + canUse := usedProviders.tryLockSelection() + require.True(t, canUse) + canUseAgain := usedProviders.tryLockSelection() + require.False(t, canUseAgain) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + unwanted := usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 0) + consumerSessionsMap := ConsumerSessionsMap{"test": &SessionInfo{}, "test2": &SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap, nil) + canUseAgain = usedProviders.tryLockSelection() + require.True(t, canUseAgain) + unwanted = usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 2) + require.Equal(t, 2, usedProviders.CurrentlyUsed()) + canUseAgain = usedProviders.tryLockSelection() + require.False(t, canUseAgain) + consumerSessionsMap = ConsumerSessionsMap{"test3": &SessionInfo{}, "test4": &SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap, nil) + unwanted = usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 4) + require.Equal(t, 4, usedProviders.CurrentlyUsed()) + // one provider gives a retry + usedProviders.RemoveUsed("test", status.Error(codes.Code(SessionOutOfSyncError.ABCICode()), "")) + require.Equal(t, 3, usedProviders.CurrentlyUsed()) + unwanted = usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 3) + // one 
provider gives a result + usedProviders.RemoveUsed("test2", nil) + unwanted = usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 3) + require.Equal(t, 2, usedProviders.CurrentlyUsed()) + // one provider gives an error + usedProviders.RemoveUsed("test3", fmt.Errorf("bad")) + unwanted = usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 3) + require.Equal(t, 1, usedProviders.CurrentlyUsed()) + canUseAgain = usedProviders.tryLockSelection() + require.True(t, canUseAgain) + }) +} + +func TestUsedProvidersAsync(t *testing.T) { + t.Run("concurrency", func(t *testing.T) { + usedProviders := NewUsedProviders(nil) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + go func() { + time.Sleep(time.Millisecond * 10) + consumerSessionsMap := ConsumerSessionsMap{"test": &SessionInfo{}, "test2": &SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap, nil) + }() + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*100) + defer cancel() + canUseAgain := usedProviders.TryLockSelection(ctx) + require.True(t, canUseAgain) + unwanted := usedProviders.GetUnwantedProvidersToSend() + require.Len(t, unwanted, 2) + require.Equal(t, 2, usedProviders.CurrentlyUsed()) + }) +} + +func TestUsedProvidersAsyncFail(t *testing.T) { + t.Run("concurrency", func(t *testing.T) { + usedProviders := NewUsedProviders(nil) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUseAgain := usedProviders.TryLockSelection(ctx) + require.False(t, canUseAgain) + err := ctx.Err() + require.Error(t, err) + }) +} diff --git a/protocol/monitoring/health.go b/protocol/monitoring/health.go index 8282f0f3aa..aa1050e288 100644 --- a/protocol/monitoring/health.go +++ b/protocol/monitoring/health.go @@ -35,7 +35,7 @@ var QueryRetries = uint64(3) const ( BasicQueryRetries = 3 QuerySleepTime = 100 * time.Millisecond - NiceOutputLength = 40 + NiceOutputLength = 100 ) type LavaEntity struct { diff --git a/protocol/rpcconsumer/consumer_consistency.go b/protocol/rpcconsumer/consumer_consistency.go index 8f0947faa1..66c1613ef0 100644 --- a/protocol/rpcconsumer/consumer_consistency.go +++ b/protocol/rpcconsumer/consumer_consistency.go @@ -45,6 +45,9 @@ func (cc *ConsumerConsistency) Key(dappId string, ip string) string { } func (cc *ConsumerConsistency) SetSeenBlock(blockSeen int64, dappId string, ip string) { + if cc == nil { + return + } block, _ := cc.getLatestBlock(cc.Key(dappId, ip)) if block < blockSeen { cc.setLatestBlock(cc.Key(dappId, ip), blockSeen) diff --git a/protocol/rpcconsumer/relay_errors.go b/protocol/rpcconsumer/relay_errors.go index 88db666326..0adae35b9c 100644 --- a/protocol/rpcconsumer/relay_errors.go +++ b/protocol/rpcconsumer/relay_errors.go @@ -2,6 +2,7 @@ package rpcconsumer import ( "fmt" + "regexp" "strconv" github_com_cosmos_cosmos_sdk_types "github.com/cosmos/cosmos-sdk/types" @@ -31,12 +32,28 @@ func (r *RelayErrors) findMaxAppearances(input map[string][]int) (maxVal int, in return } +func replacePattern(input, pattern, 
replacement string) string { + re := regexp.MustCompile(pattern) + return re.ReplaceAllString(input, replacement) +} + +func (r *RelayErrors) sanitizeError(err error) string { + errMsg := err.Error() + // Replace SessionId:(any digit here) with SessionId:* + errMsg = replacePattern(errMsg, `SessionId:\d+`, "SessionId:*") + + // Replace GUID:(any digit here) with GUID:* + errMsg = replacePattern(errMsg, `GUID:\d+`, "GUID:*") + + return errMsg +} + func (r *RelayErrors) GetBestErrorMessageForUser() RelayError { bestIndex := -1 bestResult := github_com_cosmos_cosmos_sdk_types.ZeroDec() errorMap := make(map[string][]int) for idx, relayError := range r.relayErrors { - errorMessage := relayError.err.Error() + errorMessage := r.sanitizeError(relayError.err) errorMap[errorMessage] = append(errorMap[errorMessage], idx) if relayError.ProviderInfo.ProviderQoSExcellenceSummery.IsNil() || relayError.ProviderInfo.ProviderStake.Amount.IsNil() { continue @@ -51,6 +68,9 @@ func (r *RelayErrors) GetBestErrorMessageForUser() RelayError { errorCount, index := r.findMaxAppearances(errorMap) if index >= 0 && errorCount >= (len(r.relayErrors)/2) { // we have majority of errors we can return this error. + if r.relayErrors[index].response != nil { + r.relayErrors[index].response.relayResult.Quorum = errorCount + } return r.relayErrors[index] } @@ -70,16 +90,16 @@ func (r *RelayErrors) GetBestErrorMessageForUser() RelayError { } func (r *RelayErrors) getAllUniqueErrors() []error { - allErrors := make([]error, len(r.relayErrors)) + allErrors := []error{} repeatingErrors := make(map[string]struct{}) - for idx, relayError := range r.relayErrors { - errString := relayError.err.Error() // using strings to filter repeating errors + for _, relayError := range r.relayErrors { + errString := r.sanitizeError(relayError.err) // using strings to filter repeating errors _, ok := repeatingErrors[errString] if ok { continue } repeatingErrors[errString] = struct{}{} - allErrors[idx] = relayError.err + allErrors = append(allErrors, relayError.err) } return allErrors } @@ -97,8 +117,13 @@ func (r *RelayErrors) mergeAllErrors() error { return fmt.Errorf(mergedMessage) } +// TODO: there's no need to save error twice and provider info twice, this can just be a relayResponse type RelayError struct { err error ProviderInfo common.ProviderInfo response *relayResponse } + +func (re RelayError) String() string { + return fmt.Sprintf("err: %s, ProviderInfo: %v, response: %v", re.err, re.ProviderInfo, re.response) +} diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go new file mode 100644 index 0000000000..ab91709641 --- /dev/null +++ b/protocol/rpcconsumer/relay_processor.go @@ -0,0 +1,404 @@ +package rpcconsumer + +import ( + "context" + "errors" + "fmt" + "net/http" + "strings" + "sync" + + sdktypes "github.com/cosmos/cosmos-sdk/types" + "github.com/lavanet/lava/protocol/chainlib" + "github.com/lavanet/lava/protocol/common" + "github.com/lavanet/lava/protocol/lavasession" + "github.com/lavanet/lava/utils" + spectypes "github.com/lavanet/lava/x/spec/types" +) + +const ( + MaxCallsPerRelay = 50 +) + +type Selection int + +const ( + Quorum Selection = iota // get the majority out of requiredSuccesses + BestResult // get the best result, even if it means waiting +) + +func NewRelayProcessor(ctx context.Context, usedProviders *lavasession.UsedProviders, requiredSuccesses int, chainMessage chainlib.ChainMessage, consumerConsistency *ConsumerConsistency, dappID string, consumerIp string) *RelayProcessor 
{ + guid, _ := utils.GetUniqueIdentifier(ctx) + selection := Quorum // select the majority of node responses + if chainlib.GetStateful(chainMessage) == common.CONSISTENCY_SELECT_ALL_PROVIDERS { + selection = BestResult // select the majority of node successes + } + if requiredSuccesses <= 0 { + utils.LavaFormatFatal("invalid requirement, successes count must be greater than 0", nil, utils.LogAttr("requiredSuccesses", requiredSuccesses)) + } + return &RelayProcessor{ + usedProviders: usedProviders, + requiredSuccesses: requiredSuccesses, + responses: make(chan *relayResponse, MaxCallsPerRelay), // we set it as buffered so it is not blocking + nodeResponseErrors: RelayErrors{relayErrors: []RelayError{}}, + protocolResponseErrors: RelayErrors{relayErrors: []RelayError{}, onFailureMergeAll: true}, + chainMessage: chainMessage, + guid: guid, + selection: selection, + consumerConsistency: consumerConsistency, + dappID: dappID, + consumerIp: consumerIp, + } +} + +type RelayProcessor struct { + usedProviders *lavasession.UsedProviders + responses chan *relayResponse + requiredSuccesses int + nodeResponseErrors RelayErrors + protocolResponseErrors RelayErrors + successResults []common.RelayResult + lock sync.RWMutex + chainMessage chainlib.ChainMessage + guid uint64 + selection Selection + consumerConsistency *ConsumerConsistency + dappID string + consumerIp string +} + +func (rp *RelayProcessor) String() string { + if rp == nil { + return "" + } + rp.lock.RLock() + nodeErrors := len(rp.nodeResponseErrors.relayErrors) + protocolErrors := len(rp.protocolResponseErrors.relayErrors) + results := len(rp.successResults) + usedProviders := rp.usedProviders + rp.lock.RUnlock() + + currentlyUsedAddresses := usedProviders.CurrentlyUsedAddresses() + unwantedAddresses := usedProviders.UnwantedAddresses() + return fmt.Sprintf("relayProcessor {results:%d, nodeErrors:%d, protocolErrors:%d,unwantedAddresses: %s,currentlyUsedAddresses:%s}", + results, nodeErrors, protocolErrors, strings.Join(unwantedAddresses, ";"), strings.Join(currentlyUsedAddresses, ";")) +} + +func (rp *RelayProcessor) GetUsedProviders() *lavasession.UsedProviders { + if rp == nil { + utils.LavaFormatError("RelayProcessor.GetUsedProviders is nil, misuse detected", nil) + return nil + } + rp.lock.RLock() + defer rp.lock.RUnlock() + return rp.usedProviders +} + +// this function returns all results that came from a node, meaning success, and node errors +func (rp *RelayProcessor) NodeResults() []common.RelayResult { + if rp == nil { + return nil + } + rp.readExistingResponses() + rp.lock.RLock() + defer rp.lock.RUnlock() + return rp.nodeResultsInner() +} + +// only when locked +func (rp *RelayProcessor) nodeResultsInner() []common.RelayResult { + // start with results and add to them node results + nodeResults := rp.successResults + nodeResults = append(nodeResults, rp.nodeErrors()...) 
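+	// node errors are included here so quorum and end-of-processing checks can treat them as responses actually served by a node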
+ return nodeResults +} + +// only when locked +func (rp *RelayProcessor) nodeErrors() (ret []common.RelayResult) { + for _, relayError := range rp.nodeResponseErrors.relayErrors { + ret = append(ret, relayError.response.relayResult) + } + return ret +} + +func (rp *RelayProcessor) ProtocolErrors() uint64 { + if rp == nil { + return 0 + } + rp.lock.RLock() + defer rp.lock.RUnlock() + return uint64(len(rp.protocolResponseErrors.relayErrors)) +} + +func (rp *RelayProcessor) SetResponse(response *relayResponse) { + if rp == nil { + return + } + if response == nil { + return + } + rp.responses <- response +} + +func (rp *RelayProcessor) setValidResponse(response *relayResponse) { + rp.lock.Lock() + defer rp.lock.Unlock() + + // future relay requests and data reliability requests need to ask for the same specific block height to get consensus on the reply + // we do not modify the chain message data on the consumer, only it's requested block, so we let the provider know it can't put any block height it wants by setting a specific block height + reqBlock, _ := rp.chainMessage.RequestedBlock() + if reqBlock == spectypes.LATEST_BLOCK { + // TODO: when we turn on dataReliability on latest call UpdateLatest, until then we turn it off always + // modifiedOnLatestReq := rp.chainMessage.UpdateLatestBlockInMessage(response.relayResult.Reply.LatestBlock, false) + // if !modifiedOnLatestReq { + response.relayResult.Finalized = false // shut down data reliability + // } + } + if response.err == nil && response.relayResult.Reply != nil { + // no error, update the seen block + blockSeen := response.relayResult.Reply.LatestBlock + // nil safe + rp.consumerConsistency.SetSeenBlock(blockSeen, rp.dappID, rp.consumerIp) + } + foundError, errorMessage := rp.chainMessage.CheckResponseError(response.relayResult.Reply.Data, response.relayResult.StatusCode) + if foundError { + // this is a node error, meaning we still didn't get a good response. 
+ // we may choose to wait until there will be a response or timeout happens + // if we decide to wait and timeout happens we will take the majority of response messages + err := fmt.Errorf(errorMessage) + rp.nodeResponseErrors.relayErrors = append(rp.nodeResponseErrors.relayErrors, RelayError{err: err, ProviderInfo: response.relayResult.ProviderInfo, response: response}) + return + } + + rp.successResults = append(rp.successResults, response.relayResult) +} + +func (rp *RelayProcessor) setErrorResponse(response *relayResponse) { + rp.lock.Lock() + defer rp.lock.Unlock() + utils.LavaFormatDebug("could not send relay to provider", utils.Attribute{Key: "GUID", Value: rp.guid}, utils.Attribute{Key: "provider", Value: response.relayResult.ProviderInfo.ProviderAddress}, utils.Attribute{Key: "error", Value: response.err.Error()}) + rp.protocolResponseErrors.relayErrors = append(rp.protocolResponseErrors.relayErrors, RelayError{err: response.err, ProviderInfo: response.relayResult.ProviderInfo, response: response}) +} + +func (rp *RelayProcessor) checkEndProcessing(responsesCount int) bool { + rp.lock.RLock() + defer rp.lock.RUnlock() + resultsCount := len(rp.successResults) + if resultsCount >= rp.requiredSuccesses { + // we have enough successes, we can return + return true + } + if rp.selection == Quorum { + // we need a quorum of all node results + nodeErrors := len(rp.nodeResponseErrors.relayErrors) + if nodeErrors+resultsCount >= rp.requiredSuccesses { + // we have enough node results for our quorum + return true + } + } + // check if we got all of the responses + if responsesCount >= rp.usedProviders.SessionsLatestBatch() { + // no active sessions, and we read all the responses, we can return + return true + } + + return false +} + +// this function defines if we should use the processor to return the result (meaning it has some insight and responses) or just return to the user +func (rp *RelayProcessor) HasResults() bool { + if rp == nil { + return false + } + rp.lock.RLock() + defer rp.lock.RUnlock() + resultsCount := len(rp.successResults) + nodeErrors := len(rp.nodeResponseErrors.relayErrors) + protocolErrors := len(rp.protocolResponseErrors.relayErrors) + return resultsCount+nodeErrors+protocolErrors > 0 +} + +func (rp *RelayProcessor) HasRequiredNodeResults() bool { + if rp == nil { + return false + } + rp.lock.RLock() + defer rp.lock.RUnlock() + resultsCount := len(rp.successResults) + if resultsCount >= rp.requiredSuccesses { + return true + } + if rp.selection == Quorum { + // we need a quorum of all node results + nodeErrors := len(rp.nodeResponseErrors.relayErrors) + if nodeErrors+resultsCount >= rp.requiredSuccesses { + // we have enough node results for our quorum + return true + } + } + // on BestResult we want to retry if there is no success + return false +} + +func (rp *RelayProcessor) handleResponse(response *relayResponse) { + if response == nil { + return + } + if response.err != nil { + rp.setErrorResponse(response) + } else { + rp.setValidResponse(response) + } +} + +func (rp *RelayProcessor) readExistingResponses() { + for { + select { + case response := <-rp.responses: + rp.handleResponse(response) + default: + // No more responses immediately available, exit the loop + return + } + } +} + +// this function waits for the processing results, they are written by multiple go routines and read by this go routine +// it then updates the responses in their respective place, node errors, protocol errors or success results +func (rp *RelayProcessor) WaitForResults(ctx 
context.Context) error { + if rp == nil { + return utils.LavaFormatError("RelayProcessor.WaitForResults is nil, misuse detected", nil) + } + responsesCount := 0 + for { + select { + case response := <-rp.responses: + responsesCount++ + rp.handleResponse(response) + if rp.checkEndProcessing(responsesCount) { + // we can finish processing + return nil + } + case <-ctx.Done(): + return utils.LavaFormatWarning("cancelled relay processor", nil, utils.LogAttr("total responses", responsesCount)) + } + } +} + +func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSize int) (returnedResult *common.RelayResult, processingError error) { + if quorumSize <= 0 { + return nil, errors.New("quorumSize must be greater than zero") + } + countMap := make(map[string]int) // Map to store the count of each unique result.Reply.Data + deterministic := rp.chainMessage.GetApi().Category.Deterministic + var bestQosResult common.RelayResult + bestQos := sdktypes.ZeroDec() + nilReplies := 0 + nilReplyIdx := -1 + for idx, result := range results { + if result.Reply != nil && result.Reply.Data != nil { + countMap[string(result.Reply.Data)]++ + if !deterministic { + if result.ProviderInfo.ProviderQoSExcellenceSummery.IsNil() || result.ProviderInfo.ProviderStake.Amount.IsNil() { + continue + } + currentResult := result.ProviderInfo.ProviderQoSExcellenceSummery.MulInt(result.ProviderInfo.ProviderStake.Amount) + if currentResult.GTE(bestQos) { + bestQos.Set(currentResult) + bestQosResult = result + } + } + } else { + nilReplies++ + nilReplyIdx = idx + } + } + var mostCommonResult common.RelayResult + var maxCount int + for _, result := range results { + if result.Reply != nil && result.Reply.Data != nil { + count := countMap[string(result.Reply.Data)] + if count > maxCount { + maxCount = count + mostCommonResult = result + } + } + } + + if nilReplies >= quorumSize && maxCount < quorumSize { + // we don't have a quorum with a valid response, but we have a quorum with an empty one + maxCount = nilReplies + mostCommonResult = results[nilReplyIdx] + } + // Check if the majority count is less than quorumSize + if maxCount < quorumSize { + if !deterministic { + // non deterministic apis might not have a quorum + // instead of failing get the best one + bestQosResult.Quorum = 1 + return &bestQosResult, nil + } + return nil, utils.LavaFormatInfo("majority count is less than quorumSize", utils.LogAttr("nilReplies", nilReplies), utils.LogAttr("results", len(results)), utils.LogAttr("maxCount", maxCount), utils.LogAttr("quorumSize", quorumSize)) + } + mostCommonResult.Quorum = maxCount + return &mostCommonResult, nil +} + +// this function returns the results according to the defined strategy +// results were stored in WaitForResults and now there's logic to select which results are returned to the user +// will return an error if we did not meet quota of replies, if we did we follow the strategies: +// if return strategy == get_first: return the first success, if none: get best node error +// if strategy == quorum get majority of node responses +// on error: we will return a placeholder relayResult, with a provider address and a status code +func (rp *RelayProcessor) ProcessingResult() (returnedResult *common.RelayResult, processingError error) { + if rp == nil { + return nil, utils.LavaFormatError("RelayProcessor.ProcessingResult is nil, misuse detected", nil) + } + + // this must be here before the lock because this function locks + allProvidersAddresses := rp.GetUsedProviders().UnwantedAddresses() + + 
rp.lock.RLock() + defer rp.lock.RUnlock() + // there are enough successes + successResultsCount := len(rp.successResults) + if successResultsCount >= rp.requiredSuccesses { + return rp.responsesQuorum(rp.successResults, rp.requiredSuccesses) + } + nodeResults := rp.nodeResultsInner() + // there are not enough successes, let's check if there are enough node errors + + if len(nodeResults) >= rp.requiredSuccesses { + if rp.selection == Quorum { + return rp.responsesQuorum(nodeResults, rp.requiredSuccesses) + } else if rp.selection == BestResult && successResultsCount > len(rp.nodeResponseErrors.relayErrors) { + // we have more than half succeeded, and we are success oriented + return rp.responsesQuorum(rp.successResults, (rp.requiredSuccesses+1)/2) + } + } + // we don't have enough for a quorum, prefer a node error on protocol errors + if len(rp.nodeResponseErrors.relayErrors) >= rp.requiredSuccesses { // if we have node errors, we prefer returning them over protocol errors. + nodeErr := rp.nodeResponseErrors.GetBestErrorMessageForUser() + return &nodeErr.response.relayResult, nil + } + + // if we got here we trigger a protocol error + returnedResult = &common.RelayResult{StatusCode: http.StatusInternalServerError} + if len(rp.nodeResponseErrors.relayErrors) > 0 { // if we have node errors, we prefer returning them over protocol errors, even if it's just the one + nodeErr := rp.nodeResponseErrors.GetBestErrorMessageForUser() + processingError = nodeErr.err + errorResponse := nodeErr.response + if errorResponse != nil { + returnedResult = &errorResponse.relayResult + } + } else if len(rp.protocolResponseErrors.relayErrors) > 0 { + protocolErr := rp.protocolResponseErrors.GetBestErrorMessageForUser() + processingError = protocolErr.err + errorResponse := protocolErr.response + if errorResponse != nil { + returnedResult = &errorResponse.relayResult + } + } + returnedResult.ProviderInfo.ProviderAddress = strings.Join(allProvidersAddresses, ",") + return returnedResult, utils.LavaFormatError("failed relay, insufficient results", processingError) +} diff --git a/protocol/rpcconsumer/relay_processor_test.go b/protocol/rpcconsumer/relay_processor_test.go new file mode 100644 index 0000000000..5396286353 --- /dev/null +++ b/protocol/rpcconsumer/relay_processor_test.go @@ -0,0 +1,389 @@ +package rpcconsumer + +import ( + "context" + "fmt" + "net/http" + "testing" + "time" + + "github.com/lavanet/lava/protocol/chainlib" + "github.com/lavanet/lava/protocol/chainlib/extensionslib" + "github.com/lavanet/lava/protocol/common" + "github.com/lavanet/lava/protocol/lavasession" + pairingtypes "github.com/lavanet/lava/x/pairing/types" + spectypes "github.com/lavanet/lava/x/spec/types" + "github.com/stretchr/testify/require" +) + +func sendSuccessResp(relayProcessor *RelayProcessor, provider string, delay time.Duration) { + time.Sleep(delay) + relayProcessor.GetUsedProviders().RemoveUsed(provider, nil) + response := &relayResponse{ + relayResult: common.RelayResult{ + Request: &pairingtypes.RelayRequest{ + RelaySession: &pairingtypes.RelaySession{}, + RelayData: &pairingtypes.RelayPrivateData{}, + }, + Reply: &pairingtypes.RelayReply{Data: []byte("ok")}, + ProviderInfo: common.ProviderInfo{ProviderAddress: provider}, + StatusCode: http.StatusOK, + }, + err: nil, + } + relayProcessor.SetResponse(response) +} + +func sendProtocolError(relayProcessor *RelayProcessor, provider string, delay time.Duration, err error) { + time.Sleep(delay) + relayProcessor.GetUsedProviders().RemoveUsed(provider, err) + response := 
&relayResponse{ + relayResult: common.RelayResult{ + Request: &pairingtypes.RelayRequest{ + RelaySession: &pairingtypes.RelaySession{}, + RelayData: &pairingtypes.RelayPrivateData{}, + }, + Reply: &pairingtypes.RelayReply{Data: []byte(`{"message":"bad","code":123}`)}, + ProviderInfo: common.ProviderInfo{ProviderAddress: provider}, + StatusCode: 0, + }, + err: err, + } + relayProcessor.SetResponse(response) +} + +func sendNodeError(relayProcessor *RelayProcessor, provider string, delay time.Duration) { + time.Sleep(delay) + relayProcessor.GetUsedProviders().RemoveUsed(provider, nil) + response := &relayResponse{ + relayResult: common.RelayResult{ + Request: &pairingtypes.RelayRequest{ + RelaySession: &pairingtypes.RelaySession{}, + RelayData: &pairingtypes.RelayPrivateData{}, + }, + Reply: &pairingtypes.RelayReply{Data: []byte(`{"message":"bad","code":123}`)}, + ProviderInfo: common.ProviderInfo{ProviderAddress: provider}, + StatusCode: http.StatusInternalServerError, + }, + err: nil, + } + relayProcessor.SetResponse(response) +} + +func TestRelayProcessorHappyFlow(t *testing.T) { + t.Run("happy", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/17", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") + + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap, nil) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + go sendSuccessResp(relayProcessor, "lava@test", time.Millisecond*5) + err = relayProcessor.WaitForResults(ctx) + require.NoError(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Zero(t, protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), "ok") + }) +} + +func TestRelayProcessorTimeout(t *testing.T) { + t.Run("timeout", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := 
chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/17", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") + + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap, nil) + go func() { + time.Sleep(time.Millisecond * 5) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test3": &lavasession.SessionInfo{}, "lava@test4": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap, nil) + }() + go sendSuccessResp(relayProcessor, "lava@test", time.Millisecond*20) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*200) + defer cancel() + err = relayProcessor.WaitForResults(ctx) + require.NoError(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Zero(t, protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), "ok") + }) +} + +func TestRelayProcessorRetry(t *testing.T) { + t.Run("retry", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/17", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") + + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap, nil) + + go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) + go sendSuccessResp(relayProcessor, "lava@test2", time.Millisecond*20) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*200) + defer cancel() + err = relayProcessor.WaitForResults(ctx) + require.NoError(t, err) + resultsOk := 
relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Equal(t, uint64(1), protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), "ok") + }) +} + +func TestRelayProcessorRetryNodeError(t *testing.T) { + t.Run("retry", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/17", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") + + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap, nil) + + go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) + go sendNodeError(relayProcessor, "lava@test2", time.Millisecond*20) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*200) + defer cancel() + err = relayProcessor.WaitForResults(ctx) + require.NoError(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Equal(t, uint64(1), protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), `{"message":"bad","code":123}`) + require.Equal(t, returnedResult.StatusCode, http.StatusInternalServerError) + }) +} + +func TestRelayProcessorStatefulApi(t *testing.T) { + t.Run("stateful", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/tx/v1beta1/txs", []byte("data"), http.MethodPost, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + 
require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava4@test": &lavasession.SessionInfo{}, "lava3@test": &lavasession.SessionInfo{}, "lava@test": &lavasession.SessionInfo{}, "lava2@test": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap, nil) + go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) + go sendNodeError(relayProcessor, "lava2@test", time.Millisecond*20) + go sendNodeError(relayProcessor, "lava3@test", time.Millisecond*25) + go sendSuccessResp(relayProcessor, "lava4@test", time.Millisecond*100) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*200) + defer cancel() + err = relayProcessor.WaitForResults(ctx) + require.NoError(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Equal(t, uint64(1), protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), "ok") + require.Equal(t, http.StatusOK, returnedResult.StatusCode) + }) +} + +func TestRelayProcessorStatefulApiErr(t *testing.T) { + t.Run("stateful", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/tx/v1beta1/txs", []byte("data"), http.MethodPost, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava4@test": &lavasession.SessionInfo{}, "lava3@test": &lavasession.SessionInfo{}, "lava@test": &lavasession.SessionInfo{}, "lava2@test": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap, nil) + go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) + go sendNodeError(relayProcessor, "lava2@test", time.Millisecond*20) + go sendNodeError(relayProcessor, "lava3@test", time.Millisecond*25) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*50) + defer cancel() + err = relayProcessor.WaitForResults(ctx) + require.Error(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Equal(t, uint64(1), protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), `{"message":"bad","code":123}`) + require.Equal(t, returnedResult.StatusCode, http.StatusInternalServerError) + }) +} + +func TestRelayProcessorLatest(t 
*testing.T) { + t.Run("latest req", func(t *testing.T) { + ctx := context.Background() + serverHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Handle the incoming request and provide the desired response + w.WriteHeader(http.StatusOK) + }) + specId := "LAV1" + chainParser, _, _, closeServer, _, err := chainlib.CreateChainLibMocks(ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + if closeServer != nil { + defer closeServer() + } + require.NoError(t, err) + chainMsg, err := chainParser.ParseMsg("/cosmos/base/tendermint/v1beta1/blocks/latest", nil, http.MethodGet, nil, extensionslib.ExtensionInfo{LatestBlock: 0}) + require.NoError(t, err) + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMsg, nil, "", "") + usedProviders := relayProcessor.GetUsedProviders() + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) + defer cancel() + canUse := usedProviders.TryLockSelection(ctx) + require.NoError(t, ctx.Err()) + require.True(t, canUse) + require.Zero(t, usedProviders.CurrentlyUsed()) + require.Zero(t, usedProviders.SessionsLatestBatch()) + + consumerSessionsMap := lavasession.ConsumerSessionsMap{"lava@test": &lavasession.SessionInfo{}, "lava@test2": &lavasession.SessionInfo{}} + usedProviders.AddUsed(consumerSessionsMap, nil) + + go sendProtocolError(relayProcessor, "lava@test", time.Millisecond*5, fmt.Errorf("bad")) + go sendSuccessResp(relayProcessor, "lava@test2", time.Millisecond*20) + ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*200) + defer cancel() + err = relayProcessor.WaitForResults(ctx) + require.NoError(t, err) + resultsOk := relayProcessor.HasResults() + require.True(t, resultsOk) + protocolErrors := relayProcessor.ProtocolErrors() + require.Equal(t, uint64(1), protocolErrors) + returnedResult, err := relayProcessor.ProcessingResult() + require.NoError(t, err) + require.Equal(t, string(returnedResult.Reply.Data), "ok") + // reqBlock, _ := chainMsg.RequestedBlock() + // require.NotEqual(t, spectypes.LATEST_BLOCK, reqBlock) // disabled until we enable requested block modification again + }) +} diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 5b5be9ee7a..a1680e85f4 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -3,7 +3,6 @@ package rpcconsumer import ( "context" "errors" - "fmt" "strconv" "strings" "time" @@ -12,6 +11,7 @@ import ( "github.com/btcsuite/btcd/btcec" sdk "github.com/cosmos/cosmos-sdk/types" "github.com/lavanet/lava/protocol/chainlib" + "github.com/lavanet/lava/protocol/chainlib/chainproxy/rpcclient" "github.com/lavanet/lava/protocol/chainlib/extensionslib" "github.com/lavanet/lava/protocol/common" "github.com/lavanet/lava/protocol/lavaprotocol" @@ -47,7 +47,7 @@ type RPCConsumerServer struct { requiredResponses int finalizationConsensus *lavaprotocol.FinalizationConsensus lavaChainID string - consumerAddress sdk.AccAddress + ConsumerAddress sdk.AccAddress consumerConsistency *ConsumerConsistency sharedState bool // using the cache backend to sync the latest seen block with other consumers relaysMonitor *metrics.RelaysMonitor @@ -56,7 +56,7 @@ type RPCConsumerServer struct { } type relayResponse struct { - relayResult *common.RelayResult + relayResult common.RelayResult err error } @@ -94,7 +94,7 @@ func (rpccs *RPCConsumerServer) ServeRPCRequests(ctx context.Context, listenEndp rpccs.privKey = privKey 
rpccs.chainParser = chainParser rpccs.finalizationConsensus = finalizationConsensus - rpccs.consumerAddress = consumerAddress + rpccs.ConsumerAddress = consumerAddress rpccs.consumerConsistency = consumerConsistency rpccs.sharedState = sharedState rpccs.reporter = reporter @@ -188,33 +188,36 @@ func (rpccs *RPCConsumerServer) craftRelay(ctx context.Context) (ok bool, relay } func (rpccs *RPCConsumerServer) sendRelayWithRetries(ctx context.Context, retries int, initialRelays bool, relay *pairingtypes.RelayPrivateData, chainMessage chainlib.ChainMessage) (bool, error) { - unwantedProviders := map[string]struct{}{} - timeouts := 0 success := false var err error - + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(nil), 1, chainMessage, rpccs.consumerConsistency, "-init-", "") for i := 0; i < retries; i++ { - var relayResult *common.RelayResult - relayResult, err = rpccs.sendRelayToProvider(ctx, chainMessage, relay, "-init-", "", &unwantedProviders, timeouts) + err = rpccs.sendRelayToProvider(ctx, chainMessage, relay, "-init-", "", relayProcessor) + if lavasession.PairingListEmptyError.Is(err) { + // we don't have pairings anymore, could be related to unwanted providers + relayProcessor.GetUsedProviders().ClearUnwanted() + err = rpccs.sendRelayToProvider(ctx, chainMessage, relay, "-init-", "", relayProcessor) + } if err != nil { - utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "unwantedProviders", Value: unwantedProviders}}...) - if relayResult != nil && relayResult.ProviderInfo.ProviderAddress != "" { - unwantedProviders[relayResult.ProviderInfo.ProviderAddress] = struct{}{} - } - if common.IsTimeout(err) { - timeouts++ - } + utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "relayProcessor", Value: relayProcessor}}...) } else { - unwantedProviders = map[string]struct{}{} - utils.LavaFormatInfo("[+] init relay succeeded", []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "latestBlock", Value: relayResult.Reply.LatestBlock}, {Key: "provider address", Value: relayResult.ProviderInfo.ProviderAddress}}...) - - rpccs.relaysMonitor.LogRelay() - success = true - - // If this is the first time we send relays, we want to send all of them, instead of break on first successful relay - // That way, we populate the providers with the latest blocks with successful relays - if !initialRelays { - break + err := relayProcessor.WaitForResults(ctx) + if err != nil { + utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "relayProcessor", Value: relayProcessor}}...) + } else { + relayResult, err := relayProcessor.ProcessingResult() + if err == nil { + utils.LavaFormatInfo("[+] init relay succeeded", []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "latestBlock", Value: relayResult.Reply.LatestBlock}, {Key: "provider address", Value: relayResult.ProviderInfo.ProviderAddress}}...) 
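+					// record the successful init relay in the relays monitor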
+ rpccs.relaysMonitor.LogRelay() + success = true + // If this is the first time we send relays, we want to send all of them, instead of break on first successful relay + // That way, we populate the providers with the latest blocks with successful relays + if !initialRelays { + break + } + } else { + utils.LavaFormatError("[-] failed sending init relay", err, []utils.Attribute{{Key: "chainID", Value: rpccs.listenEndpoint.ChainID}, {Key: "APIInterface", Value: rpccs.listenEndpoint.ApiInterface}, {Key: "relayProcessor", Value: relayProcessor}}...) + } } } time.Sleep(2 * time.Millisecond) @@ -287,127 +290,101 @@ func (rpccs *RPCConsumerServer) SendRelay( seenBlock = 0 } relayRequestData := lavaprotocol.NewRelayData(ctx, connectionType, url, []byte(req), seenBlock, reqBlock, rpccs.listenEndpoint.ApiInterface, chainMessage.GetRPCMessage().GetHeaders(), chainlib.GetAddon(chainMessage), common.GetExtensionNames(chainMessage.GetExtensions())) - relayResults := []*common.RelayResult{} - relayErrors := &RelayErrors{onFailureMergeAll: true} - blockOnSyncLoss := map[string]struct{}{} - modifiedOnLatestReq := false - errorRelayResult := &common.RelayResult{} // returned on error - retries := uint64(0) - timeouts := 0 - unwantedProviders := rpccs.GetInitialUnwantedProviders(directiveHeaders) - - for ; retries < MaxRelayRetries; retries++ { - // TODO: make this async between different providers - relayResult, err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, &unwantedProviders, timeouts) - if relayResult == nil { - utils.LavaFormatError("unexpected behavior relay result returned nil from sendRelayToProvider", nil) - continue - } else if relayResult.ProviderInfo.ProviderAddress != "" { - if err != nil { - // add this provider to the erroring providers - if errorRelayResult.ProviderInfo.ProviderAddress != "" { - errorRelayResult.ProviderInfo.ProviderAddress += "," - } - errorRelayResult.ProviderInfo.ProviderAddress += relayResult.ProviderInfo.ProviderAddress - _, ok := blockOnSyncLoss[relayResult.ProviderInfo.ProviderAddress] - if !ok && lavasession.IsSessionSyncLoss(err) { - // allow this provider to be wantedProvider on a retry, if it didn't fail once on syncLoss - blockOnSyncLoss[relayResult.ProviderInfo.ProviderAddress] = struct{}{} - utils.LavaFormatWarning("Identified SyncLoss in provider, not removing it from list for another attempt", err, utils.Attribute{Key: "address", Value: relayResult.ProviderInfo.ProviderAddress}) - } else { - unwantedProviders[relayResult.ProviderInfo.ProviderAddress] = struct{}{} - } - if common.IsTimeout(err) { - timeouts++ - } - } - } - if err != nil { - if relayResult.GetStatusCode() != 0 { - // keep the error status code - errorRelayResult.StatusCode = relayResult.GetStatusCode() - } - relayErrors.relayErrors = append(relayErrors.relayErrors, RelayError{err: err, ProviderInfo: relayResult.ProviderInfo}) - if lavasession.PairingListEmptyError.Is(err) { - // if we ran out of pairings because unwantedProviders is too long or validProviders is too short, continue to reply handling code - break - } - // decide if we should break here if its something retry won't solve - utils.LavaFormatDebug("could not send relay to provider", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "error", Value: err.Error()}, utils.Attribute{Key: "endpoint", Value: rpccs.listenEndpoint}) - continue - } - relayResults = append(relayResults, relayResult) - unwantedProviders[relayResult.ProviderInfo.ProviderAddress] = struct{}{} - // future 
relay requests and data reliability requests need to ask for the same specific block height to get consensus on the reply - // we do not modify the chain message data on the consumer, only it's requested block, so we let the provider know it can't put any block height it wants by setting a specific block height - reqBlock, _ := chainMessage.RequestedBlock() - if reqBlock == spectypes.LATEST_BLOCK { - modifiedOnLatestReq = chainMessage.UpdateLatestBlockInMessage(relayResult.Request.RelayData.RequestBlock, false) - if !modifiedOnLatestReq { - relayResult.Finalized = false // shut down data reliability - } - } - if len(relayResults) >= rpccs.requiredResponses { - break - } - } + relayProcessor, err := rpccs.ProcessRelaySend(ctx, directiveHeaders, chainMessage, relayRequestData, dappID, consumerIp) + if err != nil && !relayProcessor.HasResults() { + // we can't send anymore, and we don't have any responses + return nil, utils.LavaFormatError("failed getting responses from providers", err, utils.Attribute{Key: "GUID", Value: ctx}, utils.LogAttr("endpoint", rpccs.listenEndpoint.Key())) + } + // Handle Data Reliability enabled, dataReliabilityThreshold := rpccs.chainParser.DataReliabilityParams() if enabled { - for _, relayResult := range relayResults { - // new context is needed for data reliability as some clients cancel the context they provide when the relay returns - // as data reliability happens in a go routine it will continue while the response returns. - guid, found := utils.GetUniqueIdentifier(ctx) - dataReliabilityContext := context.Background() - if found { - dataReliabilityContext = utils.WithUniqueIdentifier(dataReliabilityContext, guid) - } - go rpccs.sendDataReliabilityRelayIfApplicable(dataReliabilityContext, dappID, consumerIp, relayResult, chainMessage, dataReliabilityThreshold, unwantedProviders) // runs asynchronously + // new context is needed for data reliability as some clients cancel the context they provide when the relay returns + // as data reliability happens in a go routine it will continue while the response returns. 
+ guid, found := utils.GetUniqueIdentifier(ctx) + dataReliabilityContext := context.Background() + if found { + dataReliabilityContext = utils.WithUniqueIdentifier(dataReliabilityContext, guid) } + go rpccs.sendDataReliabilityRelayIfApplicable(dataReliabilityContext, dappID, consumerIp, chainMessage, dataReliabilityThreshold, relayProcessor) // runs asynchronously } - if len(relayResults) == 0 { - rpccs.appendHeadersToRelayResult(ctx, errorRelayResult, retries) - // suggest the user to add the timeout flag - if uint64(timeouts) == retries && retries > 0 { - utils.LavaFormatDebug("all relays timeout", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "errors", Value: relayErrors.relayErrors}) - return errorRelayResult, utils.LavaFormatError("Failed all relay retries due to timeout consider adding 'lava-relay-timeout' header to extend the allowed timeout duration", nil, utils.Attribute{Key: "GUID", Value: ctx}) - } - bestRelayError := relayErrors.GetBestErrorMessageForUser() - return errorRelayResult, utils.LavaFormatError("Failed all retries", nil, utils.Attribute{Key: "GUID", Value: ctx}, utils.LogAttr("error", bestRelayError.err), utils.LogAttr("chain_id", rpccs.listenEndpoint.ChainID)) - } else if len(relayErrors.relayErrors) > 0 { - utils.LavaFormatDebug("relay succeeded but had some errors", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "errors", Value: relayErrors}) - } - var returnedResult *common.RelayResult - for _, iteratedResult := range relayResults { - // TODO: go over rpccs.requiredResponses and get majority - returnedResult = iteratedResult + returnedResult, err := relayProcessor.ProcessingResult() + rpccs.appendHeadersToRelayResult(ctx, returnedResult, relayProcessor.ProtocolErrors()) + if err != nil { + return returnedResult, utils.LavaFormatError("failed processing responses from providers", err, utils.Attribute{Key: "GUID", Value: ctx}, utils.LogAttr("endpoint", rpccs.listenEndpoint.Key())) } - if analytics != nil { currentLatency := time.Since(relaySentTime) analytics.Latency = currentLatency.Milliseconds() analytics.ComputeUnits = chainMessage.GetApi().ComputeUnits } - if retries > 0 { - utils.LavaFormatDebug("relay succeeded after retries", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "retries", Value: retries}) - } - rpccs.appendHeadersToRelayResult(ctx, returnedResult, retries) - rpccs.relaysMonitor.LogRelay() - return returnedResult, nil } +func (rpccs *RPCConsumerServer) ProcessRelaySend(ctx context.Context, directiveHeaders map[string]string, chainMessage chainlib.ChainMessage, relayRequestData *pairingtypes.RelayPrivateData, dappID string, consumerIp string) (*RelayProcessor, error) { + // make sure all of the child contexts are cancelled when we exit + ctx, cancel := context.WithCancel(ctx) + defer cancel() + relayProcessor := NewRelayProcessor(ctx, lavasession.NewUsedProviders(directiveHeaders), rpccs.requiredResponses, chainMessage, rpccs.consumerConsistency, dappID, consumerIp) + err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) + if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { + // we failed to send a batch of relays, if there are no active sends we can terminate + return relayProcessor, err + } + // a channel to be notified processing was done, true means we have results and can return + gotResults := make(chan bool) + processingTimeout, relayTimeout := rpccs.getProcessingTimeout(chainMessage) + + readResultsFromProcessor := func() { 
+ processingCtx, cancel := context.WithTimeout(ctx, processingTimeout) + defer cancel() + // ProcessResults is reading responses while blocking until the conditions are met + relayProcessor.WaitForResults(processingCtx) + // decide if we need to resend or not + if relayProcessor.HasRequiredNodeResults() { + gotResults <- true + } else { + gotResults <- false + } + } + go readResultsFromProcessor() + // every relay timeout we send a new batch + startNewBatchTicker := time.NewTicker(relayTimeout) + for { + select { + case success := <-gotResults: + if success { + return relayProcessor, nil + } + err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) + if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { + // we failed to send a batch of relays, if there are no active sends we can terminate + return relayProcessor, err + } + go readResultsFromProcessor() + case <-startNewBatchTicker.C: + // only trigger another batch for non BestResult relays + if relayProcessor.selection != BestResult { + err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessor) + if err != nil && relayProcessor.usedProviders.CurrentlyUsed() == 0 { + // we failed to send a batch of relays, if there are no active sends we can terminate + return relayProcessor, err + } + } + } + } +} + func (rpccs *RPCConsumerServer) sendRelayToProvider( ctx context.Context, chainMessage chainlib.ChainMessage, relayRequestData *pairingtypes.RelayPrivateData, dappID string, consumerIp string, - unwantedProviders *map[string]struct{}, - timeouts int, -) (relayResult *common.RelayResult, errRet error) { + relayProcessor *RelayProcessor, +) (errRet error) { // get a session for the relay from the ConsumerSessionManager // construct a relay message with lavaprotocol package, include QoS and jail providers // sign the relay message with the lavaprotocol package @@ -419,12 +396,11 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( // if necessary send detection tx for hashes consensus mismatch // handle QoS updates // in case connection totally fails, update unresponsive providers in ConsumerSessionManager - isSubscription := chainlib.IsSubscription(chainMessage) if isSubscription { // temporarily disable subscriptions // TODO: fix subscription and disable this case. - return &common.RelayResult{ProviderInfo: common.ProviderInfo{ProviderAddress: ""}}, utils.LavaFormatError("Subscriptions are disabled currently", nil) + return utils.LavaFormatError("Subscriptions are disabled currently", nil) } var sharedStateId string // defaults to "", if shared state is disabled then no shared state will be used. 
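As a rough illustration of the loop ProcessRelaySend introduces above, here is a standalone sketch, assuming placeholder sendBatch and waitForResults closures in place of the real sendRelayToProvider and WaitForResults calls: send one batch, wait for the processor's verdict, and add another batch whenever a relay-timeout tick passes without enough answers. The explicit ticker Stop and the ctx.Done branch are defensive additions for the sketch, not a claim about the production code.

// sketch only: batch, wait, and re-batch on a relay-timeout ticker
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

func processSend(ctx context.Context, relayTimeout time.Duration, sendBatch func(context.Context) error, waitForResults func(context.Context) bool) error {
	// make sure all child work is cancelled when we exit
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	if err := sendBatch(ctx); err != nil {
		return err
	}
	gotResults := make(chan bool, 1)
	readResults := func() { gotResults <- waitForResults(ctx) }
	go readResults()

	ticker := time.NewTicker(relayTimeout) // every relay timeout we consider another batch
	defer ticker.Stop()
	for {
		select {
		case enough := <-gotResults:
			if enough {
				return nil // the processor has the required results
			}
			if err := sendBatch(ctx); err != nil {
				return err
			}
			go readResults()
		case <-ticker.C:
			// no verdict yet, add another batch of providers to the race
			if err := sendBatch(ctx); err != nil {
				return err
			}
		case <-ctx.Done():
			return errors.New("timed out waiting for relay results")
		}
	}
}

func main() {
	batches := 0
	sendBatch := func(context.Context) error { return nil }
	waitForResults := func(ctx context.Context) bool { batches++; time.Sleep(30 * time.Millisecond); return batches >= 2 }
	fmt.Println(processSend(context.Background(), 20*time.Millisecond, sendBatch, waitForResults))
}

With the real closures plugged in, the same structure gives the retry-on-timeout behavior without the caller keeping per-provider bookkeeping of unwanted providers and timeouts.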
@@ -476,15 +452,20 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( // Info was fetched from cache, so we don't need to change the state // so we can return here, no need to update anything and calculate as this info was fetched from the cache reply.Data = outputFormatter(reply.Data) - relayResult = &common.RelayResult{ + relayResult := common.RelayResult{ Reply: reply, Request: &pairingtypes.RelayRequest{ RelayData: relayRequestData, }, Finalized: false, // set false to skip data reliability + StatusCode: 200, ProviderInfo: common.ProviderInfo{ProviderAddress: ""}, } - return relayResult, nil + relayProcessor.SetResponse(&relayResponse{ + relayResult: relayResult, + err: nil, + }) + return nil } // cache failed, move on to regular relay if performance.NotConnectedError.Is(cacheError) { @@ -503,20 +484,15 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( virtualEpoch := rpccs.consumerTxSender.GetLatestVirtualEpoch() addon := chainlib.GetAddon(chainMessage) extensions := chainMessage.GetExtensions() - - sessions, err := rpccs.consumerSessionManager.GetSessions(ctx, chainlib.GetComputeUnits(chainMessage), *unwantedProviders, reqBlock, addon, extensions, chainlib.GetStateful(chainMessage), virtualEpoch) + sessions, err := rpccs.consumerSessionManager.GetSessions(ctx, chainlib.GetComputeUnits(chainMessage), relayProcessor.GetUsedProviders(), reqBlock, addon, extensions, chainlib.GetStateful(chainMessage), virtualEpoch) if err != nil { if lavasession.PairingListEmptyError.Is(err) && (addon != "" || len(extensions) > 0) { // if we have no providers for a specific addon or extension, return an indicative error err = utils.LavaFormatError("No Providers For Addon Or Extension", err, utils.LogAttr("addon", addon), utils.LogAttr("extensions", extensions)) } - return &common.RelayResult{ProviderInfo: common.ProviderInfo{ProviderAddress: ""}}, err + return err } - // Make a channel for all providers to send responses - responses := make(chan *relayResponse, len(sessions)) - - relayTimeout := chainlib.GetRelayTimeout(chainMessage, rpccs.chainParser, timeouts) // Iterate over the sessions map for providerPublicAddress, sessionInfo := range sessions { // Launch a separate goroutine for each session @@ -538,10 +514,11 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( } defer func() { // Return response - responses <- &relayResponse{ - relayResult: localRelayResult, + relayProcessor.SetResponse(&relayResponse{ + relayResult: *localRelayResult, err: errResponse, - } + }) + // Close context goroutineCtxCancel() }() @@ -571,7 +548,8 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( // unique per dappId and ip consumerToken := common.GetUniqueToken(dappID, consumerIp) - relayLatency, errResponse, backoff := rpccs.relayInner(goroutineCtx, singleConsumerSession, localRelayResult, relayTimeout, chainMessage, consumerToken) + processingTimeout, relayTimeout := rpccs.getProcessingTimeout(chainMessage) + relayLatency, errResponse, backoff := rpccs.relayInner(goroutineCtx, singleConsumerSession, localRelayResult, processingTimeout, chainMessage, consumerToken) if errResponse != nil { failRelaySession := func(origErr error, backoff_ bool) { backOffDuration := 0 * time.Second @@ -616,9 +594,10 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( utils.Attribute{Key: "finalizationConsensus", Value: rpccs.finalizationConsensus.String()}, ) } + errResponse = rpccs.consumerSessionManager.OnSessionDone(singleConsumerSession, latestBlock, chainlib.GetComputeUnits(chainMessage), relayLatency, 
singleConsumerSession.CalculateExpectedLatency(relayTimeout), expectedBH, numOfProviders, pairingAddressesLen, chainMessage.GetApi().Category.HangingApi) // session done successfully - if rpccs.cache.CacheActive() { + if rpccs.cache.CacheActive() && rpcclient.ValidateStatusCodes(localRelayResult.StatusCode, true) == nil { // copy reply data so if it changes it doesn't panic mid async send copyReply := &pairingtypes.RelayReply{} copyReplyErr := protocopy.DeepCopyProtoObject(localRelayResult.Reply, copyReply) @@ -664,98 +643,11 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( } }() } + // localRelayResult is being sent on the relayProcessor by a deferred function }(providerPublicAddress, sessionInfo) } - - // Getting the best result from the providers, - // if there was an error we wait for the next result util timeout or a valid response - // priority order {valid response -> error response -> relay error} - // if there were multiple error responses picking the majority - response := rpccs.getBestResult(relayTimeout, responses, len(sessions), chainMessage) - - if response == nil { - return &common.RelayResult{ProviderInfo: common.ProviderInfo{ProviderAddress: ""}}, utils.LavaFormatError("Received unexpected nil response from getBestResult", nil, utils.LogAttr("sessions", sessions), utils.LogAttr("chainMessage", chainMessage)) - } - - if response.err == nil && response.relayResult != nil && response.relayResult.Reply != nil { - // no error, update the seen block - blockSeen := response.relayResult.Reply.LatestBlock - rpccs.consumerConsistency.SetSeenBlock(blockSeen, dappID, consumerIp) - } - - return response.relayResult, response.err -} - -func (rpccs *RPCConsumerServer) getBestResult(timeout time.Duration, responses chan *relayResponse, numberOfSessions int, chainMessage chainlib.ChainMessage) *relayResponse { - responsesReceived := 0 - nodeResponseErrors := &RelayErrors{relayErrors: []RelayError{}} - protocolResponseErrors := &RelayErrors{relayErrors: []RelayError{}, onFailureMergeAll: true} - // a helper function to fetch the best response (prioritize node over protocol) - getBestResponseBetweenNodeAndProtocolErrors := func() (*relayResponse, error) { - if len(nodeResponseErrors.relayErrors) > 0 { // if we have node errors, we prefer returning them over protocol errors. 
- bestErrorMessage := nodeResponseErrors.GetBestErrorMessageForUser() - return bestErrorMessage.response, nil - } - if len(protocolResponseErrors.relayErrors) > 0 { // if we have protocol errors at this point return the best one - protocolsBestErrorMessage := protocolResponseErrors.GetBestErrorMessageForUser() - return protocolsBestErrorMessage.response, nil - } - return nil, fmt.Errorf("failed getting best response") - } - startTime := time.Now() - for { - select { - case response := <-responses: - // increase responses received - responsesReceived++ - if response.err == nil { - // validate if its a error response (from the node not the provider) - foundError, errorMessage := chainMessage.CheckResponseError(response.relayResult.Reply.Data, response.relayResult.StatusCode) - // print debug only when we have multiple responses - if numberOfSessions > 1 { - utils.LavaFormatDebug("Got Response", utils.LogAttr("responsesReceived", responsesReceived), utils.LogAttr("out_of", numberOfSessions), utils.LogAttr("foundError", foundError), utils.LogAttr("errorMessage", errorMessage), utils.LogAttr("Status code", response.relayResult.StatusCode)) - } - if foundError { - // this is a node error, meaning we still didn't get a good response. - // we will choose to wait until there will be a response or timeout happens - // if timeout happens we will take the majority of response messages - nodeResponseErrors.relayErrors = append(nodeResponseErrors.relayErrors, RelayError{err: fmt.Errorf(errorMessage), ProviderInfo: response.relayResult.ProviderInfo, response: response}) - } else { - // Return the first successful response - return response // returning response - } - } else { - // we want to keep the error message in a separate response error structure - // in case we got only errors and we want to return the best one - protocolResponseErrors.relayErrors = append(protocolResponseErrors.relayErrors, RelayError{err: response.err, ProviderInfo: response.relayResult.ProviderInfo, response: response}) - } - // check if this is the last response we are going to receive - // we get here only if all other responses including this one are not valid responses - // (whether its a node error or protocol errors) - if responsesReceived == numberOfSessions { - bestRelayResult, err := getBestResponseBetweenNodeAndProtocolErrors() - if err == nil { // successfully sent the channel response - return bestRelayResult - } - // if we got here, we for some reason failed to fetch both the best node error and the protocol error - // it indicates mostly an unwanted behavior. - utils.LavaFormatWarning("failed getting best error message for both node and protocol", nil, - utils.LogAttr("nodeResponseErrors", nodeResponseErrors), - utils.LogAttr("protocolsBestErrorMessage", protocolResponseErrors), - utils.LogAttr("numberOfSessions", numberOfSessions), - ) - return response - } - case <-time.After(timeout + 3*time.Second - time.Since(startTime)): - // Timeout occurred, try fetching the best result we have, prefer node errors over protocol errors - bestRelayResponse, err := getBestResponseBetweenNodeAndProtocolErrors() - if err == nil { // successfully sent the channel response - return bestRelayResponse - } - // failed fetching any error, getting here indicates a real context timeout happened. 
- return &relayResponse{nil, NoResponseTimeout} - } - } + // finished setting up go routines, can return and wait for responses + return nil } func (rpccs *RPCConsumerServer) relayInner(ctx context.Context, singleConsumerSession *lavasession.SingleConsumerSession, relayResult *common.RelayResult, relayTimeout time.Duration, chainMessage chainlib.ChainMessage, consumerToken string) (relayLatency time.Duration, err error, needsBackoff bool) { @@ -826,7 +718,7 @@ func (rpccs *RPCConsumerServer) relayInner(ctx context.Context, singleConsumerSe enabled, _ := rpccs.chainParser.DataReliabilityParams() if enabled { // TODO: DETECTION instead of existingSessionLatestBlock, we need proof of last reply to send the previous reply and the current reply - finalizedBlocks, finalizationConflict, err := lavaprotocol.VerifyFinalizationData(reply, relayRequest, providerPublicAddress, rpccs.consumerAddress, existingSessionLatestBlock, blockDistanceForFinalizedData) + finalizedBlocks, finalizationConflict, err := lavaprotocol.VerifyFinalizationData(reply, relayRequest, providerPublicAddress, rpccs.ConsumerAddress, existingSessionLatestBlock, blockDistanceForFinalizedData) if err != nil { if lavaprotocol.ProviderFinzalizationDataAccountabilityError.Is(err) && finalizationConflict != nil { go rpccs.consumerTxSender.TxConflictDetection(ctx, finalizationConflict, nil, nil, singleConsumerSession.Parent) @@ -863,17 +755,30 @@ func (rpccs *RPCConsumerServer) relaySubscriptionInner(ctx context.Context, endp return err } -func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context.Context, dappID string, consumerIp string, relayResult *common.RelayResult, chainMessage chainlib.ChainMessage, dataReliabilityThreshold uint32, unwantedProviders map[string]struct{}) error { - // validate relayResult is not nil - if relayResult == nil || relayResult.Reply == nil || relayResult.Request == nil { - return utils.LavaFormatError("sendDataReliabilityRelayIfApplicable relayResult nil check", nil, utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "relayResult", Value: relayResult}) - } - +func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context.Context, dappID string, consumerIp string, chainMessage chainlib.ChainMessage, dataReliabilityThreshold uint32, relayProcessor *RelayProcessor) error { + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() specCategory := chainMessage.GetApi().Category - if !specCategory.Deterministic || !relayResult.Finalized { + if !specCategory.Deterministic { return nil // disabled for this spec and requested block so no data reliability messages } + if rand.Uint32() > dataReliabilityThreshold { + // decided not to do data reliability + return nil + } + // only need to send another relay if we don't have enough replies + results := []common.RelayResult{} + for _, result := range relayProcessor.NodeResults() { + if result.Finalized { + results = append(results, result) + } + } + if len(results) == 0 { + // nothing to check + return nil + } + reqBlock, _ := chainMessage.RequestedBlock() if reqBlock <= spectypes.NOT_APPLICABLE { if reqBlock <= spectypes.LATEST_BLOCK { @@ -882,46 +787,64 @@ func (rpccs *RPCConsumerServer) sendDataReliabilityRelayIfApplicable(ctx context // does not support sending data reliability requests on a block that is not specific return nil } - - if rand.Uint32() > dataReliabilityThreshold { - // decided not to do data reliability - return nil - } - relayRequestData := lavaprotocol.NewRelayData(ctx, 
relayResult.Request.RelayData.ConnectionType, relayResult.Request.RelayData.ApiUrl, relayResult.Request.RelayData.Data, relayResult.Request.RelayData.SeenBlock, reqBlock, relayResult.Request.RelayData.ApiInterface, chainMessage.GetRPCMessage().GetHeaders(), relayResult.Request.RelayData.Addon, relayResult.Request.RelayData.Extensions) - // TODO: give the same timeout the original provider got by setting the same retry - relayResultDataReliability, err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, &unwantedProviders, 0) - if err != nil { - errAttributes := []utils.Attribute{} - // failed to send to a provider - if relayResultDataReliability.ProviderInfo.ProviderAddress != "" { - errAttributes = append(errAttributes, utils.Attribute{Key: "address", Value: relayResultDataReliability.ProviderInfo.ProviderAddress}) + relayResult := results[0] + if len(results) < 2 { + relayRequestData := lavaprotocol.NewRelayData(ctx, relayResult.Request.RelayData.ConnectionType, relayResult.Request.RelayData.ApiUrl, relayResult.Request.RelayData.Data, relayResult.Request.RelayData.SeenBlock, reqBlock, relayResult.Request.RelayData.ApiInterface, chainMessage.GetRPCMessage().GetHeaders(), relayResult.Request.RelayData.Addon, relayResult.Request.RelayData.Extensions) + relayProcessorDataReliability := NewRelayProcessor(ctx, relayProcessor.usedProviders, 1, chainMessage, rpccs.consumerConsistency, dappID, consumerIp) + err := rpccs.sendRelayToProvider(ctx, chainMessage, relayRequestData, dappID, consumerIp, relayProcessorDataReliability) + if err != nil { + return utils.LavaFormatWarning("failed data reliability relay to provider", err, utils.LogAttr("relayProcessorDataReliability", relayProcessorDataReliability)) } - errAttributes = append(errAttributes, utils.Attribute{Key: "relayRequestData", Value: relayRequestData}) - return utils.LavaFormatWarning("failed data reliability relay to provider", err, errAttributes...) 
- } - if !relayResultDataReliability.Finalized { - utils.LavaFormatInfo("skipping data reliability check since response from second provider was not finalized", utils.Attribute{Key: "providerAddress", Value: relayResultDataReliability.ProviderInfo.ProviderAddress}) - return nil - } - conflict := lavaprotocol.VerifyReliabilityResults(ctx, relayResult, relayResultDataReliability, chainMessage.GetApiCollection(), rpccs.chainParser) - if conflict != nil { - // TODO: remove this check when we fix the missing extensions information on conflict detection transaction - if relayRequestData.Extensions == nil || len(relayRequestData.Extensions) == 0 { - err := rpccs.consumerTxSender.TxConflictDetection(ctx, nil, conflict, nil, relayResultDataReliability.ConflictHandler) - if err != nil { - utils.LavaFormatError("could not send detection Transaction", err, utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "conflict", Value: conflict}) + processingTimeout, _ := rpccs.getProcessingTimeout(chainMessage) + processingCtx, cancel := context.WithTimeout(ctx, processingTimeout) + defer cancel() + err = relayProcessorDataReliability.WaitForResults(processingCtx) + if err != nil { + return utils.LavaFormatWarning("failed sending data reliability relays", err, utils.Attribute{Key: "relayProcessorDataReliability", Value: relayProcessorDataReliability}) + } + relayResultsDataReliability := relayProcessorDataReliability.NodeResults() + resultsDataReliability := []common.RelayResult{} + for _, result := range relayResultsDataReliability { + if result.Finalized { + resultsDataReliability = append(resultsDataReliability, result) } - if rpccs.reporter != nil { - utils.LavaFormatDebug("sending conflict report to BE", utils.LogAttr("conflicting api", chainMessage.GetApi().Name)) - rpccs.reporter.AppendConflict(metrics.NewConflictRequest(relayResult.Request, relayResult.Reply, relayResultDataReliability.Request, relayResultDataReliability.Reply)) + } + if len(resultsDataReliability) == 0 { + utils.LavaFormatDebug("skipping data reliability check since responses from second batch was not finalized", utils.Attribute{Key: "results", Value: relayResultsDataReliability}) + return nil + } + results = append(results, resultsDataReliability...) 
+ } + for i := 0; i < len(results)-1; i++ { + relayResult := results[i] + relayResultDataReliability := results[i+1] + conflict := lavaprotocol.VerifyReliabilityResults(ctx, &relayResult, &relayResultDataReliability, chainMessage.GetApiCollection(), rpccs.chainParser) + if conflict != nil { + // TODO: remove this check when we fix the missing extensions information on conflict detection transaction + if len(chainMessage.GetExtensions()) == 0 { + err := rpccs.consumerTxSender.TxConflictDetection(ctx, nil, conflict, nil, relayResultDataReliability.ConflictHandler) + if err != nil { + utils.LavaFormatError("could not send detection Transaction", err, utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "conflict", Value: conflict}) + } + if rpccs.reporter != nil { + utils.LavaFormatDebug("sending conflict report to BE", utils.LogAttr("conflicting api", chainMessage.GetApi().Name)) + rpccs.reporter.AppendConflict(metrics.NewConflictRequest(relayResult.Request, relayResult.Reply, relayResultDataReliability.Request, relayResultDataReliability.Reply)) + } } + } else { + utils.LavaFormatDebug("[+] verified relay successfully with data reliability", utils.LogAttr("api", chainMessage.GetApi().Name)) } - } else { - utils.LavaFormatDebug("[+] verified relay successfully with data reliability", utils.LogAttr("api", chainMessage.GetApi().Name)) } return nil } +func (rpccs *RPCConsumerServer) getProcessingTimeout(chainMessage chainlib.ChainMessage) (processingTimeout time.Duration, relayTimeout time.Duration) { + _, averageBlockTime, _, _ := rpccs.chainParser.ChainBlockStats() + relayTimeout = chainlib.GetRelayTimeout(chainMessage, averageBlockTime) + processingTimeout = common.GetTimeoutForProcessing(relayTimeout, chainlib.GetTimeoutInfo(chainMessage)) + return processingTimeout, relayTimeout +} + func (rpccs *RPCConsumerServer) LavaDirectiveHeaders(metadata []pairingtypes.Metadata) ([]pairingtypes.Metadata, map[string]string) { metadataRet := []pairingtypes.Metadata{} headerDirectives := map[string]string{} @@ -943,18 +866,6 @@ func (rpccs *RPCConsumerServer) LavaDirectiveHeaders(metadata []pairingtypes.Met return metadataRet, headerDirectives } -func (rpccs *RPCConsumerServer) GetInitialUnwantedProviders(directiveHeaders map[string]string) map[string]struct{} { - unwantedProviders := map[string]struct{}{} - blockedProviders, ok := directiveHeaders[common.BLOCK_PROVIDERS_ADDRESSES_HEADER_NAME] - if ok { - providerAddressesToBlock := strings.Split(blockedProviders, ",") - for _, providerAddress := range providerAddressesToBlock { - unwantedProviders[providerAddress] = struct{}{} - } - } - return unwantedProviders -} - func (rpccs *RPCConsumerServer) getExtensionsFromDirectiveHeaders(directiveHeaders map[string]string) extensionslib.ExtensionInfo { extensionsStr, ok := directiveHeaders[common.EXTENSION_OVERRIDE_HEADER_NAME] if ok { @@ -985,7 +896,7 @@ func (rpccs *RPCConsumerServer) HandleDirectiveHeadersForMessage(chainMessage ch chainMessage.SetForceCacheRefresh(ok) } -func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, relayResult *common.RelayResult, retries uint64) { +func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, relayResult *common.RelayResult, protocolErrors uint64) { if relayResult == nil { return } @@ -999,11 +910,21 @@ func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, }) } // add the relay retried count - if retries > 0 { + if protocolErrors > 0 { metadataReply = append(metadataReply, 
pairingtypes.Metadata{ Name: common.RETRY_COUNT_HEADER_NAME, - Value: strconv.FormatUint(retries, 10), + Value: strconv.FormatUint(protocolErrors, 10), + }) + } + if relayResult.Reply == nil { + relayResult.Reply = &pairingtypes.RelayReply{} + } + if relayResult.Reply.LatestBlock > 0 { + metadataReply = append(metadataReply, + pairingtypes.Metadata{ + Name: common.PROVIDER_LATEST_BLOCK_HEADER_NAME, + Value: strconv.FormatInt(relayResult.Reply.LatestBlock, 10), }) } guid, found := utils.GetUniqueIdentifier(ctx) @@ -1015,9 +936,7 @@ func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, Value: guidStr, }) } - if relayResult.Reply == nil { - relayResult.Reply = &pairingtypes.RelayReply{} - } + relayResult.Reply.Metadata = append(relayResult.Reply.Metadata, metadataReply...) } diff --git a/protocol/rpcprovider/provider_listener.go b/protocol/rpcprovider/provider_listener.go index f82918a51c..f5bbd0e8c7 100644 --- a/protocol/rpcprovider/provider_listener.go +++ b/protocol/rpcprovider/provider_listener.go @@ -86,10 +86,14 @@ func NewProviderListener(ctx context.Context, networkAddress lavasession.Network var serveExecutor func() error if networkAddress.DisableTLS { utils.LavaFormatInfo("Running with disabled TLS configuration") - serveExecutor = func() error { return pl.httpServer.Serve(lis) } + serveExecutor = func() error { + return pl.httpServer.Serve(lis) + } } else { pl.httpServer.TLSConfig = lavasession.GetTlsConfig(networkAddress) - serveExecutor = func() error { return pl.httpServer.ServeTLS(lis, "", "") } + serveExecutor = func() error { + return pl.httpServer.ServeTLS(lis, "", "") + } } relayServer := &relayServer{relayReceivers: map[string]*relayReceiverWrapper{}} diff --git a/protocol/rpcprovider/reliabilitymanager/reliability_manager_test.go b/protocol/rpcprovider/reliabilitymanager/reliability_manager_test.go index a8f9977f9f..084a8498ae 100644 --- a/protocol/rpcprovider/reliabilitymanager/reliability_manager_test.go +++ b/protocol/rpcprovider/reliabilitymanager/reliability_manager_test.go @@ -185,7 +185,7 @@ func TestFullFlowReliabilityConflict(t *testing.T) { w.WriteHeader(http.StatusOK) fmt.Fprint(w, string(replyDataBuf)) }) - chainParser, chainProxy, chainFetcher, closeServer, err := chainlib.CreateChainLibMocks(ts.Ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../../", nil) + chainParser, chainProxy, chainFetcher, closeServer, _, err := chainlib.CreateChainLibMocks(ts.Ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../../", nil) if closeServer != nil { defer closeServer() } diff --git a/protocol/rpcprovider/rpcprovider.go b/protocol/rpcprovider/rpcprovider.go index a9d7a15d3a..44ed4e056d 100644 --- a/protocol/rpcprovider/rpcprovider.go +++ b/protocol/rpcprovider/rpcprovider.go @@ -313,7 +313,7 @@ func (rpcp *RPCProvider) SetupProviderEndpoints(rpcProviderEndpoints []*lavasess return disabledEndpointsList } -func (rpcp *RPCProvider) getAllAddonsAndExtensionsFromNodeUrlSlice(nodeUrls []common.NodeUrl) *ProviderPolicy { +func GetAllAddonsAndExtensionsFromNodeUrlSlice(nodeUrls []common.NodeUrl) *ProviderPolicy { policy := &ProviderPolicy{} for _, nodeUrl := range nodeUrls { policy.addons = append(policy.addons, nodeUrl.Addons...) // addons are added without validation while extensions are. so we add to the addons all. 
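The sendDataReliabilityRelayIfApplicable rework earlier in this diff follows a simple shape: sample whether to run the check at all, keep only finalized replies, fetch one extra reply when fewer than two exist, then compare adjacent replies pairwise. A standalone sketch of that shape, with Result, fetchExtra and the plain string comparison standing in for the real relay types and VerifyReliabilityResults:

// sketch only: threshold sampling plus pairwise comparison of finalized replies
package main

import (
	"fmt"
	"math/rand"
)

type Result struct {
	Provider  string
	Data      string
	Finalized bool
}

func dataReliabilityCheck(threshold uint32, results []Result, fetchExtra func() (Result, bool)) {
	if rand.Uint32() > threshold {
		return // sampled out: a higher threshold means the check runs more often
	}
	finalized := make([]Result, 0, len(results))
	for _, r := range results {
		if r.Finalized {
			finalized = append(finalized, r)
		}
	}
	if len(finalized) == 0 {
		return // nothing trustworthy to compare against
	}
	if len(finalized) < 2 {
		extra, ok := fetchExtra()
		if !ok || !extra.Finalized {
			return
		}
		finalized = append(finalized, extra)
	}
	// compare each reply with the next one; any mismatch is a potential conflict
	for i := 0; i < len(finalized)-1; i++ {
		if finalized[i].Data != finalized[i+1].Data {
			fmt.Printf("conflict between %s and %s\n", finalized[i].Provider, finalized[i+1].Provider)
		}
	}
}

func main() {
	extra := func() (Result, bool) { return Result{Provider: "p2", Data: "0xdef", Finalized: true}, true }
	// MaxUint32 threshold forces the check to run, so the demo is deterministic
	dataReliabilityCheck(^uint32(0), []Result{{Provider: "p1", Data: "0xabc", Finalized: true}}, extra)
}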
@@ -342,7 +342,7 @@ func (rpcp *RPCProvider) SetupEndpoint(ctx context.Context, rpcProviderEndpoint } // after registering for spec updates our chain parser contains the spec and we can add our addons and extensions to allow our provider to function properly - providerPolicy := rpcp.getAllAddonsAndExtensionsFromNodeUrlSlice(rpcProviderEndpoint.NodeUrls) + providerPolicy := GetAllAddonsAndExtensionsFromNodeUrlSlice(rpcProviderEndpoint.NodeUrls) utils.LavaFormatDebug("supported services for provider", utils.LogAttr("specId", rpcProviderEndpoint.ChainID), utils.LogAttr("apiInterface", apiInterface), diff --git a/protocol/rpcprovider/rpcprovider_server.go b/protocol/rpcprovider/rpcprovider_server.go index f435e07585..0a364c24a4 100644 --- a/protocol/rpcprovider/rpcprovider_server.go +++ b/protocol/rpcprovider/rpcprovider_server.go @@ -679,6 +679,7 @@ func (rpcps *RPCProviderServer) TryRelay(ctx context.Context, request *pairingty updatedChainMessage := false var blockLagForQosSync int64 blockLagForQosSync, averageBlockTime, blockDistanceToFinalization, blocksInFinalizationData = rpcps.chainParser.ChainBlockStats() + relayTimeout := chainlib.GetRelayTimeout(chainMsg, averageBlockTime) if dataReliabilityEnabled { var err error specificBlock := request.RelayData.RequestBlock @@ -690,7 +691,7 @@ func (rpcps *RPCProviderServer) TryRelay(ctx context.Context, request *pairingty // handle consistency, if the consumer requested information we do not have in the state tracker - latestBlock, requestedHashes, _, err = rpcps.handleConsistency(ctx, request.RelayData.GetSeenBlock(), request.RelayData.GetRequestBlock(), averageBlockTime, blockLagForQosSync, blockDistanceToFinalization, blocksInFinalizationData) + latestBlock, requestedHashes, _, err = rpcps.handleConsistency(ctx, relayTimeout, request.RelayData.GetSeenBlock(), request.RelayData.GetRequestBlock(), averageBlockTime, blockLagForQosSync, blockDistanceToFinalization, blocksInFinalizationData) if err != nil { return nil, err } @@ -824,7 +825,8 @@ func (rpcps *RPCProviderServer) TryRelay(ctx context.Context, request *pairingty if proofBlock < modifiedReqBlock && proofBlock < request.RelayData.SeenBlock { // we requested with a newer block, but don't necessarily have the finaliziation proof, chaintracker might be behind proofBlock = slices.Min([]int64{modifiedReqBlock, request.RelayData.SeenBlock}) - proofBlock, requestedHashes, err = rpcps.GetBlockDataForOptimisticFetch(ctx, proofBlock, blockDistanceToFinalization, blocksInFinalizationData, averageBlockTime) + + proofBlock, requestedHashes, err = rpcps.GetBlockDataForOptimisticFetch(ctx, relayTimeout, proofBlock, blockDistanceToFinalization, blocksInFinalizationData, averageBlockTime) if err != nil { return nil, utils.LavaFormatError("error getting block range for finalization proof", err) } @@ -849,7 +851,7 @@ func (rpcps *RPCProviderServer) TryRelay(ctx context.Context, request *pairingty return reply, nil } -func (rpcps *RPCProviderServer) GetBlockDataForOptimisticFetch(ctx context.Context, requiredProofBlock int64, blockDistanceToFinalization uint32, blocksInFinalizationData uint32, averageBlockTime time.Duration) (latestBlock int64, requestedHashes []*chaintracker.BlockStore, err error) { +func (rpcps *RPCProviderServer) GetBlockDataForOptimisticFetch(ctx context.Context, relayBaseTimeout time.Duration, requiredProofBlock int64, blockDistanceToFinalization uint32, blocksInFinalizationData uint32, averageBlockTime time.Duration) (latestBlock int64, requestedHashes []*chaintracker.BlockStore, err 
error) { utils.LavaFormatDebug("getting new blockData for optimistic fetch", utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "requiredProofBlock", Value: requiredProofBlock}) proofBlock := requiredProofBlock toBlock := proofBlock - int64(blockDistanceToFinalization) @@ -862,7 +864,7 @@ func (rpcps *RPCProviderServer) GetBlockDataForOptimisticFetch(ctx context.Conte } timeSlept := 0 * time.Millisecond refreshTime := (averageBlockTime / chaintracker.MostFrequentPollingMultiplier) / 2 - sleepTime := slices.Min([]time.Duration{10 * refreshTime, timeCanWait}) + sleepTime := slices.Min([]time.Duration{10 * refreshTime, timeCanWait, relayBaseTimeout / 2}) sleepContext, cancel := context.WithTimeout(context.Background(), sleepTime) fetchedWithoutError := func() bool { timeSlept += refreshTime @@ -885,7 +887,7 @@ func (rpcps *RPCProviderServer) GetBlockDataForOptimisticFetch(ctx context.Conte return proofBlock, requestedHashes, err } -func (rpcps *RPCProviderServer) handleConsistency(ctx context.Context, seenBlock int64, requestBlock int64, averageBlockTime time.Duration, blockLagForQosSync int64, blockDistanceToFinalization uint32, blocksInFinalizationData uint32) (latestBlock int64, requestedHashes []*chaintracker.BlockStore, timeSlept time.Duration, err error) { +func (rpcps *RPCProviderServer) handleConsistency(ctx context.Context, baseRelayTimeout time.Duration, seenBlock int64, requestBlock int64, averageBlockTime time.Duration, blockLagForQosSync int64, blockDistanceToFinalization uint32, blocksInFinalizationData uint32) (latestBlock int64, requestedHashes []*chaintracker.BlockStore, timeSlept time.Duration, err error) { latestBlock, requestedHashes, changeTime, err := rpcps.GetLatestBlockData(ctx, blockDistanceToFinalization, blocksInFinalizationData) if err != nil { return 0, nil, 0, err @@ -907,6 +909,10 @@ func (rpcps *RPCProviderServer) handleConsistency(ctx context.Context, seenBlock deadline, ok := ctx.Deadline() probabilityBlockError := 0.0 halfTimeLeft := time.Until(deadline) / 2 // giving the node at least half the timeout time to process + if baseRelayTimeout/2 < halfTimeLeft { + // do not allow waiting the full timeout since now it's absurdly high + halfTimeLeft = baseRelayTimeout / 2 + } if ok { timeProviderHasS := (time.Since(changeTime) + halfTimeLeft).Seconds() // add waiting half the timeout time if changeTime.IsZero() { @@ -935,8 +941,7 @@ func (rpcps *RPCProviderServer) handleConsistency(ctx context.Context, seenBlock } // we are waiting for the state tracker to catch up with the requested block utils.LavaFormatDebug("waiting for state tracker to update", utils.Attribute{Key: "probabilityBlockError", Value: probabilityBlockError}, utils.Attribute{Key: "time", Value: time.Until(deadline)}, utils.Attribute{Key: "GUID", Value: ctx}, utils.Attribute{Key: "requestedBlock", Value: requestBlock}, utils.Attribute{Key: "seenBlock", Value: seenBlock}, utils.Attribute{Key: "latestBlock", Value: latestBlock}, utils.Attribute{Key: "blockGap", Value: blockGap}) - sleepTime := time.Until(deadline) / 2 // sleep up to half the timeout so we actually have time to do the relay - sleepContext, cancel := context.WithTimeout(context.Background(), sleepTime) + sleepContext, cancel := context.WithTimeout(context.Background(), halfTimeLeft) getLatestBlock := func() bool { ret, _ := rpcps.reliabilityManager.GetLatestBlockNum() // if we hit either seen or requested we can return diff --git a/protocol/rpcprovider/rpcprovider_server_test.go 
b/protocol/rpcprovider/rpcprovider_server_test.go index 8efa93f7e4..97131e2654 100644 --- a/protocol/rpcprovider/rpcprovider_server_test.go +++ b/protocol/rpcprovider/rpcprovider_server_test.go @@ -222,7 +222,7 @@ func TestHandleConsistency(t *testing.T) { w.WriteHeader(http.StatusOK) fmt.Fprint(w, string(replyDataBuf)) }) - chainParser, chainProxy, _, closeServer, err := chainlib.CreateChainLibMocks(ts.Ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) + chainParser, chainProxy, _, closeServer, _, err := chainlib.CreateChainLibMocks(ts.Ctx, specId, spectypes.APIInterfaceRest, serverHandler, "../../", nil) if closeServer != nil { defer closeServer() } @@ -251,7 +251,7 @@ func TestHandleConsistency(t *testing.T) { } }() ctx, cancel := context.WithTimeout(context.Background(), play.timeout) - latestBlock, _, timeSlept, err := rpcproviderServer.handleConsistency(ctx, seenBlock, requestBlock, averageBlockTime, blockLagForQosSync, blocksInFinalizationData, blockDistanceToFinalization) + latestBlock, _, timeSlept, err := rpcproviderServer.handleConsistency(ctx, play.timeout, seenBlock, requestBlock, averageBlockTime, blockLagForQosSync, blocksInFinalizationData, blockDistanceToFinalization) cancel() require.Equal(t, play.err == nil, err == nil, err, strconv.Itoa(calls)) require.Less(t, timeSlept, play.timeout) diff --git a/protocol/statetracker/updaters/policy_updater.go b/protocol/statetracker/updaters/policy_updater.go index 8dd7c3cd9e..5d87a1418c 100644 --- a/protocol/statetracker/updaters/policy_updater.go +++ b/protocol/statetracker/updaters/policy_updater.go @@ -71,7 +71,6 @@ func (pu *PolicyUpdater) UpdateEpoch(epoch uint64) { pu.lock.Lock() defer pu.lock.Unlock() // update policy now - utils.LavaFormatDebug("PolicyUpdater, fetching current policy and updating the effective policy", utils.LogAttr("epoch", epoch), utils.LogAttr("chainId", pu.chainId)) ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) defer cancel() policy, err := pu.policyFetcher.GetConsumerPolicy(ctx, pu.consumerAddress, pu.chainId) diff --git a/scripts/pre_setups/init_lava_only_test_5.sh b/scripts/pre_setups/init_lava_only_test_5.sh index 8a0e3e6530..79c3374989 100755 --- a/scripts/pre_setups/init_lava_only_test_5.sh +++ b/scripts/pre_setups/init_lava_only_test_5.sh @@ -35,11 +35,11 @@ PROVIDER5_LISTENER="127.0.0.1:2225" lavad tx subscription buy DefaultPlan $(lavad keys show user1 -a) -y --from user1 --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE wait_next_block -lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER1_LISTENER,1" 1 $(operator_address) -y --from servicer1 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE -lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER2_LISTENER,1" 1 $(operator_address) -y --from servicer2 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE -lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER3_LISTENER,1" 1 $(operator_address) -y --from servicer3 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE -lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER4_LISTENER,1" 1 $(operator_address) -y --from servicer4 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE -lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER5_LISTENER,1" 1 $(operator_address) -y --from servicer5 --provider-moniker "dummyMoniker" 
--gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER1_LISTENER,1" 1 $(operator_address) -y --delegate-limit 10ulava --from servicer1 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER2_LISTENER,1" 1 $(operator_address) -y --delegate-limit 10ulava --from servicer2 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER3_LISTENER,1" 1 $(operator_address) -y --delegate-limit 10ulava --from servicer3 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER4_LISTENER,1" 1 $(operator_address) -y --delegate-limit 10ulava --from servicer4 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE +lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER5_LISTENER,1" 1 $(operator_address) -y --delegate-limit 10ulava --from servicer5 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE sleep_until_next_epoch diff --git a/testutil/e2e/allowedErrorList.go b/testutil/e2e/allowedErrorList.go index 43b64c9dce..df4c0f862b 100644 --- a/testutil/e2e/allowedErrorList.go +++ b/testutil/e2e/allowedErrorList.go @@ -12,7 +12,8 @@ var allowedErrors = map[string]string{ } var allowedErrorsDuringEmergencyMode = map[string]string{ - "connection refused": "Connection to tendermint port sometimes can happen as we shut down the node and we try to fetch info during emergency mode", + "connection refused": "Connection to tendermint port sometimes can happen as we shut down the node and we try to fetch info during emergency mode", + "connection reset by peer": "Connection to tendermint port sometimes can happen as we shut down the node and we try to fetch info during emergency mode", } var allowedErrorsPaymentE2E = map[string]string{ diff --git a/testutil/e2e/e2eProviderConfigs/lavaConsumer1.yml b/testutil/e2e/e2eProviderConfigs/lavaConsumer1.yml index f31f2f4c85..af1f67666e 100644 --- a/testutil/e2e/e2eProviderConfigs/lavaConsumer1.yml +++ b/testutil/e2e/e2eProviderConfigs/lavaConsumer1.yml @@ -8,3 +8,4 @@ endpoints: - chain-id: LAV1 api-interface: grpc network-address: 127.0.0.1:3342 +debug-relays: true \ No newline at end of file diff --git a/testutil/e2e/e2eProviderConfigs/lavaConsumer2.yml b/testutil/e2e/e2eProviderConfigs/lavaConsumer2.yml index 403b31c0fd..3e5d3c0192 100644 --- a/testutil/e2e/e2eProviderConfigs/lavaConsumer2.yml +++ b/testutil/e2e/e2eProviderConfigs/lavaConsumer2.yml @@ -8,3 +8,4 @@ endpoints: - chain-id: LAV1 api-interface: grpc network-address: 127.0.0.1:3345 +debug-relays: true \ No newline at end of file diff --git a/testutil/e2e/e2eProviderConfigs/lavaConsumerEmergency1.yml b/testutil/e2e/e2eProviderConfigs/lavaConsumerEmergency1.yml index a8f03c79d0..01a23596ca 100644 --- a/testutil/e2e/e2eProviderConfigs/lavaConsumerEmergency1.yml +++ b/testutil/e2e/e2eProviderConfigs/lavaConsumerEmergency1.yml @@ -7,4 +7,5 @@ endpoints: network-address: 127.0.0.1:3347 - chain-id: LAV1 api-interface: grpc - network-address: 127.0.0.1:3348 \ No newline at end of file + network-address: 127.0.0.1:3348 +debug-relays: true \ No newline at end of file
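The rpcprovider_server.go changes above bound how long the provider may wait for its state tracker: the consistency wait is capped at half the base relay timeout even when the caller's deadline is much larger, and the optimistic-fetch sleep takes the minimum of the refresh-based value, the time the request can still wait, and half the relay timeout. A standalone sketch of that budgeting with illustrative numbers:

// sketch only: provider-side wait budgeting capped by the base relay timeout
package main

import (
	"fmt"
	"time"
)

// consistencyWait returns how long the provider may block waiting for a newer block.
func consistencyWait(deadline time.Time, baseRelayTimeout time.Duration) time.Duration {
	halfTimeLeft := time.Until(deadline) / 2 // leave half of the remaining time for the relay itself
	if baseRelayTimeout/2 < halfTimeLeft {
		// do not let an oversized client deadline stretch the wait
		halfTimeLeft = baseRelayTimeout / 2
	}
	return halfTimeLeft
}

// optimisticFetchSleep picks the polling sleep for optimistic block fetching.
func optimisticFetchSleep(refreshTime, timeCanWait, baseRelayTimeout time.Duration) time.Duration {
	candidates := []time.Duration{10 * refreshTime, timeCanWait, baseRelayTimeout / 2}
	min := candidates[0]
	for _, c := range candidates[1:] {
		if c < min {
			min = c
		}
	}
	return min
}

func main() {
	deadline := time.Now().Add(30 * time.Second) // client asked for a very long deadline
	fmt.Println(consistencyWait(deadline, 4*time.Second))                               // capped at ~2s
	fmt.Println(optimisticFetchSleep(150*time.Millisecond, time.Second, 4*time.Second)) // 1s
}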