From bd9f44e359b9f8177e0614435f5c01d1a20d2fdb Mon Sep 17 00:00:00 2001 From: nonergodic Date: Wed, 18 Dec 2024 12:57:12 -0800 Subject: [PATCH 1/3] add eager boolean evaluation utility functions --- docs/Optimization.md | 45 ++++++++++++++++++++++++++++++++++++++++---- src/Utils.sol | 14 ++++++++++++++ 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/docs/Optimization.md b/docs/Optimization.md index 9df8fed..df51ef8 100644 --- a/docs/Optimization.md +++ b/docs/Optimization.md @@ -1,17 +1,17 @@ # Compiler Optimization -List of ways to avoid short-comings of the current optimizer which lead to suboptimal byte code +List of ways to avoid short-comings of the current optimizer which lead to suboptimal byte code. ## for loop array length checking -``` +```solidity function iterate(uint[] memory myArray) { uint len = myArray.length; for (uint i; i < len; ++i) { /*...*/} } ``` is more efficient than -``` +```solidity function iterate(uint[] memory myArray) { for (uint i; i < myArray.length; ++i) { /*...*/} } @@ -22,4 +22,41 @@ If `myArray` uses `calldata` instead of `memory`, both versions produce the same ## prefer `< MAX + 1` over `<= MAX` for const comparison -Given that the EVM only supports `LT` and `GT` but not `LTE` or `GTE`, solc implements `x<=y` as `!(x>y)`. However, given a constant `MAX`, since solc resolves `MAX + 1` at compile time, `< MAX + 1` saves one `ISZERO` opcode. \ No newline at end of file +Given that the EVM only supports `LT` and `GT` but not `LTE` or `GTE`, solc implements `x<=y` as `!(x>y)`. However, given a constant `MAX`, since solc resolves `MAX + 1` at compile time, `< MAX + 1` saves one `ISZERO` opcode. 
+ +## consider using `eagerAnd` and `eagerOr` over short-curcuiting `&&` and `||` + +Short-circuiting `lhs && rhs` requires _at least_ the insertion of: + +| OpCode/ByteCode | Size | Gas | Explanation | +| --------------- | :--: | :-: | ----------------------------------------------------------- | +| `DUP1` | 1 | 3 | copy result of `lhs` which currently is on top of the stack | +| `PUSH2` | 1 | 3 | push location for code to eval/load `rhs` | +| jump offset | 2 | 0 | points to **second** `JUMPDST` | +| `JUMPI` | 1 | 10 | if `lhs` is `true` eval `rhs` too, otherwise short-circuit | +| `JUMPDST` | 1 | 1 | proceed here with the result on top of the stack | +| --------------- | ---- | --- | ----------------------------------------------------------- | +| `JUMPDST` | 1 | 1 | code to eval/load `rhs` starts here | +| `POP` | 1 | 3 | remove duplicated `true` from stack | +| --------------- | ---- | --- | ----------------------------------------------------------- | +| `PUSH2` | 1 | 3 | push location to jump back to where we proceed | +| jump offset | 2 | 0 | points to **first** `JUMPDST` (after `JUMPI`) | +| `JUMP` | 1 | 8 | jump back after evaluating `rhs` | +| --------------- | ---- | --- | ----------------------------------------------------------- | +| Total | 12 | 32 | | + +So our code will always bloat by at least 12 bytes, and even if the short-circuiting triggers, we still pay for the `PUSH`, the `JUMPI`, and stepping over the subsequent `JUMPDST` for a total of 17 gas, when the alternative can be as cheap as a single `AND` for 1 byte and 3 gas (if we just check a boolean thats already on the stack). + +This is particularly unnecessary when checking that a bunch of variables all have their expected values, and where short-circuiting would _at best_ make the failing path cheaper, while always introducing the gas overhead on our precious happy path. 
+ +The way to avoid this is using the `eagerAnd` and `eagerOr` utility functions: + +```solidity +function eagerAnd(bool lhs, bool rhs) pure returns (bool ret) { + assembly ("memory-safe") { + ret := and(lhs, rhs) + } +} +``` + +Thankfully, while solc is not smart enough to weigh the cost/side-effects of evaluating the right-hand side before deciding whether to short-circuit, and instead simply _always_ short-circuits, it will at least inline `eagerAnd` and `eagerOr`. diff --git a/src/Utils.sol b/src/Utils.sol index 63949f8..8da8174 100644 --- a/src/Utils.sol +++ b/src/Utils.sol @@ -26,3 +26,17 @@ function reRevert(bytes memory err) pure { revert(add(err, 32), mload(err)) } } + +//see Optimization.md for rationale on avoiding short-circuiting +function eagerAnd(bool lhs, bool rhs) pure returns (bool ret) { + assembly ("memory-safe") { + ret := and(lhs, rhs) + } +} + +//see Optimization.md for rationale on avoiding short-circuiting +function eagerOr(bool lhs, bool rhs) pure returns (bool ret) { + assembly ("memory-safe") { + ret := or(lhs, rhs) + } +} From 2c92ce8a60300e1bcdb1a651a572e1b002cc8637 Mon Sep 17 00:00:00 2001 From: Andreas <41449730+nonergodic@users.noreply.github.com> Date: Wed, 18 Dec 2024 13:14:09 -0800 Subject: [PATCH 2/3] Update docs/Optimization.md Co-authored-by: scnale --- docs/Optimization.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Optimization.md b/docs/Optimization.md index df51ef8..6a02ebf 100644 --- a/docs/Optimization.md +++ b/docs/Optimization.md @@ -24,7 +24,7 @@ If `myArray` uses `calldata` instead of `memory`, both versions produce the same Given that the EVM only supports `LT` and `GT` but not `LTE` or `GTE`, solc implements `x<=y` as `!(x>y)`. However, given a constant `MAX`, since solc resolves `MAX + 1` at compile time, `< MAX + 1` saves one `ISZERO` opcode. 
-## consider using `eagerAnd` and `eagerOr` over short-curcuiting `&&` and `||` +## consider using `eagerAnd` and `eagerOr` over short-circuiting `&&` and `||` Short-circuiting `lhs && rhs` requires _at least_ the insertion of: From 67c4f7934465c71b79f1f030103d757bbd16ded1 Mon Sep 17 00:00:00 2001 From: Andreas <41449730+nonergodic@users.noreply.github.com> Date: Wed, 18 Dec 2024 13:14:14 -0800 Subject: [PATCH 3/3] Update docs/Optimization.md Co-authored-by: scnale --- docs/Optimization.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Optimization.md b/docs/Optimization.md index 6a02ebf..b76f860 100644 --- a/docs/Optimization.md +++ b/docs/Optimization.md @@ -45,7 +45,7 @@ Short-circuiting `lhs && rhs` requires _at least_ the insertion of: | --------------- | ---- | --- | ----------------------------------------------------------- | | Total | 12 | 32 | | -So our code will always bloat by at least 12 bytes, and even if the short-circuiting triggers, we still pay for the `PUSH`, the `JUMPI`, and stepping over the subsequent `JUMPDST` for a total of 17 gas, when the alternative can be as cheap as a single `AND` for 1 byte and 3 gas (if we just check a boolean thats already on the stack). +So our code will always bloat by at least 12 bytes, and even if the short-circuiting triggers, we still pay for the `PUSH`, the `JUMPI`, and stepping over the subsequent `JUMPDST` for a total of 17 gas, when the alternative can be as cheap as a single `AND` for 1 byte and 3 gas (if we just check a boolean that's already on the stack). This is particularly unnecessary when checking that a bunch of variables all have their expected values, and where short-circuiting would _at best_ make the failing path cheaper, while always introducing the gas overhead on our precious happy path.