diff --git a/nimbus/evm/analysis.nim b/nimbus/evm/analysis.nim
new file mode 100644
index 0000000000..c2beca3087
--- /dev/null
+++ b/nimbus/evm/analysis.nim
@@ -0,0 +1,165 @@
+# Nimbus
+# Copyright (c) 2023 Status Research & Development GmbH
+# Licensed under either of
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+#    http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+#    http://opensource.org/licenses/MIT)
+# at your option. This file may not be copied, modified, or
+# distributed except according to those terms.
+
+import
+  interpreter/op_codes
+
+const
+  # Masks with the N lowest bits set. One bit stands for one code byte, so
+  # setN uses them to mark N consecutive bytes as data in a single step.
+  set2BitsMask = uint16(0b11)
+  set3BitsMask = uint16(0b111)
+  set4BitsMask = uint16(0b1111)
+  set5BitsMask = uint16(0b1_1111)
+  set6BitsMask = uint16(0b11_1111)
+  set7BitsMask = uint16(0b111_1111)
+
+# Bitvec is a bit vector which maps bytes in a program.
+# An unset bit means the byte is an opcode, a set bit means
+# it's data (i.e. argument of PUSHxx).
+type
+  Bitvec* = seq[byte]
+
+# set1 marks the single code byte at `pos` as data.
+proc set1(bits: var Bitvec, pos: int) =
+  let x = bits[pos div 8]
+  bits[pos div 8] = x or byte(1 shl (pos mod 8))
+
+# setN marks the 2..7 consecutive code bytes starting at `pos` as data.
+# `flag` is the matching setNBitsMask; the run may spill into the next byte.
+proc setN(bits: var Bitvec, flag: uint16, pos: int) =
+  let z = pos div 8
+  let a = flag shl (pos mod 8)
+  let x = bits[z]
+  bits[z] = x or byte(a)
+  let b = byte(a shr 8)
+  if b != 0:
+    # Plain assignment: the builders below walk the code front to back,
+    # so nothing has been marked in the following bitmap byte yet.
+    bits[z+1] = b
+
+# set8 marks the 8 consecutive code bytes starting at `pos` as data.
+proc set8(bits: var Bitvec, pos: int) =
+  let z = pos div 8
+  let a = byte(0xFF shl (pos mod 8))
+  bits[z] = bits[z] or a
+  bits[z+1] = not a
+
+# set16 marks the 16 consecutive code bytes starting at `pos` as data.
+proc set16(bits: var Bitvec, pos: int) =
+  let z = pos div 8
+  let a = byte(0xFF shl (pos mod 8))
+  bits[z] = bits[z] or a
+  bits[z+1] = 0xFF
+  bits[z+2] = not a
+
+# codeSegment checks if the position is in a code segment.
+proc codeSegment*(bits: Bitvec, pos: int): bool =
+  ((bits[pos div 8] shr (pos mod 8)) and 1) == 0
+
+# markData flags the `numbits` code bytes starting at `pc` as data and moves
+# `pc` past them. It is shared by the legacy and the EOF bitmap builders.
+proc markData(bits: var Bitvec; pc: var int; numbits: int) =
+  var left = numbits
+  # Consume the run in the largest chunks first: 16 bits, then 8 bits ...
+  while left >= 16:
+    bits.set16(pc)
+    pc += 16
+    left -= 16
+
+  while left >= 8:
+    bits.set8(pc)
+    pc += 8
+    left -= 8
+
+  # ... and finish with the remaining 0..7 bits.
+  case left
+  of 1: bits.set1(pc)
+  of 2: bits.setN(set2BitsMask, pc)
+  of 3: bits.setN(set3BitsMask, pc)
+  of 4: bits.setN(set4BitsMask, pc)
+  of 5: bits.setN(set5BitsMask, pc)
+  of 6: bits.setN(set6BitsMask, pc)
+  of 7: bits.setN(set7BitsMask, pc)
+  else: discard
+  pc += left
+
+# codeBitmapInternal is the internal implementation of codeBitmap.
+# It exists for the purpose of being able to run benchmark tests
+# without dynamic allocations affecting the results.
+proc codeBitmapInternal(bits: var Bitvec; code: openArray[byte]) =
+  var pc = 0
+  while pc < code.len:
+    let op = Op(code[pc])
+    inc pc
+
+    # Only PUSH1..PUSH32 carry immediate data. Opcodes above PUSH32 have to
+    # be skipped too: otherwise they are taken for oversized pushes, code is
+    # marked as data and the marking can run past the end of the bitmap.
+    if op < PUSH1 or op > PUSH32:
+      continue
+
+    bits.markData(pc, op.int - PUSH1.int + 1)
+
+# codeBitmap collects data locations in code.
+proc codeBitmap*(code: openArray[byte]): Bitvec =
+  # The bitmap is 4 bytes longer than necessary, in case the code
+  # ends with a PUSH32, the algorithm will push zeroes onto the
+  # bitvector outside the bounds of the actual code.
+  let len = (code.len div 8)+1+4
+  result = newSeq[byte](len)
+  result.codeBitmapInternal(code)
+
+# eofCodeBitmapInternal is the internal implementation of codeBitmap for EOF
+# code validation.
+proc eofCodeBitmapInternal(bits: var Bitvec; code: openArray[byte]) =
+  var pc = 0
+  while pc < code.len:
+    let op = Op(code[pc])
+    inc pc
+
+    # RJUMP and RJUMPI always have 2 byte operand.
+    if op == RJUMP or op == RJUMPI:
+      bits.setN(set2BitsMask, pc)
+      pc += 2
+      continue
+
+    var numbits = 0
+    if op >= PUSH1 and op <= PUSH32:
+      numbits = op.int - PUSH1.int + 1
+    elif op == RJUMPV:
+      # RJUMPV is unique as it has a variable sized operand.
+      # The total size is determined by the count byte which
+      # immediately follows RJUMPV. Truncation will be caught
+      # in other validation steps -- for now, just return a
+      # valid bitmap for as much of the code as is
+      # available.
+      if pc >= code.len:
+        # Count missing, no more bits to mark.
+        return
+      numbits = code[pc].int*2 + 1
+      if pc+numbits > code.len:
+        # Jump table is truncated, mark as many bits
+        # as possible.
+        numbits = code.len - pc
+    else:
+      # Every other opcode has no immediate operand.
+      continue
+
+    bits.markData(pc, numbits)
+
+# eofCodeBitmap collects data locations in code.
+proc eofCodeBitmap*(code: openArray[byte]): Bitvec =
+  # The bitmap is 4 bytes longer than necessary, in case the code
+  # ends with a PUSH32, the algorithm will push zeroes onto the
+  # bitvector outside the bounds of the actual code.
+  let len = (code.len div 8)+1+4
+  result = newSeq[byte](len)
+  result.eofCodeBitmapInternal(code)
diff --git a/nimbus/evm/computation.nim b/nimbus/evm/computation.nim index 002a3870a8..484d3f7e1d 100644 --- a/nimbus/evm/computation.nim +++ b/nimbus/evm/computation.nim @@ -11,7 +11,7 @@ import ".."/[db/accounts_cache, constants], "."/[code_stream, memory, message, stack, state], - "."/[types], + "."/[types, validate], ./interpreter/[gas_meter, gas_costs, op_codes], ../common/[common, evmforks], ../utils/[utils, eof], @@ -340,6 +340,11 @@ proc writeContract*(c: Computation) c.setError("EOF retcode parse error: " & res.error.toString, true) return + let vres = con.validateCode() + if vres.isErr: + c.setError("EOF retcode validate error: " & vres.error.toString, true) + return + elif fork >= FkLondon: withExtra trace, "New contract code starts with 0xEF byte, not allowed by EIP-3541" c.setError(EVMC_CONTRACT_VALIDATION_FAILURE, true) diff --git a/nimbus/evm/interpreter/op_handlers.nim b/nimbus/evm/interpreter/op_handlers.nim index 730afc512f..7e4900f584 100644 --- a/nimbus/evm/interpreter/op_handlers.nim +++ b/nimbus/evm/interpreter/op_handlers.nim
@@ -75,13 +75,6 @@ proc mkOpTable(selected: EVMFork): array[Op,Vm2OpExec] {.compileTime.} = # Public functions # ------------------------------------------------------------------------------ -#const -# vm2OpHandlers* = block: -# var rc: array[Fork, array[Op, Vm2OpExec]] -# for w in Fork: -# rc[w] = w.mkOpTable -# rc - type vmOpHandlersRec* = tuple name: string ## Name (or ID) of op handler diff --git a/nimbus/evm/interpreter/op_handlers/oph_call.nim b/nimbus/evm/interpreter/op_handlers/oph_call.nim index 8b8b744ab8..359041a692 100644 --- a/nimbus/evm/interpreter/op_handlers/oph_call.nim +++ b/nimbus/evm/interpreter/op_handlers/oph_call.nim @@ -519,7 +519,7 @@ const post: vm2OpIgnore)), (opCode: CallCode, ## 0xf2, Message-Call with alternative code - forks: Vm2OpAllForks, + forks: Vm2OpAllForks - Vm2OpEOFAndLater, name: "callCode", info: "Message-call into this account with alternative account's code", exec: (prep: vm2OpIgnore, diff --git a/nimbus/evm/interpreter/op_handlers/oph_sysops.nim b/nimbus/evm/interpreter/op_handlers/oph_sysops.nim index 9870bc9a4c..f467ef4c76 100644 --- a/nimbus/evm/interpreter/op_handlers/oph_sysops.nim +++ b/nimbus/evm/interpreter/op_handlers/oph_sysops.nim @@ -209,7 +209,7 @@ const post: vm2OpIgnore)), (opCode: SelfDestruct, ## 0xff, EIP2929: self destruct, Berlin and later - forks: Vm2OpBerlinAndLater, + forks: Vm2OpBerlinAndLater - Vm2OpEOFAndLater, name: "selfDestructEIP2929", info: "EIP2929: Halt execution and register account for later deletion", exec: (prep: vm2OpIgnore, diff --git a/nimbus/evm/interpreter_dispatch.nim b/nimbus/evm/interpreter_dispatch.nim index b6ead433ba..30e1605488 100644 --- a/nimbus/evm/interpreter_dispatch.nim +++ b/nimbus/evm/interpreter_dispatch.nim @@ -17,7 +17,7 @@ import std/[macros, sets, strformat], pkg/[chronicles, chronos, stew/byteutils], ".."/[constants, db/accounts_cache], - "."/[code_stream, computation], + "."/[code_stream, computation, validate], "."/[message, precompiles, state, types], 
../utils/[utils, eof], ./interpreter/[op_dispatcher, gas_costs], @@ -170,6 +170,11 @@ proc beforeExecCreate(c: Computation): bool c.setError("EOF initcode parse error: " & res.error.toString, false) return true + let vres = c.code.container.validateCode() + if vres.isErr: + c.setError("EOF initcode validation error: " & vres.error.toString, false) + return true + c.snapshot() if c.vmState.readOnlyStateDB().hasCodeOrNonce(c.msg.contractAddress): diff --git a/nimbus/evm/stack_table.nim b/nimbus/evm/stack_table.nim new file mode 100644 index 0000000000..8b73221bb5 --- /dev/null +++ b/nimbus/evm/stack_table.nim @@ -0,0 +1,225 @@ +# Nimbus +# Copyright (c) 2023 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or +# distributed except according to those terms. 
+
+import
+  interpreter/op_codes,
+  ../common/evmforks
+
+type
+  # StackDesc describes the stack requirements of a single opcode.
+  StackDesc* = object
+    min*: int       # smallest stack height the opcode accepts
+    max*: int       # largest stack height the opcode accepts
+    enabled*: bool  # stays false (the default) for opcodes a table never sets
+
+  # StackTable holds one StackDesc per opcode, indexed by `Op`.
+  StackTable* = array[Op, StackDesc]
+
+const
+  # Stack size limit that maxStack measures against.
+  StackLimit* = 1024
+
+# maxStack is the largest stack height at which an opcode that pops `pop`
+# and pushes `push` items still ends up within StackLimit.
+func maxStack(pop, push: int): int {.compileTime.} =
+  StackLimit + pop - push
+
+# minStack is the stack height needed to pop `pops` items; `push` does not
+# take part in the result.
+func minStack(pops, push: int): int {.compileTime.} =
+  pops
+
+# minSwapStack is the minimum stack height for a swap touching `n` items.
+func minSwapStack(n: int): int {.compileTime.} =
+  minStack(n, n)
+
+# maxSwapStack is the maximum stack height for a swap touching `n` items
+# (the stack size does not change, so this is StackLimit).
+func maxSwapStack(n: int): int {.compileTime.} =
+  maxStack(n, n)
+
+# minDupStack is the minimum stack height for duplicating the n-th item.
+func minDupStack(n: int): int {.compileTime.} =
+  minStack(n, n+1)
+
+# maxDupStack is the maximum stack height for duplicating the n-th item
+# (one item is added, so this is StackLimit - 1).
+func maxDupStack(n: int): int {.compileTime.} =
+  maxStack(n, n+1)
+
+# sm builds an `(op, StackDesc)` pair for an enabled opcode that pops `a`
+# and pushes `b` items; used for the entries of BaseStackTable.
+template sm(op: Op, a, b: int): untyped =
+  (op, StackDesc(
+    min: minStack(a, b),
+    max: maxStack(a, b),
+    enabled: true)
+  )
+
+# sp builds the StackDesc of an enabled opcode that pops `a` and pushes `b`
+# items.
+template sp(a, b: int): untyped =
+  StackDesc(
+    min: minStack(a, b),
+    max: maxStack(a, b),
+    enabled: true
+  )
+
+# sd builds the StackDesc of an enabled dup opcode; `x` is the 1-based
+# position of the duplicated item (see frontierStackTable).
+template sd(x: int): untyped =
+  StackDesc(
+    min: minDupStack(x),
+    max: maxDupStack(x),
+    enabled: true
+  )
+
+# ss builds the StackDesc of an enabled swap opcode; `x` is the number of
+# stack items involved (frontierStackTable passes 2 for Swap1).
+template ss(x: int): untyped =
+  StackDesc(
+    min: minSwapStack(x),
+    max: maxSwapStack(x),
+    enabled: true
+  )
+
+const
+  # Opcodes copied into the Frontier table, written as sm(op, pops, pushes).
+  # Push/Dup/Swap are added separately in frontierStackTable.
+  BaseStackTable = [
+    sm(Stop, 0, 0),
+    sm(Add, 2, 1),
+    sm(Mul, 2, 1),
+    sm(Sub, 2, 1),
+    sm(Div, 2, 1),
+    sm(Sdiv, 2, 1),
+    sm(Mod, 2, 1),
+    sm(Smod, 2, 1),
+    sm(Addmod, 3, 1),
+    sm(Mulmod, 3, 1),
+    sm(Exp, 2, 1),
+    sm(SignExtend, 2, 1),
+    sm(Lt, 2, 1),
+    sm(Gt, 2, 1),
+    sm(Slt, 2, 1),
+    sm(Sgt, 2, 1),
+    sm(Eq, 2, 1),
+    sm(IsZero, 1, 1),
+    sm(And, 2, 1),
+    sm(Or, 2, 1),
+    sm(Xor, 2, 1),
+    sm(Not, 1, 1),
+    sm(Byte, 2, 1),
+    sm(Sha3, 2, 1),
+    sm(Address, 0, 1),
+    sm(Balance, 1, 1),
+    sm(Origin, 0, 1),
+    sm(Caller, 0, 1),
+    sm(CallValue, 0, 1),
+    sm(CallDataLoad, 1, 1),
+    sm(CallDataSize, 0, 1),
+    sm(CallDataCopy, 3, 0),
+    sm(CodeSize, 0, 1),
+    sm(CodeCopy, 3, 0),
+    sm(GasPrice, 0, 1),
+    sm(ExtCodeSize, 1, 1),
+    sm(ExtCodeCopy, 4, 0),
+    sm(Blockhash, 1, 1),
+    sm(Coinbase, 0, 1),
+    sm(Timestamp, 0, 1),
+    sm(Number, 0, 1),
+    sm(Difficulty, 0, 1),
+    sm(GasLimit, 0, 1),
+    sm(Pop, 1, 0),
+    sm(Mload, 1, 1),
+    sm(Mstore, 2, 0),
+    sm(Mstore8, 2, 0),
+    sm(Sload, 1, 1),
+    sm(Sstore, 2, 0),
+    sm(Jump, 1, 0),
+    sm(JumpI, 2, 0),
+    sm(Pc, 0, 1),
+    sm(Msize, 0, 1),
+    sm(Gas, 0, 1),
+    sm(JumpDest, 0, 0),
+    sm(Log0, 2, 0),
+    sm(Log1, 3, 0),
+    sm(Log2, 4, 0),
+    sm(Log3, 5, 0),
+    sm(Log4, 6, 0),
+    sm(Create, 3, 1),
+    sm(Call, 7, 1),
+    sm(CallCode, 7, 1),
+    sm(Return, 2, 0),
+    sm(SelfDestruct, 1, 0),
+    sm(Invalid, 0, 0),
+  ]
+
+# frontierStackTable builds the first table: the BaseStackTable entries plus
+# Push1..Push32, Dup1..Dup16 and Swap1..Swap16. Every other opcode keeps the
+# zero StackDesc, i.e. `enabled == false`.
+proc frontierStackTable(): StackTable {.compileTime.} =
+  for x in BaseStackTable:
+    result[x[0]] = x[1]
+
+  for x in Push1..Push32:
+    result[x] = sp(0, 1)
+
+  for x in Dup1..Dup16:
+    # Dup1 duplicates item 1, Dup2 item 2, ...
+    result[x] = sd(x.int-Dup1.int+1)
+
+  for x in Swap1..Swap16:
+    # Swap1 involves 2 items, Swap2 involves 3, ...
+    result[x] = ss(x.int-Swap1.int+2)
+
+# homesteadStackTable is the Frontier table plus DelegateCall.
+proc homesteadStackTable(): StackTable {.compileTime.} =
+  result = frontierStackTable()
+  result[DelegateCall] = sp(6, 1)
+
+# byzantiumStackTable is the Homestead table plus StaticCall,
+# ReturnDataSize, ReturnDataCopy and Revert.
+proc byzantiumStackTable(): StackTable {.compileTime.} =
+  result = homesteadStackTable()
+  result[StaticCall] = sp(6, 1)
+  result[ReturnDataSize] = sp(0, 1)
+  result[ReturnDataCopy] = sp(3, 0)
+  result[Revert] = sp(2, 0)
+
+# constantinopleStackTable is the Byzantium table plus Shl, Shr, Sar,
+# ExtCodeHash and Create2.
+proc constantinopleStackTable(): StackTable {.compileTime.} =
+  result = byzantiumStackTable()
+  result[Shl] = sp(2, 1)
+  result[Shr] = sp(2, 1)
+  result[Sar] = sp(2, 1)
+  result[ExtCodeHash] = sp(1, 1)
+  result[Create2] = sp(4, 1)
+
+# istanbulStackTable is the Constantinople table plus ChainIdOp and
+# SelfBalance.
+proc istanbulStackTable(): StackTable {.compileTime.} =
+  result = constantinopleStackTable()
+  # new opcodes EIP-1344
+  result[ChainIdOp] = sp(0, 1)
+  # new opcodes EIP-1884
+  result[SelfBalance] = sp(0, 1)
+
+# londonStackTable is the Istanbul table plus BaseFee.
+proc londonStackTable(): StackTable {.compileTime.} =
+  result = istanbulStackTable()
+  # new opcodes EIP-3198
+  result[BaseFee] = sp(0, 1)
+
+# mergeStackTable is the London table with the PrevRandao entry set.
+proc mergeStackTable(): StackTable {.compileTime.} =
+  result = londonStackTable()
+  result[PrevRandao] = sp(0, 1)
+
+# cancunStackTable is the Merge table with the EOF opcodes added and the
+# opcodes removed by EIP-3670 / EIP-5450 reset to the zero StackDesc, which
+# leaves them with `enabled == false`.
+proc cancunStackTable(): StackTable {.compileTime.} =
+  result = mergeStackTable()
+  # new opcodes EIP-4200
+  result[Rjump] = sp(0, 0)
+  result[RJumpI] = sp(1, 0)
+  result[RJumpV] = sp(1, 0)
+  # new opcodes EIP-4750
+  result[CallF] = sp(0, 0)
+  result[RetF] = sp(0, 0)
+  # new opcodes EIP-3855
+  result[Push0] = sp(0, 1)
+
+  # disable opcodes EIP-3670
+  result[CallCode] = StackDesc()
+  result[SelfDestruct] = StackDesc()
+  # disable opcodes EIP-5450
+  result[Jump] = StackDesc()
+  result[JumpI] = StackDesc()
+  result[Pc] = StackDesc()
+
+const
+  # One stack table per fork. The literal is positional: the i-th entry
+  # belongs to the i-th `EVMFork` value.
+  # NOTE(review): `EVMFork` is declared in ../common/evmforks, which is not
+  # visible here -- confirm that the order and the number of entries match.
+  EVMForksStackTable*: array[EVMFork, StackTable] = [
+    frontierStackTable(),
+    homesteadStackTable(),
+    homesteadStackTable(),
+    homesteadStackTable(),
+    byzantiumStackTable(),
+    constantinopleStackTable(),
+    constantinopleStackTable(),
+    istanbulStackTable(),
+    istanbulStackTable(),
+    londonStackTable(),
+    mergeStackTable(),
+    mergeStackTable(),
+    cancunStackTable(),
+  ]
diff --git a/nimbus/evm/validate.nim b/nimbus/evm/validate.nim
new file mode 100644
index 0000000000..bc9811e0c4
--- /dev/null
+++ b/nimbus/evm/validate.nim
@@ -0,0 +1,267 @@
+# Nimbus
+# Copyright (c) 2023 Status Research & Development GmbH
+# Licensed under either of
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+#    http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+#    http://opensource.org/licenses/MIT)
+# at your option. This file may not be copied, modified, or
+# distributed except according to those terms.
+
+
+# EIP-3670: EOF - Code Validation
+# EIP-5450: EOF - Stack Validation
+
+import
+  std/[tables, strutils],
+  stew/[results, endians2],
+  ../utils/eof,
+  ../common/evmforks,
+  ./interpreter/op_codes,
+  ./analysis,
+  ./stack_table
+
+type
+  OpDefined = array[Op, bool]
+
+# EOFStackTable is the stack table of the EOF fork (`FkEOF`).
+template EOFStackTable(): untyped =
+  EVMForksStackTable[FkEOF]
+
+# isTerminal reports whether `op` ends a code path, i.e. whether a code
+# section is allowed to end with it.
+func isTerminal(op: Op): bool =
+  case op
+  of RJUMP, RETF, STOP, RETURN, REVERT, INVALID:
+    true
+  else:
+    false
+
+# parseUint16 reads the big-endian unsigned 16-bit value at
+# code[pos..pos+1]. The caller must make sure both bytes exist.
+proc parseUint16(code: openArray[byte], pos: int): int =
+  fromBytesBE(uint16, toOpenArray(code, pos, pos+2-1)).int
+
+# parseInt16 reads the big-endian 16-bit value at code[pos..pos+1] and
+# reinterprets it as a signed number. The caller must make sure both bytes
+# exist.
+proc parseInt16(code: openArray[byte], pos: int): int =
+  let x = fromBytesBE(uint16, toOpenArray(code, pos, pos+2-1))
+  cast[int16](x).int
+
+# checkDest parses the relative offset stored at code[imm..imm+1] and checks
+# that `src + offset` is a valid jump destination: it must lie inside
+# [0, length) and, according to `analysis`, must not point into immediate
+# data.
+proc checkDest(code: openArray[byte], analysis: Bitvec,
+               imm, src, length: int): Result[void, EOFV1Error] =
+  if code.len < imm+2:
+    return err(eofErr(ErrUnexpectedEOF, code.len))
+
+  let offset = parseInt16(code, imm)
+  let dest = src + offset
+  if dest < 0 or dest >= length:
+    return err(eofErr(ErrInvalidJumpDest,
+      imm,
+      "relative offset out-of-bounds: offset $1, dest $2" %
+        [$offset, $dest]))
+
+  if not analysis.codeSegment(dest):
+    return err(eofErr(ErrInvalidJumpDest,
+      imm,
+      "relative offset into immediate value: offset $1, dest $2" %
+        [$offset, $dest]))
+
+  ok()
+
+# stackOverflow builds an ErrStackOverflow error for position `pos`; `len` is
+# the offending height, `limit` the allowed maximum and `msg` optional
+# extra context.
+proc stackOverflow(pos: int, len: int, limit: int, msg = ""): EOFV1Error =
+  if msg.len == 0:
+    eofErr(ErrStackOverflow, pos, "len: $1, limit: $2" % [$len, $limit])
+  else:
+    eofErr(ErrStackOverflow, pos, "len: $1, limit: $2, $3" % [$len, $limit, msg])
+
+# stackUnderflow builds an ErrStackUnderflow error for position `pos`; `len`
+# is the current height, `req` the required height and `msg` optional extra
+# context.
+proc stackUnderflow(pos: int, len: int, req: int, msg = ""): EOFV1Error =
+  if msg.len == 0:
+    eofErr(ErrStackUnderflow, pos, "($1 <=> $2)" % [$len, $req])
+  else:
+    eofErr(ErrStackUnderflow, pos, "($1 <=> $2), $3" % [$len, $req, msg])
+
+# validateControlFlow iterates through all possible branches of the provided
+# code and determines if it is valid per EOF v1: every instruction must be
+# reached with a single, consistent stack height, no instruction may
+# underflow or overflow the stack, RETF must leave exactly the declared
+# number of outputs, and the highest stack height seen must equal the
+# declared maxStackHeight.
+#
+# On success the number of distinct instruction positions visited is
+# returned, so the caller can detect unreachable code. The code must already
+# have passed the structural checks in validateCode (no truncated
+# immediates, valid CALLF section indexes).
+proc validateControlFlow(code: openArray[byte],
+                         section: int,
+                         metadata: openArray[FunctionMetadata],
+                         st: StackTable): Result[int, EOFV1Error] =
+  var
+    # Stack height recorded for every instruction position visited so far.
+    heights = initTable[int, int]()
+    # Pending (position, stack height) pairs: the section entry plus the
+    # branch targets that still have to be followed.
+    worklist = @[(0, metadata[section].input.int)]
+    maxStackHeight = metadata[section].input.int
+
+  while worklist.len > 0:
+    var (pos, height) = worklist.pop()
+
+    block outer:
+      while pos < code.len:
+        let op = Op(code[pos])
+
+        # Check if pos has already been visited; if so, the stack heights
+        # should be the same.
+        heights.withValue(pos, val) do:
+          let want = val[]
+          if height != want:
+            return err(eofErr(ErrConflictingStack, pos,
+              "have $1, want $2" % [$height, $want]))
+          # Already visited this path and stack height
+          # matches: stop following it.
+          break
+        heights[pos] = height
+
+        # Validate height for current op and update as needed.
+        if st[op].min > height:
+          return err(stackUnderflow(pos, height, st[op].min))
+
+        if st[op].max < height:
+          return err(stackOverflow(pos, height, st[op].max))
+
+        # StackLimit - max is the net stack effect (pushes minus pops).
+        height += StackLimit - st[op].max
+
+        case op
+        of CALLF:
+          let arg = parseUint16(code, pos+1)
+          if metadata[arg].input.int > height:
+            return err(stackUnderflow(pos, height, metadata[arg].input.int,
+              "CALLF underflow to section " & $arg))
+
+          if metadata[arg].output.int+height > StackLimit:
+            return err(stackOverflow(pos, metadata[arg].output.int+height, StackLimit,
+              "CALLF overflow to section " & $arg))
+
+          height -= metadata[arg].input.int
+          height += metadata[arg].output.int
+          pos += 3
+        of RETF:
+          if int(metadata[section].output) != height:
+            return err(eofErr(ErrInvalidOutputs, pos,
+              "have $1, want $2" %
+                [$metadata[section].output, $height]))
+          break outer
+        of RJUMP:
+          let arg = parseInt16(code, pos+1)
+          pos += 3 + arg
+        of RJUMPI:
+          # Queue the taken branch, keep walking the fall-through path.
+          let arg = parseInt16(code, pos+1)
+          worklist.add((pos + 3 + arg, height))
+          pos += 3
+        of RJUMPV:
+          # Queue every jump table target, keep walking the fall-through
+          # path.
+          let count = int(code[pos+1])
+          for i in 0 ..< count:
+            let arg = parseInt16(code, pos+2+2*i)
+            worklist.add((pos + 2 + 2*count + arg, height))
+          pos += 2 + 2*count
+        else:
+          if op >= PUSH1 and op <= PUSH32:
+            pos += 1 + op.int-PUSH0.int
+          elif isTerminal(op):
+            break outer
+          else:
+            # Simple op, no operand.
+            pos += 1
+
+        maxStackHeight = max(maxStackHeight, height)
+
+  if maxStackHeight != metadata[section].maxStackHeight.int:
+    return err(eofErr(ErrInvalidMaxStackHeight, 0,
+      "at code section $1, have $2, want $3" %
+        [$section, $metadata[section].maxStackHeight, $maxStackHeight]))
+
+  ok(heights.len)
+
+# validateCode validates the code parameter against the EOF v1 validity
+# requirements. `section` is the index of `code` within the container,
+# `metadata` holds the type information of all code sections and `st` is the
+# stack table that defines which opcodes are enabled.
+proc validateCode(code: openArray[byte],
+                  section: int,
+                  metadata: openArray[FunctionMetadata],
+                  st: StackTable): Result[void, EOFV1Error] =
+  var
+    i = 0
+    # Tracks the number of actual instructions in the code (e.g.
+    # non-immediate values). This is used at the end to determine
+    # if each instruction is reachable.
+    count = 0
+    analysis = eofCodeBitmap(code)
+    op: Op
+
+  # This loop visits every single instruction and verifies:
+  # * if the instruction is valid for the given jump table.
+  # * if the instruction has an immediate value, it is not truncated.
+  # * if performing a relative jump, all jump destinations are valid.
+  # * if changing code sections, the new code section index is valid and
+  #   will not cause a stack overflow.
+  while i < code.len:
+    inc count
+    op = Op(code[i])
+    if not st[op].enabled:
+      return err(eofErr(ErrUndefinedInstruction,
+        i, "opcode=" & $op))
+
+    case op
+    of PUSH1..PUSH32:
+      let size = op.int - PUSH0.int
+      if code.len <= i+size:
+        return err(eofErr(ErrTruncatedImmediate,
+          i, "op=" & $op))
+      i += size
+    of RJUMP, RJUMPI:
+      if code.len <= i+2:
+        return err(eofErr(ErrTruncatedImmediate,
+          i, "op=" & $op))
+      let res = checkDest(code, analysis, i+1, i+3, code.len)
+      if res.isErr:
+        return res
+      i += 2
+    of RJUMPV:
+      if code.len <= i+1:
+        return err(eofErr(ErrTruncatedImmediate,
+          i, "jump table size missing"))
+      # Named differently from the instruction counter `count` above to
+      # avoid shadowing it.
+      let branchCount = int(code[i+1])
+      if branchCount == 0:
+        return err(eofErr(ErrInvalidBranchCount,
+          i, "must not be 0"))
+      if code.len <= i+branchCount:
+        return err(eofErr(ErrTruncatedImmediate,
+          i, "jump table truncated"))
+      for j in 0 ..< branchCount:
+        # Offsets are relative to the end of the jump table; checkDest also
+        # rejects entries that lie beyond the end of the code.
+        let res = checkDest(code, analysis, i+2+j*2, i+2*branchCount+2, code.len)
+        if res.isErr:
+          return res
+      i += 1 + 2*branchCount
+    of CALLF:
+      if i+2 >= code.len:
+        return err(eofErr(ErrTruncatedImmediate,
+          i, "op=" & $op))
+      let arg = parseUint16(code, i+1)
+      if arg >= metadata.len:
+        return err(eofErr(ErrInvalidSectionArgument,
+          i, "arg $1, last section $2" % [$arg, $metadata.len]))
+      i += 2
+    else:
+      discard
+    inc i
+
+  # Code sections may not "fall through" and require proper termination.
+  # Therefore, the last instruction must be considered terminal.
+  if not isTerminal(op):
+    return err(eofErr(ErrInvalidCodeTermination,
+      i, "ends with op " & $op))
+
+  let res = validateControlFlow(code, section, metadata, st)
+  if res.isErr:
+    return err(res.error)
+
+  # Every instruction counted above must have been visited by the control
+  # flow walk, otherwise some code is unreachable.
+  let paths = res.get()
+  if paths != count:
+    # TODO: return actual unreachable position
+    return err(eofErr(ErrUnreachableCode, 0, ""))
+
+  ok()
+
+# validateCode validates one code section against the EOF v1 rule set, using
+# the stack table of the EOF fork.
+proc validateCode*(code: openArray[byte], section: int,
+                   metadata: openArray[FunctionMetadata]): Result[void, EOFV1Error] =
+  validateCode(code, section, metadata, EOFStackTable)
+
+# validateCode validates each code section of the container against the
+# EOF v1 rule set and returns the first error found.
+proc validateCode*(c: Container): Result[void, EOFV1Error] =
+  for i in 0 ..< c.code.len:
+    let res = validateCode(c.code[i], i, c.types)
+    if res.isErr:
+      return res
+
+  ok()
diff --git a/nimbus/utils/eof.nim b/nimbus/utils/eof.nim index 3c5330cd37..d1f3b6921b 100644 --- a/nimbus/utils/eof.nim +++ b/nimbus/utils/eof.nim @@ -56,6 +56,20 @@ type ErrTooLargeMaxStackHeight = "invalid type content, max stack height exceeds limit" ErrInvalidContainerSize = "invalid container size" + # validation error + ErrUndefinedInstruction = "undefined instruction" + ErrTruncatedImmediate = "truncated immediate" + ErrInvalidSectionArgument = "invalid section argument" + ErrInvalidJumpDest = "invalid jump destination" + ErrConflictingStack = "conflicting stack height" + ErrInvalidBranchCount = "invalid number of branches in jump table" + ErrInvalidOutputs = "invalid number of outputs" + ErrInvalidMaxStackHeight = "invalid max stack height" + ErrInvalidCodeTermination = "invalid code termination" + ErrUnreachableCode = "unreachable code" + ErrStackUnderflow = "stack underflow" + ErrStackOverflow = "stack overflow" + EOFV1Error* = object kind*: EOFV1ErrorKind pos* : int diff --git a/tests/test_code_stream.nim b/tests/test_code_stream.nim index 627bdf8b10..658d508c02 100644 --- a/tests/test_code_stream.nim +++ b/tests/test_code_stream.nim @@ -10,7 +10,9 @@ import
unittest2, stew/[byteutils, results], ../nimbus/vm_internals, - ../nimbus/utils/eof + ../nimbus/utils/eof, + ../nimbus/evm/[analysis, validate], + ../nimbus/evm/interpreter/op_codes proc codeStreamMain*() = suite "parse bytecode": @@ -172,5 +174,213 @@ proc codeStreamMain*() = check c.data.len == 1 check c.data[0] == 0xAA.byte + suite "code analysis": + type + Spec = object + code : seq[byte] + exp : byte + which: int + + proc spec(code: openArray[byte], exp: byte, which: int): Spec = + Spec(code: @code, exp: exp, which: which) + + const vectors = [ + spec(@[byte(PUSH1), 0x01, 0x01, 0x01], 0b0000_0010, 0), + spec(@[byte(PUSH1), byte(PUSH1), byte(PUSH1), byte(PUSH1)], 0b0000_1010, 0), + spec(@[byte(0x00), byte(PUSH1), 0x00, byte(PUSH1), 0x00, byte(PUSH1), 0x00, byte(PUSH1)], 0b0101_0100, 0), + spec(@[byte(PUSH8), byte(PUSH8), byte(PUSH8), byte(PUSH8), byte(PUSH8), + byte(PUSH8), byte(PUSH8), byte(PUSH8), 0x01, 0x01, 0x01], 0xFE, 0), + spec(@[byte(PUSH8), 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], 0b0000_0001, 1), + spec(@[byte(0x01), 0x01, 0x01, 0x01, 0x01, byte(PUSH2), byte(PUSH2), byte(PUSH2), 0x01, 0x01, 0x01], 0b1100_0000, 0), + spec(@[byte(0x01), 0x01, 0x01, 0x01, 0x01, byte(PUSH2), 0x01, 0x01, 0x01, 0x01, 0x01], 0b0000_0000, 1), + spec(@[byte(PUSH3), 0x01, 0x01, 0x01, byte(PUSH1), 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], 0b0010_1110, 0), + spec(@[byte(PUSH3), 0x01, 0x01, 0x01, byte(PUSH1), 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], 0b0000_0000, 1), + spec(@[byte(0x01), byte(PUSH8), 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], 0b1111_1100, 0), + spec(@[byte(0x01), byte(PUSH8), 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], 0b0000_0011, 1), + spec(@[byte(PUSH16), 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], 0b1111_1110, 0), + spec(@[byte(PUSH16), 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], 0b1111_1111, 1), + spec(@[byte(PUSH16), 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], 
0b0000_0001, 2), + spec(@[byte(PUSH8), 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, byte(PUSH1), 0x01], 0b1111_1110, 0), + spec(@[byte(PUSH8), 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, byte(PUSH1), 0x01], 0b0000_0101, 1), + spec(@[byte(PUSH32)], 0b1111_1110, 0), + spec(@[byte(PUSH32)], 0b1111_1111, 1), + spec(@[byte(PUSH32)], 0b1111_1111, 2), + spec(@[byte(PUSH32)], 0b1111_1111, 3), + spec(@[byte(PUSH32)], 0b0000_0001, 4), + ] + + const eofVectors = [ + spec(@[byte(RJUMP), 0x01, 0x01, 0x01], 0b0000_0110, 0), + spec(@[byte(RJUMPI), byte(RJUMP), byte(RJUMP), byte(RJUMPI)], 0b0011_0110, 0), + spec(@[byte(RJUMPV), 0x02, byte(RJUMP), 0x00, byte(RJUMPI), 0x00], 0b0011_1110, 0), + ] + + test "jump dest analysis": + for x in vectors: + let z = codeBitmap(x.code) + check z[x.which] == x.exp + let y = eofCodeBitmap(x.code) + check y[x.which] == x.exp + + test "eof analysis": + for x in eofVectors: + let z = eofCodeBitmap(x.code) + check z[x.which] == x.exp + + suite "validate code tests": + type + ValidateVector = object + code: seq[byte] + section: int + metadata: seq[FunctionMetadata] + err: EOFV1ErrorKind + + proc vv(code: openArray[byte], section: int, + met: openArray[FunctionMetaData], + err = ErrNoEOFErr): ValidateVector = + ValidateVector(code: @code, section: section, + metadata: @met, err: err) + + proc fm(input, output, max: int): FunctionMetadata = + FunctionMetadata(input: input.uint8, + output: output.uint8, maxStackHeight: max.uint16) + + const + ValidateVectors = [ + vv([byte(CALLER), byte(POP), byte(STOP)], 0, [fm(0, 0, 1)]), + + vv([byte(CALLF), 0x00, 0x00, byte(STOP)], 0, [fm(0, 0, 0)]), + + vv([ + byte(ADDRESS), byte(CALLF), 0x00, 0x00, byte(STOP)], 0, + [fm(0, 0, 1)]), + + vv([byte(CALLER), byte(POP),], 0, + [fm(0, 0, 1)], + ErrInvalidCodeTermination), + + vv([ + byte(RJUMP), + byte(0x00), + byte(0x01), + byte(CALLER), + byte(STOP)], 0, + [fm(0, 0, 0)], + ErrUnreachableCode), + + vv([ + byte(PUSH1), byte(0x42), byte(ADD), byte(STOP)], 0, + 
[fm(0, 0, 1)], + ErrStackUnderflow), + + vv([ + byte(PUSH1), byte(0x42), byte(POP), byte(STOP)], 0, + [fm(0, 0, 2)], + ErrInvalidMaxStackHeight), + + vv([ + byte(PUSH0), + byte(RJUMPI), + byte(0x00), + byte(0x01), + byte(PUSH1), + byte(0x42), # jumps to here + byte(POP), + byte(STOP)], 0, + [fm(0, 0, 1)], + ErrInvalidJumpDest), + + vv([ + byte(PUSH0), + byte(RJUMPV), + byte(0x02), + byte(0x00), + byte(0x01), + byte(0x00), + byte(0x02), + byte(PUSH1), + byte(0x42), # jumps to here + byte(POP), # and here + byte(STOP)], 0, + [fm(0, 0, 1)], + ErrInvalidJumpDest), + + vv([ + byte(PUSH0), byte(RJUMPV), byte(0x00), byte(STOP)], 0, + [fm(0, 0, 1)], + ErrInvalidBranchCount), + + vv([ + byte(RJUMP), 0x00, 0x03, + byte(JUMPDEST), + byte(JUMPDEST), + byte(RETURN), + byte(PUSH1), 20, + byte(PUSH1), 39, + byte(PUSH1), 0x00, + byte(CODECOPY), + byte(PUSH1), 20, + byte(PUSH1), 0x00, + byte(RJUMP), 0xff, 0xef], 0, + [fm(0, 0, 3)]), + + vv([ + byte(PUSH1), 1, + byte(RJUMPI), 0x00, 0x03, + byte(JUMPDEST), + byte(JUMPDEST), + byte(STOP), + byte(PUSH1), 20, + byte(PUSH1), 39, + byte(PUSH1), 0x00, + byte(CODECOPY), + byte(PUSH1), 20, + byte(PUSH1), 0x00, + byte(RETURN)], 0, + [fm(0, 0, 3)]), + + vv([ + byte(PUSH1), 1, + byte(RJUMPV), 0x02, 0x00, 0x03, 0xff, 0xf8, + byte(JUMPDEST), + byte(JUMPDEST), + byte(STOP), + byte(PUSH1), 20, + byte(PUSH1), 39, + byte(PUSH1), 0x00, + byte(CODECOPY), + byte(PUSH1), 20, + byte(PUSH1), 0x00, + byte(RETURN)], 0, + [fm(0, 0, 3)]), + + vv([byte(STOP), byte(STOP), byte(INVALID)], 0, + [fm(0, 0, 0)], + ErrUnreachableCode), + + vv([byte(RETF)], 0, [fm(0, 1, 0)], ErrInvalidOutputs), + + vv([byte(RETF)], 0, [fm(3, 3, 3)]), + + vv([byte(CALLF), 0x00, 0x01, byte(POP), byte(STOP)], 0, + [fm(0, 0, 1), fm(0, 1, 0)]), + + vv([ + byte(ORIGIN), + byte(ORIGIN), + byte(CALLF), 0x00, 0x01, + byte(POP), + byte(RETF)], 0, + [fm(0, 0, 2), fm(2, 1, 2)]), + ] + + for i, x in ValidateVectors: + test "validate code " & $i: + let res = validateCode(x.code, x.section, 
x.metadata) + if res.isErr: + check res.error.kind == x.err + else: + check x.err == ErrNoEOFErr + when isMainModule: codeStreamMain()