EIP-3670: EOF - Code Validation, EIP-5450: EOF - Stack Validation

status-im · Oct 31, 2023 · d0a2c5f · d0a2c5f
1 parent 7637250
commit d0a2c5f
Show file tree

Hide file tree

Showing 10 changed files with 898 additions and 12 deletions.
diff --git a/nimbus/evm/analysis.nim b/nimbus/evm/analysis.nim
@@ -0,0 +1,167 @@
+# Nimbus
+# Copyright (c) 2023 Status Research & Development GmbH
+# Licensed under either of
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+#    http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+#    http://opensource.org/licenses/MIT)
+# at your option. This file may not be copied, modified, or
+# distributed except according to those terms.
+
+import
+  interpreter/op_codes
+
+const
+  # Masks with the N lowest bits set. `setN` shifts one of these into
+  # place to flag N consecutive bytes as data.
+  set2BitsMask = 0x03'u16
+  set3BitsMask = 0x07'u16
+  set4BitsMask = 0x0F'u16
+  set5BitsMask = 0x1F'u16
+  set6BitsMask = 0x3F'u16
+  set7BitsMask = 0x7F'u16
+
+# Bitvec is a bit vector holding one bit per byte of a program.
+# A cleared bit marks the byte as an opcode, a set bit marks it
+# as data (i.e. an argument of PUSHxx).
+type
+  Bitvec* = seq[byte]
+
+# set1 flags the single byte at `pos` as data.
+proc set1(bits: var Bitvec, pos: int) =
+  let idx = pos div 8
+  bits[idx] = bits[idx] or byte(1 shl (pos mod 8))
+
+# setN shifts `flag` to bit offset `pos` and merges it into the vector.
+# The low byte of the shifted value is OR-ed into the byte that holds
+# `pos`; any bits that spill over are assigned (not OR-ed) to the byte
+# after it.
+proc setN(bits: var Bitvec, flag: uint16, pos: int) =
+  let
+    idx = pos div 8
+    shifted = flag shl (pos mod 8)
+    spill = byte(shifted shr 8)
+  bits[idx] = bits[idx] or byte(shifted)
+  if spill != 0:
+    bits[idx + 1] = spill
+
+# set8 flags the 8 bytes starting at `pos` as data. The run may
+# straddle two bitmap bytes: the first is OR-ed with the shifted
+# mask, the second is assigned the complementary mask.
+proc set8(bits: var Bitvec, pos: int) =
+  let
+    idx = pos div 8
+    head = byte(0xFF shl (pos mod 8))
+  bits[idx] = head or bits[idx]
+  bits[idx + 1] = not head
+
+# set16 flags the 16 bytes starting at `pos` as data. The run touches
+# three bitmap bytes: the first is OR-ed with the shifted mask, the
+# middle one is fully set and the last is assigned the complementary
+# mask.
+proc set16(bits: var Bitvec, pos: int) =
+  let
+    idx = pos div 8
+    head = byte(0xFF shl (pos mod 8))
+  bits[idx] = head or bits[idx]
+  bits[idx + 1] = 0xFF
+  bits[idx + 2] = not head
+
+# codeSegment reports whether the byte at `pos` is code, i.e. whether
+# its bit in the vector is cleared.
+proc codeSegment*(bits: Bitvec, pos: int): bool =
+  let shifted = bits[pos div 8] shr (pos mod 8)
+  (shifted and 1) == 0
+
+# codeBitmapInternal is the internal implementation of codeBitmap.
+# It exists for the purpose of being able to run benchmark tests
+# without dynamic allocations affecting the results.
+#
+# `bits` must already be allocated by the caller and be large enough
+# for the writes made here, which may reach past the bit of the last
+# code byte when the code ends in a truncated PUSH (codeBitmap
+# reserves spare bytes for that).
+proc codeBitmapInternal(bits: var Bitvec; code: openArray[byte]) =
+  var pc = 0
+  while pc < code.len:
+    let op = Op(code[pc])
+    inc pc
+
+    # Only PUSH1..PUSH32 carry immediate data. Opcodes above PUSH32
+    # have to be skipped as well: without the upper bound they would
+    # be treated as pushes with up to 160 data bytes, flagging real
+    # opcodes as data and writing past the end of the bit vector.
+    if op < PUSH1 or op > PUSH32:
+      continue
+
+    # Number of immediate bytes following the PUSH opcode (1..32).
+    var numbits = op.int - PUSH1.int + 1
+    if numbits >= 8:
+      # Flag the bulk of the data 16 and then 8 bytes at a time.
+      while numbits >= 16:
+        bits.set16(pc)
+        pc += 16
+        numbits -= 16
+
+      while numbits >= 8:
+        bits.set8(pc)
+        pc += 8
+        numbits -= 8
+
+    # Flag the remaining 0..7 bytes.
+    case numbits
+    of 1: bits.set1(pc)
+    of 2: bits.setN(set2BitsMask, pc)
+    of 3: bits.setN(set3BitsMask, pc)
+    of 4: bits.setN(set4BitsMask, pc)
+    of 5: bits.setN(set5BitsMask, pc)
+    of 6: bits.setN(set6BitsMask, pc)
+    of 7: bits.setN(set7BitsMask, pc)
+    else: discard
+    pc += numbits
+
+# codeBitmap collects data locations in code.
+proc codeBitmap*(code: openArray[byte]): Bitvec =
+  # Reserve 4 bytes more than strictly needed: when the code ends
+  # with a PUSH32 the algorithm writes onto the bit vector beyond
+  # the bounds of the actual code.
+  const spareBytes = 4
+  result = newSeq[byte](code.len div 8 + 1 + spareBytes)
+  codeBitmapInternal(result, code)
+
+# eofCodeBitmapInternal is the internal implementation of the code
+# bitmap used for EOF code validation. Besides PUSHxx data it also
+# flags the immediate operands of RJUMP, RJUMPI and RJUMPV.
+proc eofCodeBitmapInternal(bits: var Bitvec; code: openArray[byte]) =
+  # Masks for a tail of 2..7 data bytes, indexed by `count - 2`.
+  const tailMasks = [set2BitsMask, set3BitsMask, set4BitsMask,
+                     set5BitsMask, set6BitsMask, set7BitsMask]
+
+  var pc = 0
+  while pc < code.len:
+    let op = Op(code[pc])
+    inc pc
+
+    # Number of immediate bytes still to be flagged for this opcode.
+    var remaining: int
+    if op == RJUMP or op == RJUMPI:
+      # RJUMP and RJUMPI always carry a 2 byte operand.
+      bits.setN(set2BitsMask, pc)
+      pc += 2
+      continue
+    elif op == RJUMPV:
+      # RJUMPV has a variable sized operand whose total size is
+      # given by the count byte immediately following the opcode.
+      # Truncation is caught by other validation steps; here we
+      # just produce a valid bitmap for as much of the code as
+      # is available.
+      if pc >= code.len:
+        # Count byte missing, nothing left to flag.
+        return
+      # Count byte plus 2 bytes per jump table entry, clamped to
+      # the bytes that are actually present.
+      remaining = min(code[pc].int*2 + 1, code.len - pc)
+    elif op >= PUSH1 and op <= PUSH32:
+      remaining = op.int - PUSH1.int + 1
+    else:
+      # Opcode without immediate data.
+      continue
+
+    # Flag the bulk of the data 16 and then 8 bytes at a time.
+    while remaining >= 16:
+      bits.set16(pc)
+      pc += 16
+      remaining -= 16
+
+    while remaining >= 8:
+      bits.set8(pc)
+      pc += 8
+      remaining -= 8
+
+    # Flag the tail of 0..7 bytes.
+    if remaining == 1:
+      bits.set1(pc)
+    elif remaining >= 2:
+      bits.setN(tailMasks[remaining - 2], pc)
+    pc += remaining
+
+# eofCodeBitmap collects data locations in EOF code.
+proc eofCodeBitmap*(code: openArray[byte]): Bitvec =
+  # Reserve 4 bytes more than strictly needed: when the code ends
+  # with a PUSH32 the algorithm writes onto the bit vector beyond
+  # the bounds of the actual code.
+  const spareBytes = 4
+  result = newSeq[byte](code.len div 8 + 1 + spareBytes)
+  eofCodeBitmapInternal(result, code)
diff --git a/nimbus/evm/computation.nim b/nimbus/evm/computation.nim
@@ -11,7 +11,7 @@
 import
   ".."/[db/accounts_cache, constants],
   "."/[code_stream, memory, message, stack, state],
-  "."/[types],
+  "."/[types, validate],
   ./interpreter/[gas_meter, gas_costs, op_codes],
   ../common/[common, evmforks],
   ../utils/[utils, eof],
@@ -340,6 +340,11 @@ proc writeContract*(c: Computation)
         c.setError("EOF retcode parse error: " & res.error.toString, true)
         return
 
+      let vres = con.validateCode()
+      if vres.isErr:
+        c.setError("EOF retcode validate error: " & vres.error.toString, true)
+        return
+
     elif fork >= FkLondon:
       withExtra trace, "New contract code starts with 0xEF byte, not allowed by EIP-3541"
       c.setError(EVMC_CONTRACT_VALIDATION_FAILURE, true)

diff --git a/nimbus/evm/interpreter/op_handlers.nim b/nimbus/evm/interpreter/op_handlers.nim
@@ -75,13 +75,6 @@ proc mkOpTable(selected: EVMFork): array[Op,Vm2OpExec] {.compileTime.} =
 # Public functions
 # ------------------------------------------------------------------------------
 
-#const
-#  vm2OpHandlers* = block:
-#    var rc: array[Fork, array[Op, Vm2OpExec]]
-#    for w in Fork:
-#      rc[w] = w.mkOpTable
-#    rc
-
 type
   vmOpHandlersRec* = tuple
     name: string    ## Name (or ID) of op handler

diff --git a/nimbus/evm/interpreter/op_handlers/oph_call.nim b/nimbus/evm/interpreter/op_handlers/oph_call.nim
@@ -519,7 +519,7 @@ const
             post: vm2OpIgnore)),
 
     (opCode: CallCode,     ## 0xf2, Message-Call with alternative code
-     forks: Vm2OpAllForks,
+     forks: Vm2OpAllForks - Vm2OpEOFAndLater,
      name: "callCode",
      info: "Message-call into this account with alternative account's code",
      exec: (prep: vm2OpIgnore,

diff --git a/nimbus/evm/interpreter/op_handlers/oph_sysops.nim b/nimbus/evm/interpreter/op_handlers/oph_sysops.nim
@@ -209,7 +209,7 @@ const
             post: vm2OpIgnore)),
 
     (opCode: SelfDestruct, ## 0xff, EIP2929: self destruct, Berlin and later
-     forks: Vm2OpBerlinAndLater,
+     forks: Vm2OpBerlinAndLater - Vm2OpEOFAndLater,
      name: "selfDestructEIP2929",
      info: "EIP2929: Halt execution and register account for later deletion",
      exec: (prep: vm2OpIgnore,

diff --git a/nimbus/evm/interpreter_dispatch.nim b/nimbus/evm/interpreter_dispatch.nim
@@ -17,7 +17,7 @@ import
   std/[macros, sets, strformat],
   pkg/[chronicles, chronos, stew/byteutils],
   ".."/[constants, db/accounts_cache],
-  "."/[code_stream, computation],
+  "."/[code_stream, computation, validate],
   "."/[message, precompiles, state, types],
   ../utils/[utils, eof],
   ./interpreter/[op_dispatcher, gas_costs],
@@ -170,6 +170,11 @@ proc beforeExecCreate(c: Computation): bool
         c.setError("EOF initcode parse error: " & res.error.toString, false)
         return true
 
+      let vres = c.code.container.validateCode()
+      if vres.isErr:
+        c.setError("EOF initcode validation error: " & vres.error.toString, false)
+        return true
+
   c.snapshot()
 
   if c.vmState.readOnlyStateDB().hasCodeOrNonce(c.msg.contractAddress):