Skip to content

Commit

Permalink
first draft decompiler
Browse files Browse the repository at this point in the history
  • Loading branch information
d-xo committed Sep 29, 2023
1 parent 47404a2 commit 29e84f3
Show file tree
Hide file tree
Showing 9 changed files with 235 additions and 20 deletions.
2 changes: 2 additions & 0 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
pkgs.mdbook
pkgs.mdbook-mermaid
pkgs.mdbook-katex
pkgs.secp256k1
pkgs.libff
];
withHoogle = true;
shellHook = ''
Expand Down
205 changes: 205 additions & 0 deletions src/Decompile.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE NoFieldSelectors #-}

{-|
Module : Decompile
Description : Decompile EVM bytecode into Act
This module decompiles EVM bytecode into an Act spec. It operates as follows
1. Symbolically execute the bytecode to produce an EVM.Expr
2. Transform that Expr into one that can be safely represented using Integers
3. Convert that Expr into an Act spec (trusts solc compiler output)
4. Compile the generated Act spec back to Expr and check equivalence (solc compiler output no longer trusted)
-}
module Decompile where

import Debug.Trace

import Prelude hiding (LT, GT)

import Control.Monad.Except
import Control.Monad.Extra
import Data.Bifunctor
import Data.Set (Set)
import Data.Set qualified as Set
import Data.Map (Map)
import Data.Map qualified as Map
import Data.Maybe (fromJust)
import Data.Text (Text)
import Data.Text qualified as T
import Data.Typeable
import EVM.Fetch qualified as Fetch
import EVM.Solidity hiding (SlotType(..))
import EVM.Solidity qualified as EVM (SlotType(..))
import EVM.Types qualified as EVM
import EVM.Solvers (SolverGroup, withSolvers, Solver(..))
import EVM.SymExec
import EVM.Expr (simplify, isSuccess)
import GHC.IO

import Syntax.Annotated
import HEVM


data EVMContract = EVMContract
{ name :: Text
, storageLayout :: Map Text StorageItem
, runtime :: Map Interface (Set (EVM.Expr EVM.End))
, creation :: (Interface, Set (EVM.Expr EVM.End))
}
deriving (Show, Eq)

-- | Explore both the initcode and runtimecode of the contract
explore :: SolverGroup -> SolcContract -> IO (Either Text EVMContract)
explore solvers contract = do
ctor <- exploreCreation solvers contract
behvs <- exploreRuntime solvers contract
runExceptT $ do
storage <- ExceptT . pure
$ toErr "missing storage layout in compiler output" contract.storageLayout
pure $ EVMContract
{ name = contract.contractName
, storageLayout = storage
, runtime = behvs
, creation = ctor
}

exploreRuntime :: SolverGroup -> SolcContract -> IO (Map Interface (Set (EVM.Expr EVM.End)))
exploreRuntime solvers contract = fmap (Map.fromList . Map.elems) $ forM contract.abiMap $ \method -> do
let calldata = first (`writeSelector` method.methodSignature)
. (flip combineFragments) (EVM.AbstractBuf "txdata")
$ fmap (uncurry symAbiArg) method.inputs
prestate <- stToIO $ abstractVM (fst calldata, []) contract.runtimeCode Nothing False
expr <- simplify <$> interpret (Fetch.oracle solvers Nothing) Nothing 1 StackBased prestate runExpr
let sucs = Set.fromList . filter isSuccess . flattenExpr $ expr
let iface = behvIface method
pure (iface, sucs)

exploreCreation :: SolverGroup -> SolcContract -> IO (Interface, Set (EVM.Expr EVM.End))
exploreCreation solvers contract = do
-- TODO: doesn't this have a 4 byte gap at the front?
let args = (flip combineFragments) (EVM.ConcreteBuf "") $ fmap (uncurry symAbiArg) contract.constructorInputs
initVM <- stToIO $ abstractVM (fst args, []) contract.creationCode Nothing True
expr <- simplify <$> interpret (Fetch.oracle solvers Nothing) Nothing 1 StackBased initVM runExpr
let sucs = Set.fromList . filter isSuccess . flattenExpr $ expr
pure (ctorIface contract.constructorInputs, sucs)

toErr :: Text -> Maybe a -> Either Text a
toErr _ (Just a) = Right a
toErr msg Nothing = Left msg

ctorIface :: [(Text, AbiType)] -> Interface
ctorIface args = Interface "constructor" (fmap (\(n, t) -> Decl t (T.unpack n)) args)

behvIface :: Method -> Interface
behvIface method = Interface (T.unpack method.name) (fmap (\(n, t) -> Decl t (T.unpack n)) method.inputs)

translate :: EVMContract -> Either Text Act
translate c = do
contract <- liftM2 Contract (mkConstructor c) (mkBehvs c)
let store = mkStore c
pure $ Act store [contract]

mkStore :: EVMContract -> Store
mkStore c = Map.singleton (T.unpack c.name) (Map.mapKeys T.unpack $ fmap fromitem c.storageLayout)
where
fromitem item = (convslot item.slotType, toInteger item.slot)
convslot (EVM.StorageMapping a b) = StorageMapping (fmap PrimitiveType a) (PrimitiveType b)
convslot (EVM.StorageValue a) = StorageValue (PrimitiveType a)

mkConstructor :: EVMContract -> Either Text Constructor
mkConstructor cs
| Set.size (snd cs.creation) == 1 =
case head (Set.elems (snd cs.creation)) of
EVM.Success props _ _ _ -> do
ps <- mapM fromProp props
pure $ Constructor
{ _cname = T.unpack cs.name
, _cinterface = fst cs.creation
, _cpreconditions = ps
, _cpostconditions = mempty
, _invariants = mempty
, _initialStorage = mempty -- TODO
, _cstateUpdates = mempty -- TODO
}
_ -> error "unexpected unsucessful branch"
| otherwise = error "TODO: decompile constructors with multiple branches"

mkBehvs :: EVMContract -> Either Text [Behaviour]
mkBehvs c = concatMapM (\(i, bs) -> mapM (mkbehv i) (Set.toList bs)) (Map.toList c.runtime)
where
mkbehv :: Interface -> EVM.Expr EVM.End -> Either Text Behaviour
mkbehv iface@(Interface method _) (EVM.Success props _ _ _) = do
pres <- mapM fromProp props
pure $ Behaviour
{ _contract = T.unpack c.name
, _interface = iface
, _name = method
, _preconditions = pres
, _caseconditions = mempty -- TODO: what to do here?
, _postconditions = mempty
, _stateUpdates = mempty -- TODO
, _returns = Nothing -- TODO
}
mkbehv _ _ = error "unexpected unsucessful branch"

fromProp :: EVM.Prop -> Either Text (Exp ABoolean)
fromProp p = case p of
EVM.PEq (a :: EVM.Expr t) b -> case eqT @t @EVM.EWord of
Nothing -> Left $ "cannot decompile props comparing equality of non word terms: " <> T.pack (show p)
Just Refl -> liftM2 (Eq nowhere SInteger) (fromWord a) (fromWord b)
EVM.PLT a b -> liftM2 (LT nowhere) (fromWord a) (fromWord b)
EVM.PGT a b -> liftM2 (LT nowhere) (fromWord a) (fromWord b)
EVM.PGEq a b -> liftM2 (LT nowhere) (fromWord a) (fromWord b)
EVM.PLEq a b -> liftM2 (LT nowhere) (fromWord a) (fromWord b)
EVM.PNeg a -> fmap (Neg nowhere) (fromProp a)
EVM.PAnd a b -> liftM2 (And nowhere) (fromProp a) (fromProp b)
EVM.POr a b -> liftM2 (Or nowhere) (fromProp a) (fromProp b)
EVM.PImpl a b -> liftM2 (Impl nowhere) (fromProp a) (fromProp b)
EVM.PBool a -> pure $ LitBool nowhere a

fromWord :: EVM.Expr EVM.EWord -> Either Text (Exp AInteger)
fromWord w = case w of
EVM.Lit a -> Right $ LitInt nowhere (toInteger a)
-- TODO: get the actual abi type from the compiler output
EVM.Var a -> Right $ Var nowhere SInteger (AbiBytesType 32) (T.unpack a)
EVM.IsZero a -> do
a' <- fromWord a
Right $ ITE nowhere (Eq nowhere SInteger a' (LitInt nowhere 0)) (LitInt nowhere 1) (LitInt nowhere 0)
EVM.TxValue -> Right $ IntEnv nowhere Callvalue
_ -> Left $ "unable to decompile: " <> T.pack (show w)

verifyDecompilation :: ByteString -> ByteString -> Act -> IO ()
verifyDecompilation creation runtime spec =
withSolvers CVC5 4 Nothing $ \solvers -> do
let opts = defaultVeriOpts
-- Constructor check
checkConstructors solvers opts creation runtime spec
-- Behavours check
checkBehaviours solvers opts runtime spec
-- ABI exhaustiveness sheck
checkAbi solvers opts spec runtime


-- get full SolcContract from foundry project
test :: IO ()
test = do
cs <- readBuildOutput "/home/me/src/mine/scratch/solidity" Foundry
case cs of
Left e -> print e
Right (BuildOutput (Contracts o) _) -> do
withSolvers CVC5 4 Nothing $ \solvers -> do
let c = fromJust $ Map.lookup "src/basic.sol:Basic" o
spec <- runExceptT $ do
exprs <- ExceptT $ explore solvers c
ExceptT (pure $ translate exprs)
case spec of
Left e -> print e
Right s -> verifyDecompilation c.creationCode c.runtimeCode s

11 changes: 4 additions & 7 deletions src/HEVM.hs
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
{-# LANGUAGE GADTs #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE NoFieldSelectors #-}


module HEVM where
Expand All @@ -35,7 +32,7 @@ import Syntax.Annotated
import Syntax.Untyped (makeIface)
import Syntax

import qualified EVM.Types as EVM hiding (Contract(..))
import qualified EVM.Types as EVM hiding (Contract(..), FrameState(..))
import EVM.Expr hiding (op2, inRange)
import EVM.SymExec hiding (EquivResult, isPartial)
import qualified EVM.SymExec as SymExec (EquivResult)
Expand Down Expand Up @@ -240,8 +237,8 @@ refOffset _ _ = error "TODO"

ethEnvToWord :: EthEnv -> EVM.Expr EVM.EWord
ethEnvToWord Callvalue = EVM.TxValue
ethEnvToWord Caller = EVM.WAddr $ EVM.SymAddr "caller"
ethEnvToWord Origin = EVM.Origin
ethEnvToWord Caller = EVM.WAddr (EVM.SymAddr "caller")
ethEnvToWord Origin = EVM.WAddr (EVM.SymAddr "origin")
ethEnvToWord Blocknumber = EVM.BlockNumber
ethEnvToWord Blockhash = error "TODO" -- TODO argument of EVM.BlockHash ??
ethEnvToWord Chainid = EVM.ChainId
Expand Down
4 changes: 2 additions & 2 deletions src/SMT.hs
Original file line number Diff line number Diff line change
Expand Up @@ -514,8 +514,8 @@ parseSMTModel s = if length s0Caps == 1
-- or "((identifier (value)))" for negative integers.
-- The stage0 regex first extracts either value or (value), and then the
-- stage1 regex is used to strip the additional brackets if required.
stage0 = "\\`\\(\\([a-zA-Z0-9_]+ ([ \"\\(\\)a-zA-Z0-9_\\-]+)\\)\\)\\'"
stage1 = "\\(([ a-zA-Z0-9_\\-]+)\\)"
stage0 = "\\`\\(\\([a-zA-Z0-9_]+ ([ \"\\(\\)a-zA-Z0-9_\\-]+)\\)\\)\\'" :: String
stage1 = "\\(([ a-zA-Z0-9_\\-]+)\\)" :: String

s0Caps = getCaptures s stage0
s1Caps = getCaptures (head s0Caps) stage1
Expand Down
10 changes: 4 additions & 6 deletions src/Syntax.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
{-# LANGUAGE GADTs #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE RankNTypes #-}

Expand Down Expand Up @@ -75,7 +73,7 @@ locFromUpdate :: StorageUpdate t -> StorageLocation t
locFromUpdate (Update _ item _) = _Loc item

locsFromItem :: TStorageItem a t -> [StorageLocation t]
locsFromItem item = _Loc item : concatMap locsFromTypedExp (ixsFromItem item)
locsFromItem item = _Loc item : concatMap locsFromTypedExp (ixsFromItem item)

locsFromTypedExp :: TypedExp t -> [StorageLocation t]
locsFromTypedExp (TExp _ e) = locsFromExp e
Expand Down Expand Up @@ -159,7 +157,7 @@ createsFromExp = nub . go
Var {} -> []

createsFromItem :: TStorageItem a t -> [Id]
createsFromItem item = concatMap createsFromTypedExp (ixsFromItem item)
createsFromItem item = concatMap createsFromTypedExp (ixsFromItem item)

createsFromTypedExp :: TypedExp t -> [Id]
createsFromTypedExp (TExp _ e) = createsFromExp e
Expand All @@ -168,7 +166,7 @@ createsFromContract :: Typed.Contract -> [Id]
createsFromContract (Contract constr behvs) =
createsFromConstructor constr <> concatMap createsFromBehaviour behvs

createsFromConstructor :: Typed.Constructor -> [Id]
createsFromConstructor :: Typed.Constructor -> [Id]
createsFromConstructor (Constructor _ _ pre post inv initialStorage rewrites) = nub $
concatMap createsFromExp pre
<> concatMap createsFromExp post
Expand Down Expand Up @@ -435,7 +433,7 @@ idFromRewrites e = case e of
idFromEntry (EVar p x) = singleton x [p]
idFromEntry (EMapping _ en xs) = unionWith (<>) (idFromEntry en) (idFromRewrites' xs)
idFromEntry (EField _ en _) = idFromEntry en

-- | True iff the case is a wildcard.
isWild :: Case -> Bool
isWild (Case _ (WildExp _) _) = True
Expand Down
4 changes: 2 additions & 2 deletions src/Syntax/Untyped.hs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ type Ensures = [Expr]
type Invariants = [Expr]

data Interface = Interface Id [Decl]
deriving (Eq)
deriving (Eq, Ord)

instance Show Interface where
show (Interface a d) = a <> "(" <> intercalate ", " (fmap show d) <> ")"
Expand Down Expand Up @@ -158,7 +158,7 @@ data StorageVar = StorageVar Pn SlotType Id
deriving (Eq, Show)

data Decl = Decl AbiType Id
deriving Eq
deriving (Eq, Ord)

instance Show Decl where
show (Decl t a) = show t <> " " <> a
Expand Down
11 changes: 9 additions & 2 deletions src/act.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ common deps
build-depends: base >= 4.9 && < 5,
aeson >= 1.0,
containers >= 0.5,
hevm ,
hevm >= 0.51.4,
lens >= 4.17.1,
text >= 1.2,
array >= 0.5.3.0,
Expand Down Expand Up @@ -43,7 +43,14 @@ library
build-tool-depends: happy:happy, alex:alex
hs-source-dirs: .
default-language: Haskell2010
exposed-modules: CLI Error Print SMT Syntax.Annotated Syntax.TimeAgnostic
default-extensions:
ImportQualifiedPost
LambdaCase
OverloadedLabels
DataKinds
GADTs

exposed-modules: CLI Error Print SMT Syntax.Annotated Syntax.TimeAgnostic Decompile
other-modules: Lex Parse K Coq Syntax Syntax.Untyped Syntax.Typed Syntax.Types Syntax.Timing Type Enrich Dev HEVM Consistency

executable act
Expand Down
6 changes: 6 additions & 0 deletions src/cabal.project
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
packages: .
repository local-pkgs
url: file+noindex:///home/me/src/local-pkgs
active-repositories:
, local-pkgs
, hackage.haskell.org
2 changes: 1 addition & 1 deletion src/hie.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cradle:
cabal:
- path: "."
component: "lib:act-internal"
component: "lib:act"
- path: "./Main.hs"
component: "exe:act"
- path: "./test"
Expand Down

0 comments on commit 29e84f3

Please sign in to comment.