From 86d244fad0b86564eeba3e85e4a40a397bbcb086 Mon Sep 17 00:00:00 2001 From: zapashcanon Date: Thu, 4 Jul 2024 16:57:56 +0200 Subject: [PATCH] move some validation out of rewrite --- src/ast/binary.ml | 2 +- src/ast/binary_encoder.ml | 5 +- src/ast/binary_to_text.ml | 3 +- src/ast/binary_types.ml | 4 +- src/bin/owi.ml | 25 +-- src/link/link.ml | 5 +- src/link/link_env.ml | 4 +- src/parser/binary_parser.ml | 14 +- src/text_to_binary/assigned.ml | 2 +- src/text_to_binary/rewrite.ml | 202 +++++++++--------------- src/utils/result.ml | 34 ++-- src/utils/result.mli | 15 +- src/validate/typecheck.ml | 278 ++++++++++++++++++++------------- test/script/gc.t | 10 +- test/script/reference.t | 2 +- test/script/reference_opt.t | 2 +- test/validate/unknown_label.t | 4 +- test/wasm2wat/not_exists.t | 2 +- test/wat2wasm/not_exists.t | 2 +- 19 files changed, 314 insertions(+), 301 deletions(-) diff --git a/src/ast/binary.ml b/src/ast/binary.ml index 3cfd75d79..aea8437c1 100644 --- a/src/ast/binary.ml +++ b/src/ast/binary.ml @@ -27,7 +27,7 @@ type global = type data_mode = | Data_passive (* TODO: Data_active binary+const expr*) - | Data_active of int option * binary expr + | Data_active of int * binary expr type data = { id : string option diff --git a/src/ast/binary_encoder.ml b/src/ast/binary_encoder.ml index 82891bd51..a4d8ee345 100644 --- a/src/ast/binary_encoder.ml +++ b/src/ast/binary_encoder.ml @@ -604,16 +604,15 @@ let write_data buf ({ init; mode; _ } : data) = | Data_passive -> write_u32_of_int buf 1; write_string buf init - | Data_active (Some 0, expr) -> + | Data_active (0, expr) -> write_u32_of_int buf 0; write_expr buf expr ~end_op_code:None; write_string buf init - | Data_active (Some i, expr) -> + | Data_active (i, expr) -> write_u32_of_int buf 2; write_u32_of_int buf i; write_expr buf expr ~end_op_code:None; write_string buf init - | Data_active (None, _) -> assert false let encode_section buf id encode_func data = let section_buf = Buffer.create 16 in diff --git a/src/ast/binary_to_text.ml b/src/ast/binary_to_text.ml index 0d27d88c2..30ac34f00 100644 --- a/src/ast/binary_to_text.ml +++ b/src/ast/binary_to_text.ml @@ -114,9 +114,8 @@ let convert_data_mode (m : Binary.data_mode) : Text.data_mode = match m with | Data_passive -> Data_passive | Data_active (i, e) -> - let i = Option.map (fun i -> Raw i) i in let e = convert_expr e in - Data_active (i, e) + Data_active (Some (Raw i), e) let convert_data (e : Binary.data) : Text.data = let { Binary.id; init; mode } : Binary.data = e in diff --git a/src/ast/binary_types.ml b/src/ast/binary_types.ml index 941f4322b..f297d3146 100644 --- a/src/ast/binary_types.ml +++ b/src/ast/binary_types.ml @@ -21,10 +21,10 @@ let convert_heap_type tbl = function Ok t | Def_ht (Text i) -> begin match tbl with - | None -> Error `Unknown_type + | None -> Error (`Unknown_type (Text i)) | Some tbl -> begin match Hashtbl.find_opt tbl i with - | None -> Error `Unknown_type + | None -> Error (`Unknown_type (Text i)) | Some i -> ok @@ Def_ht (Raw i) end end diff --git a/src/bin/owi.ml b/src/bin/owi.ml index 04bb5bf0b..b7b50dc21 100644 --- a/src/bin/owi.ml +++ b/src/bin/owi.ml @@ -314,17 +314,20 @@ let exit_code = | `Unbound_name _id -> 38 | `Undeclared_function_reference -> 39 | `Unexpected_token _token -> 40 - | `Unknown_function _id -> 41 - | `Unknown_global -> 42 - | `Unknown_import _ -> 43 - | `Unknown_label -> 44 - | `Unknown_local _id -> 45 - | `Unknown_memory _id -> 46 - | `Unknown_module _id -> 47 - | `Unknown_operator -> 48 - | `Unknown_type -> 49 - | `Unsupported_file_extension _ext -> 50 - | `Failed_with_but_expected (_got, _expected) -> 51 + | `Unknown_data _id -> 41 + | `Unknown_elem _id -> 42 + | `Unknown_func _id -> 43 + | `Unknown_global _id -> 44 + | `Unknown_import _ -> 45 + | `Unknown_label _id -> 46 + | `Unknown_local _id -> 47 + | `Unknown_memory _id -> 48 + | `Unknown_module _id -> 49 + | `Unknown_operator -> 50 + | `Unknown_table _id -> 51 + | `Unknown_type _id -> 52 + | `Unsupported_file_extension _ext -> 53 + | `Failed_with_but_expected (_got, _expected) -> 54 end end | Error e -> ( diff --git a/src/link/link.ml b/src/link/link.ml index ca8c76987..c12ecc4f4 100644 --- a/src/link/link.ml +++ b/src/link/link.ml @@ -271,7 +271,7 @@ let active_elem_expr ~offset ~length ~table ~elem = ] let active_data_expr ~offset ~length ~mem ~data = - if mem <> 0 then Error (`Unknown_memory mem) + if mem <> 0 then Error (`Unknown_memory (Raw mem)) else Ok [ I32_const offset @@ -293,8 +293,7 @@ let define_data env data = let env = Link_env.Build.add_data id data' env in let* init = match data.mode with - | Data_active (None, _) -> assert false - | Data_active (Some mem, offset) -> + | Data_active (mem, offset) -> let* offset = Eval_const.expr env offset in let length = Int32.of_int @@ String.length data.init in let* offset = get_i32 offset in diff --git a/src/link/link_env.ml b/src/link/link_env.ml index 8439487cf..f384d7739 100644 --- a/src/link/link_env.ml +++ b/src/link/link_env.ml @@ -122,7 +122,7 @@ module Build = struct let get_global (env : t) id = match IMap.find_opt id env.globals with - | None -> Error `Unknown_global + | None -> Error (`Unknown_global (Raw id)) | Some v -> Ok v let get_const_global (env : t) id = @@ -133,7 +133,7 @@ module Build = struct let get_func (env : t) id = match IMap.find_opt id env.functions with - | None -> Error (`Unknown_function id) + | None -> Error (`Unknown_func (Raw id)) | Some v -> Ok v end diff --git a/src/parser/binary_parser.ml b/src/parser/binary_parser.ml index 97ea25265..50212902a 100644 --- a/src/parser/binary_parser.ml +++ b/src/parser/binary_parser.ml @@ -892,24 +892,20 @@ let read_code types input = let read_data_active types input = let* Raw index, input = read_indice input in let+ offset, input = read_const types input in - (Data_active (Some index, offset), input) + (Data_active (index, offset), input) let read_data_active_zero types input = let+ offset, input = read_const types input in - (Data_active (Some 0, offset), input) + (Data_active (0, offset), input) -let read_data types memories input = +let read_data types input = let* i, input = read_U32 input in let id = None in match i with | 0 -> let* mode, input = read_data_active_zero types input in - let* init, input = read_bytes ~msg:"read_data 0" input in + let+ init, input = read_bytes ~msg:"read_data 0" input in let init = string_of_char_list init in - (* TODO: this should be removed once we do proper validation of binary modules *) - let+ () = - if List.is_empty memories then Error (`Unknown_memory 0) else Ok () - in ({ id; init; mode }, input) | 1 -> let mode = Data_passive in @@ -1052,7 +1048,7 @@ let sections_iterate (input : Input.t) = (* Data *) let+ data_section, input = section_parse input ~expected_id:'\x0B' [] - (vector_no_id (read_data type_section memory_section)) + (vector_no_id (read_data type_section)) in let* () = diff --git a/src/text_to_binary/assigned.ml b/src/text_to_binary/assigned.ml index fcd8a0485..8d45fbc37 100644 --- a/src/text_to_binary/assigned.ml +++ b/src/text_to_binary/assigned.ml @@ -126,7 +126,7 @@ let check_type_id (types : binary str_type Named.t) (check : Grouped.type_check) in (* TODO more efficient version of that *) match Indexed.get_at id types.values with - | None -> Error `Unknown_type + | None -> Error (`Unknown_type (Raw id)) | Some (Def_func_t func_type') -> let* func_type = Binary_types.convert_func_type None func_type in if not (equal_func_types func_type func_type') then diff --git a/src/text_to_binary/rewrite.ml b/src/text_to_binary/rewrite.ml index caae20ae0..d456b5649 100644 --- a/src/text_to_binary/rewrite.ml +++ b/src/text_to_binary/rewrite.ml @@ -18,38 +18,23 @@ let typemap (types : binary str_type Named.t) = (fun idx typ acc -> TypeMap.add typ (Raw idx) acc) types TypeMap.empty -let find msg (named : 'a Named.t) : _ -> binary indice Result.t = function - | Raw i as indice -> - (* TODO change Indexed.t strucure for that to be more efficient *) - if not (List.exists (Indexed.has_index i) named.values) then - Error (`Msg (Format.sprintf "%s %i" msg i)) - else Ok indice +let find error (named : 'a Named.t) : _ -> binary indice Result.t = function + | Raw _i as indice -> Ok indice | Text name -> ( match String_map.find_opt name named.named with - | None -> Error (`Msg (Format.sprintf "%s %s" msg name)) + | None -> Error error | Some i -> Ok (Raw i) ) -let get msg (named : 'a Named.t) indice : 'a Indexed.t Result.t = - let* (Raw i) = find msg named indice in +let get error (named : 'a Named.t) indice : 'a Indexed.t Result.t = + let* (Raw i) = find error named indice in (* TODO change Named.t structure to make that sensible *) - match List.nth_opt named.values i with - | None -> Error (`Msg msg) - | Some v -> Ok v + match List.nth_opt named.values i with None -> Error error | Some v -> Ok v -let find_global (modul : Assigned.t) ~imported_only id : (int * mut) Result.t = - let* (Raw idx) = find "unknown global" modul.global id in - let va = List.find (Indexed.has_index idx) modul.global.values in - let+ mut, _typ = - match Indexed.get va with - | Imported imported -> Ok imported.desc - | Local global -> - if imported_only then Error `Unknown_global - else - let mut, val_type = global.typ in - let+ val_type = Binary_types.convert_val_type None val_type in - (mut, val_type) - in - (idx, mut) +let find_global (modul : Assigned.t) id : binary indice Result.t = + find (`Unknown_global id) modul.global id + +let find_memory (modul : Assigned.t) id : binary indice Result.t = + find (`Unknown_memory id) modul.mem id let rewrite_expr (modul : Assigned.t) (locals : binary param list) (iexpr : text expr) : binary expr Result.t = @@ -70,17 +55,18 @@ let rewrite_expr (modul : Assigned.t) (locals : binary param list) block_ids with Exit -> () end; - if !pos = -1 then Error `Unknown_label else Ok !pos + if !pos = -1 then Error (`Unknown_label (Text id)) else Ok !pos | Raw id -> Ok id in (* this is > and not >= because you can `br 0` without any block to target the function *) - if id > List.length block_ids + loop_count then Error `Unknown_label + if id > List.length block_ids + loop_count then + Error (`Unknown_label (Raw id)) else Ok (Raw id) in let bt_some_to_raw : text block_type -> binary block_type Result.t = function | Bt_ind ind -> begin - let+ v = get "unknown type" modul.typ ind in + let+ v = get (`Unknown_type ind) modul.typ ind in match Indexed.get v with | Def_func_t t' -> let idx = Indexed.get_index v in @@ -94,7 +80,7 @@ let rewrite_expr (modul : Assigned.t) (locals : binary param list) | Some ind -> (* we check that the explicit type match the type_use, we have to remove parameters names to do so *) let* t' = - let+ v = get "unknown type" modul.typ ind in + let+ v = get (`Unknown_type ind) modul.typ ind in match Indexed.get v with Def_func_t t' -> t' | _ -> assert false in let ok = Binary_types.equal_func_types t t' in @@ -109,7 +95,7 @@ let rewrite_expr (modul : Assigned.t) (locals : binary param list) Some raw in - let* locals, after_last_assigned_local = + let* locals, _after_last_assigned_local = List.fold_left (fun acc ((name, _type) : binary param) -> let* locals, next_free_int = acc in @@ -124,22 +110,18 @@ let rewrite_expr (modul : Assigned.t) (locals : binary param list) in let find_local = function - | Raw i as id -> - if i >= after_last_assigned_local then - Error (`Unknown_local (string_of_int i)) - else Ok id - | Text name -> ( + | Raw _i as id -> Ok id + | Text name as id -> ( match String_map.find_opt name locals with - | None -> Error (`Unknown_local name) + | None -> Error (`Unknown_local id) | Some id -> Ok (Raw id) ) in - let find_table id = find "unknown table" modul.table id in - let find_func id = find "unknown function" modul.func id in - let _find_mem id = find "unknown memory" modul.mem id in - let find_data id = find "unknown data segment" modul.data id in - let find_elem id = find "unknown elem segment" modul.elem id in - let find_type id = find "unknown type" modul.typ id in + let find_table id = find (`Unknown_table id) modul.table id in + let find_func id = find (`Unknown_func id) modul.func id in + let find_data id = find (`Unknown_data id) modul.data id in + let find_elem id = find (`Unknown_elem id) modul.elem id in + let find_type id = find (`Unknown_type id) modul.typ id in let rec body (loop_count, block_ids) : text instr -> binary instr Result.t = function @@ -197,15 +179,12 @@ let rewrite_expr (modul : Assigned.t) (locals : binary param list) | Return_call_ref bt -> let+ bt = bt_some_to_raw bt in Return_call_ref bt - | Global_set id -> begin - let* idx, mut = find_global modul ~imported_only:false id in - match mut with - | Const -> Error `Global_is_immutable - | Var -> ok @@ Global_set (Raw idx) - end + | Global_set id -> + let+ idx = find_global modul id in + Global_set idx | Global_get id -> - let+ idx, _mut = find_global modul ~imported_only:false id in - Global_get (Raw idx) + let+ idx = find_global modul id in + Global_get idx | Ref_func id -> let+ id = find_func id in Ref_func id @@ -233,11 +212,8 @@ let rewrite_expr (modul : Assigned.t) (locals : binary param list) let+ table' = find_table i' in Table_copy (table, table') | Memory_init id -> - (* TODO: this is already checked in typechecked.ml but for some reason, but we need to test is here first for now in order to pass a test, until we move the find_data validation for *binary* indice in typecheck *) - if List.length modul.mem.values < 1 then Error (`Unknown_memory 0) - else - let+ id = find_data id in - Memory_init id + let+ id = find_data id in + Memory_init id | Data_drop id -> let+ id = find_data id in Data_drop id @@ -303,41 +279,11 @@ let rewrite_expr (modul : Assigned.t) (locals : binary param list) in expr iexpr (0, []) -(* TODO: binary+const expr/list *) -let rewrite_const_expr (modul : Assigned.t) (expr : text expr) : - binary expr Result.t = - let const_instr (instr : text instr) : binary instr Result.t = - match instr with - | Global_get id -> begin - let* idx, mut = find_global modul ~imported_only:true id in - match mut with - | Const -> ok @@ Global_get (Raw idx) - | Var -> Error `Constant_expression_required - end - | Ref_null v -> - let+ v = Binary_types.convert_heap_type None v in - Ref_null v - | Ref_func f -> - let+ f = find "unknown function" modul.func f in - Ref_func f - | Array_new t -> - let+ t = find "unknown type" modul.typ t in - Array_new t - | Array_new_default t -> - let+ t = find "unknown type" modul.typ t in - Array_new_default t - | ( I32_const _ | I64_const _ | F32_const _ | F64_const _ | Ref_i31 - | I_binop (_, (Add | Sub | Mul)) ) as i -> - Ok i - | _i -> Error `Constant_expression_required - in - list_map const_instr expr - let rewrite_block_type (typemap : binary indice TypeMap.t) (modul : Assigned.t) (block_type : text block_type) : binary block_type Result.t = match block_type with | Bt_ind id -> begin - let+ v = get "unknown type" modul.typ id in + let+ v = get (`Unknown_type id) modul.typ id in match Indexed.get v with | Def_func_t t' -> let idx = Indexed.get_index v in @@ -358,7 +304,7 @@ let rewrite_block_type (typemap : binary indice TypeMap.t) (modul : Assigned.t) let rewrite_global (modul : Assigned.t) (global : Text.global) : Binary.global Result.t = - let* init = rewrite_const_expr modul global.init in + let* init = rewrite_expr modul [] global.init in let mut, val_type = global.typ in let+ val_type = Binary_types.convert_val_type None val_type in let typ = (mut, val_type) in @@ -372,13 +318,13 @@ let rewrite_elem (modul : Assigned.t) (elem : Text.elem) : Binary.elem Result.t | Elem_passive -> Ok Elem_passive | Elem_active (None, _expr) -> (* TODO: does this really happen ? *) - Error (`Msg "unknown table") - | Elem_active (Some indice, expr) -> - let* (Raw indice) = find "unknown table" modul.table indice in - let+ expr = rewrite_const_expr modul expr in + Error (`Unknown_table (Raw 0)) + | Elem_active (Some id, expr) -> + let* (Raw indice) = find (`Unknown_table id) modul.table id in + let+ expr = rewrite_expr modul [] expr in Binary.Elem_active (Some indice, expr) in - let* init = list_map (rewrite_const_expr modul) elem.init in + let* init = list_map (rewrite_expr modul []) elem.init in let+ typ = Binary_types.convert_ref_type None elem.typ in { Binary.init; mode; id = elem.id; typ } @@ -388,16 +334,16 @@ let rewrite_data (modul : Assigned.t) (data : Text.data) : Binary.data Result.t match data.mode with | Data_passive -> Ok Binary.Data_passive | Data_active (None, _expr) -> - (* TODO: does this really happen ? *) - Error (`Msg "unknown memory") + (* TODO: maybe we should change the type in assigned to avoid this case ? *) + assert false | Data_active (Some indice, expr) -> - let* (Raw indice) = find "unknown memory" modul.mem indice in - let+ expr = rewrite_const_expr modul expr in - Binary.Data_active (Some indice, expr) + let* (Raw indice) = find_memory modul indice in + let+ expr = rewrite_expr modul [] expr in + Binary.Data_active (indice, expr) in { Binary.mode; id = data.id; init = data.init } -let rewrite_export msg named (exports : Grouped.opt_export list) : +let rewrite_export err named (exports : Grouped.opt_export list) : Binary.export list Result.t = list_map (fun { Grouped.name; id } -> @@ -405,7 +351,7 @@ let rewrite_export msg named (exports : Grouped.opt_export list) : match id with | Curr id -> Ok id | Indice id -> - let+ (Raw id) = find msg named id in + let+ (Raw id) = find (err id) named id in id in { Binary.name; id } ) @@ -413,19 +359,28 @@ let rewrite_export msg named (exports : Grouped.opt_export list) : let rewrite_exports (modul : Assigned.t) (exports : Grouped.opt_exports) : Binary.exports Result.t = - let* global = rewrite_export "unknown global" modul.global exports.global in - let* mem = rewrite_export "unknown memory" modul.mem exports.mem in - let* table = rewrite_export "unknown table" modul.table exports.table in - let+ func = rewrite_export "unknown function" modul.func exports.func in + let* global = + rewrite_export (fun id -> `Unknown_global id) modul.global exports.global + in + let* mem = + rewrite_export (fun id -> `Unknown_memory id) modul.mem exports.mem + in + let* table = + rewrite_export (fun id -> `Unknown_table id) modul.table exports.table + in + let+ func = + rewrite_export (fun id -> `Unknown_func id) modul.func exports.func + in { Binary.global; mem; table; func } let rewrite_func (typemap : binary indice TypeMap.t) (modul : Assigned.t) - (func : text func) : binary func Result.t = - let* type_f = rewrite_block_type typemap modul func.type_f in - let (Bt_raw ((None | Some _), (params, _))) = type_f in - let* locals = list_map (Binary_types.convert_param None) func.locals in - let+ body = rewrite_expr modul (params @ locals) func.body in - { body; type_f; id = func.id; locals } + ({ id; type_f; locals; body; _ } : text func) : binary func Result.t = + let* (Bt_raw (_, (params, _)) as type_f) = + rewrite_block_type typemap modul type_f + in + let* locals = list_map (Binary_types.convert_param None) locals in + let+ body = rewrite_expr modul (params @ locals) body in + { body; type_f; id; locals } let rewrite_import (f : 'a -> 'b Result.t) (import : 'a Imported.t) : 'b Imported.t Result.t = @@ -462,20 +417,17 @@ let rewrite_types (_modul : Assigned.t) (t : binary str_type) : let modul (modul : Assigned.t) : Binary.modul Result.t = Log.debug0 "rewriting ...@\n"; let typemap = typemap modul.typ in - let* (global : (Binary.global, binary global_type) Runtime.t Named.t) = + let* global = let+ { Named.named; values } = rewrite_named (rewrite_runtime (rewrite_global modul) ok) modul.global in let values = List.rev values in - let global : (Binary.global, binary global_type) Runtime.t Named.t = - { Named.named; values } - in - global + { Named.named; values } in let* elem = rewrite_named (rewrite_elem modul) modul.elem in let* data = rewrite_named (rewrite_data modul) modul.data in let* exports = rewrite_exports modul modul.exports in - let* (func : (binary func, binary block_type) Runtime.t Named.t) = + let* func = let import = rewrite_import (rewrite_block_type typemap modul) in let runtime = rewrite_runtime (rewrite_func typemap modul) import in rewrite_named runtime modul.func @@ -484,21 +436,9 @@ let modul (modul : Assigned.t) : Binary.modul Result.t = let+ start = match modul.start with | None -> Ok None - | Some start -> ( - let* (Raw idx) = find "unknown function" func start in - let va = List.find (Indexed.has_index idx) func.Named.values in - let param_typ, result_typ = - match Indexed.get va with - | Local func -> - let (Bt_raw ((None | Some _), t)) = func.type_f in - t - | Imported imported -> - let (Bt_raw ((None | Some _), t)) = imported.desc in - t - in - match (param_typ, result_typ) with - | [], [] -> Ok (Some idx) - | _, _ -> Error `Start_function ) + | Some id -> + let* (Raw id) = find (`Unknown_func id) func id in + Ok (Some id) in let modul : Binary.modul = diff --git a/src/utils/result.ml b/src/utils/result.ml index c03a1b29e..0a090ec7c 100644 --- a/src/utils/result.ml +++ b/src/utils/result.ml @@ -46,15 +46,18 @@ type err = | `Unbound_name of string | `Undeclared_function_reference | `Unexpected_token of string - | `Unknown_function of int - | `Unknown_global + | `Unknown_data of Types.text Types.indice + | `Unknown_elem of Types.text Types.indice + | `Unknown_func of Types.text Types.indice + | `Unknown_global of Types.text Types.indice | `Unknown_import of string * string - | `Unknown_label - | `Unknown_local of string - | `Unknown_memory of int + | `Unknown_label of Types.text Types.indice + | `Unknown_local of Types.text Types.indice + | `Unknown_memory of Types.text Types.indice | `Unknown_module of string | `Unknown_operator - | `Unknown_type + | `Unknown_table of Types.text Types.indice + | `Unknown_type of Types.text Types.indice | `Unsupported_file_extension of string ] @@ -108,16 +111,21 @@ let rec err_to_string = function | `Unbound_name id -> Format.sprintf "unbound name %s" id | `Undeclared_function_reference -> "undeclared function reference" | `Unexpected_token s -> Format.sprintf "unexpected token %S" s - | `Unknown_function id -> Format.sprintf "unknown function %d" id - | `Unknown_global -> "unknown global" + | `Unknown_data id -> + Format.asprintf "unknown data segment %a" Types.pp_indice id + | `Unknown_elem id -> + Format.asprintf "unknown elem segment %a" Types.pp_indice id + | `Unknown_func id -> Format.asprintf "unknown function %a" Types.pp_indice id + | `Unknown_global id -> Format.asprintf "unknown global %a" Types.pp_indice id | `Unknown_import (modul, value) -> Format.sprintf "unknown import %S %S" modul value - | `Unknown_label -> "unknown label" - | `Unknown_local id -> Format.sprintf "unknown local %s" id - | `Unknown_memory id -> Format.sprintf "unknown memory %d" id - | `Unknown_module id -> Format.sprintf "unknown module %s" id + | `Unknown_label id -> Format.asprintf "unknown label %a" Types.pp_indice id + | `Unknown_local id -> Format.asprintf "unknown local %a" Types.pp_indice id + | `Unknown_memory id -> Format.asprintf "unknown memory %a" Types.pp_indice id + | `Unknown_module name -> Format.sprintf "unknown module %s" name | `Unknown_operator -> Format.sprintf "unknown operator" - | `Unknown_type -> "unknown type" + | `Unknown_table id -> Format.asprintf "unknown table %a" Types.pp_indice id + | `Unknown_type id -> Format.asprintf "unknown type %a" Types.pp_indice id | `Unsupported_file_extension ext -> Format.sprintf "unsupported file_extension %S" ext diff --git a/src/utils/result.mli b/src/utils/result.mli index e2575c4d0..242afb9f0 100644 --- a/src/utils/result.mli +++ b/src/utils/result.mli @@ -46,15 +46,18 @@ type err = | `Unbound_name of string | `Undeclared_function_reference | `Unexpected_token of string - | `Unknown_function of int - | `Unknown_global + | `Unknown_data of Types.text Types.indice + | `Unknown_elem of Types.text Types.indice + | `Unknown_func of Types.text Types.indice + | `Unknown_global of Types.text Types.indice | `Unknown_import of string * string - | `Unknown_label - | `Unknown_local of string - | `Unknown_memory of int + | `Unknown_label of Types.text Types.indice + | `Unknown_local of Types.text Types.indice + | `Unknown_memory of Types.text Types.indice | `Unknown_module of string | `Unknown_operator - | `Unknown_type + | `Unknown_table of Types.text Types.indice + | `Unknown_type of Types.text Types.indice | `Unsupported_file_extension of string ] diff --git a/src/validate/typecheck.ml b/src/validate/typecheck.ml index ac28b2117..a589ad48c 100644 --- a/src/validate/typecheck.ml +++ b/src/validate/typecheck.ml @@ -33,61 +33,64 @@ module Index = struct include M end +let check_mem modul n = + if n >= List.length modul.mem.values then Error (`Unknown_memory (Raw n)) + else Ok () + +let check_data modul n = + if n >= List.length modul.data.values then Error (`Unknown_data (Raw n)) + else Ok () + +let check_align memarg_align align = + if memarg_align >= align then Error `Alignment_too_large else Ok () + module Env = struct type t = { locals : typ Index.Map.t - ; mem : (mem, limits) Runtime.t Named.t - ; globals : (global, binary global_type) Runtime.t Named.t ; result_type : binary result_type - ; funcs : (binary func, binary block_type) Runtime.t Named.t ; blocks : typ list list - ; tables : (binary table, binary table_type) Runtime.t Named.t - ; elems : elem Named.t + ; modul : Binary.modul ; refs : (int, unit) Hashtbl.t } - let local_get i env = match Index.Map.find i env.locals with v -> v + let local_get i env = + match Index.Map.find_opt i env.locals with + | None -> Error (`Unknown_local (Raw i)) + | Some v -> Ok v - let global_get i env = - let value = Indexed.get_at_exn i env.globals.values in - let _mut, typ = - match value with Local { typ; _ } -> typ | Runtime.Imported t -> t.desc - in - typ + let global_get i modul = + let value = Indexed.get_at i modul.global.values in + match value with + | None -> Error (`Unknown_global (Raw i)) + | Some (Runtime.Local { typ = desc; _ } | Imported { desc; _ }) -> Ok desc - let func_get i env = - let value = Indexed.get_at_exn i env.funcs.values in + let func_get i modul = + let value = Indexed.get_at i modul.func.values in match value with - | Local { type_f; _ } -> - let (Bt_raw ((None | Some _), t)) = type_f in - t - | Runtime.Imported t -> - let (Bt_raw ((None | Some _), t)) = t.desc in - t + | None -> Error (`Unknown_func (Raw i)) + | Some + ( Runtime.Local { type_f = Bt_raw (_, t); _ } + | Imported { desc = Bt_raw (_, t); _ } ) -> + Ok t let block_type_get i env = match List.nth_opt env.blocks i with - | None -> Error `Unknown_label + | None -> Error (`Unknown_label (Raw i)) | Some bt -> Ok bt - let table_type_get_from_module i (modul : Binary.modul) = - let value = Indexed.get_at_exn i modul.table.values in + let table_type_get i (modul : Binary.modul) = + let value = Indexed.get_at i modul.table.values in match value with - | Local table -> snd (snd table) - | Runtime.Imported t -> snd t.desc - - let table_type_get i env = - let value = Indexed.get_at_exn i env.tables.values in - match value with - | Local table -> snd (snd table) - | Runtime.Imported t -> snd t.desc + | None -> Error (`Unknown_table (Raw i)) + | Some (Runtime.Local (_, (_, t)) | Imported { desc = _, t; _ }) -> Ok t let elem_type_get i env = - let value = Indexed.get_at_exn i env.elems.values in - value.typ + let value = Indexed.get_at i env.modul.elem.values in + match value with + | None -> Error (`Unknown_elem (Raw i)) + | Some value -> Ok value.typ - let make ~params ~locals ~mem ~globals ~funcs ~result_type ~tables ~elems - ~refs = + let make ~params ~locals ~modul ~result_type ~refs = let l = List.mapi (fun i v -> (i, v)) (params @ locals) in let locals = List.fold_left @@ -96,20 +99,9 @@ module Env = struct Index.Map.add i typ locals ) Index.Map.empty l in - { locals - ; mem - ; globals - ; result_type - ; funcs - ; tables - ; elems - ; blocks = [] - ; refs - } + { locals; modul; result_type; blocks = []; refs } end -type env = Env.t - type stack = typ list let i32 = Num_type I32 @@ -226,22 +218,14 @@ end = struct let push t stack = ok @@ t @ stack - let pop_push (Bt_raw ((None | Some _), (pt, rt))) stack = + let pop_push (Bt_raw (_, (pt, rt))) stack = let pt, rt = (List.rev_map typ_of_pt pt, List.rev_map typ_of_val_type rt) in let* stack = pop pt stack in push rt stack end -let rec typecheck_instr (env : env) (stack : stack) (instr : binary instr) : +let rec typecheck_instr (env : Env.t) (stack : stack) (instr : binary instr) : stack Result.t = - let check_mem align = - if List.length env.mem.values < 1 then Error (`Unknown_memory 0) - else - match align with - | None -> Ok () - | Some (memarg_align, align) -> - if memarg_align >= align then Error `Alignment_too_large else Ok () - in match instr with | Nop -> Ok stack | Drop -> Stack.drop stack @@ -282,62 +266,80 @@ let rec typecheck_instr (env : env) (stack : stack) (instr : binary instr) : let t = ftype nn in let* stack = Stack.pop [ t; t ] stack in Stack.push [ i32 ] stack - | Local_get (Raw i) -> Stack.push [ Env.local_get i env ] stack + | Local_get (Raw i) -> + let* t = Env.local_get i env in + Stack.push [ t ] stack | Local_set (Raw i) -> - let t = Env.local_get i env in + let* t = Env.local_get i env in Stack.pop [ t ] stack | Local_tee (Raw i) -> - let t = Env.local_get i env in + let* t = Env.local_get i env in let* stack = Stack.pop [ t ] stack in Stack.push [ t ] stack | Global_get (Raw i) -> - Stack.push [ typ_of_val_type @@ Env.global_get i env ] stack + let* _mut, t = Env.global_get i env.modul in + let t = typ_of_val_type t in + Stack.push [ t ] stack | Global_set (Raw i) -> - let t = Env.global_get i env in - Stack.pop [ typ_of_val_type t ] stack + let* mut, t = Env.global_get i env.modul in + let* () = + match mut with Var -> Ok () | Const -> Error `Global_is_immutable + in + let t = typ_of_val_type t in + Stack.pop [ t ] stack | If_else (_id, block_type, e1, e2) -> let* stack = Stack.pop [ i32 ] stack in let* stack_e1 = typecheck_expr env e1 ~is_loop:false block_type ~stack in let+ _stack_e2 = typecheck_expr env e2 ~is_loop:false block_type ~stack in stack_e1 | I_load8 (nn, _, memarg) -> - let* () = check_mem (Some (memarg.align, 1l)) in + let* () = check_mem env.modul 0 in + let* () = check_align memarg.align 1l in let* stack = Stack.pop [ i32 ] stack in Stack.push [ itype nn ] stack | I_load16 (nn, _, memarg) -> - let* () = check_mem (Some (memarg.align, 2l)) in + let* () = check_mem env.modul 0 in + let* () = check_align memarg.align 2l in let* stack = Stack.pop [ i32 ] stack in Stack.push [ itype nn ] stack | I_load (nn, memarg) -> + let* () = check_mem env.modul 0 in let max_allowed = match nn with S32 -> 4l | S64 -> 8l in - let* () = check_mem (Some (memarg.align, max_allowed)) in + let* () = check_align memarg.align max_allowed in let* stack = Stack.pop [ i32 ] stack in Stack.push [ itype nn ] stack | I64_load32 (_, memarg) -> - let* () = check_mem (Some (memarg.align, 4l)) in + let* () = check_mem env.modul 0 in + let* () = check_align memarg.align 4l in let* stack = Stack.pop [ i32 ] stack in Stack.push [ i64 ] stack | I_store8 (nn, memarg) -> - let* () = check_mem (Some (memarg.align, 1l)) in + let* () = check_mem env.modul 0 in + let* () = check_align memarg.align 1l in Stack.pop [ itype nn; i32 ] stack | I_store16 (nn, memarg) -> - let* () = check_mem (Some (memarg.align, 2l)) in + let* () = check_mem env.modul 0 in + let* () = check_align memarg.align 2l in Stack.pop [ itype nn; i32 ] stack | I_store (nn, memarg) -> + let* () = check_mem env.modul 0 in let max_allowed = match nn with S32 -> 4l | S64 -> 8l in - let* () = check_mem (Some (memarg.align, max_allowed)) in + let* () = check_align memarg.align max_allowed in Stack.pop [ itype nn; i32 ] stack | I64_store32 memarg -> - let* () = check_mem (Some (memarg.align, 4l)) in + let* () = check_mem env.modul 0 in + let* () = check_align memarg.align 4l in Stack.pop [ i64; i32 ] stack | F_load (nn, memarg) -> + let* () = check_mem env.modul 0 in let max_allowed = match nn with S32 -> 4l | S64 -> 8l in - let* () = check_mem (Some (memarg.align, max_allowed)) in + let* () = check_align memarg.align max_allowed in let* stack = Stack.pop [ i32 ] stack in Stack.push [ ftype nn ] stack | F_store (nn, memarg) -> + let* () = check_mem env.modul 0 in let max_allowed = match nn with S32 -> 4l | S64 -> 8l in - let* () = check_mem (Some (memarg.align, max_allowed)) in + let* () = check_align memarg.align max_allowed in Stack.pop [ ftype nn; i32 ] stack | I_reinterpret_f (inn, fnn) -> let* stack = Stack.pop [ ftype fnn ] stack in @@ -371,22 +373,27 @@ let rec typecheck_instr (env : env) (stack : stack) (instr : binary instr) : let* stack = Stack.pop [ i32 ] stack in Stack.push [ i64 ] stack | Memory_grow -> - let* () = check_mem None in + let* () = check_mem env.modul 0 in let* stack = Stack.pop [ i32 ] stack in Stack.push [ i32 ] stack | Memory_size -> - let* () = check_mem None in + let* () = check_mem env.modul 0 in Stack.push [ i32 ] stack - | Memory_copy | Memory_init _ | Memory_fill -> - let* () = check_mem None in + | Memory_copy | Memory_fill -> + let* () = check_mem env.modul 0 in + Stack.pop [ i32; i32; i32 ] stack + | Memory_init (Raw id) -> + let* () = check_mem env.modul 0 in + let* () = check_data env.modul id in Stack.pop [ i32; i32; i32 ] stack | Block (_, bt, expr) -> typecheck_expr env expr ~is_loop:false bt ~stack | Loop (_, bt, expr) -> typecheck_expr env expr ~is_loop:true bt ~stack - | Call_indirect (_, bt) -> + | Call_indirect (Raw tbl_id, bt) -> + let* _tbl_type = Env.table_type_get tbl_id env.modul in let* stack = Stack.pop [ i32 ] stack in Stack.pop_push bt stack | Call (Raw i) -> - let pt, rt = Env.func_get i env in + let* pt, rt = Env.func_get i env.modul in let* stack = Stack.pop (List.rev_map typ_of_pt pt) stack in Stack.push (List.rev_map typ_of_val_type rt) stack | Call_ref _t -> @@ -397,7 +404,7 @@ let rec typecheck_instr (env : env) (stack : stack) (instr : binary instr) : *) stack | Return_call (Raw i) -> - let pt, rt = Env.func_get i env in + let* pt, rt = Env.func_get i env.modul in if not (Stack.equal @@ -407,7 +414,8 @@ let rec typecheck_instr (env : env) (stack : stack) (instr : binary instr) : else let+ _stack = Stack.pop (List.rev_map typ_of_pt pt) stack in [ any ] - | Return_call_indirect (_, Bt_raw ((None | Some _), (pt, rt))) -> + | Return_call_indirect (Raw tbl_id, Bt_raw (_, (pt, rt))) -> + let* _tbl_type = Env.table_type_get tbl_id env.modul in if not (Stack.equal @@ -418,7 +426,7 @@ let rec typecheck_instr (env : env) (stack : stack) (instr : binary instr) : let* stack = Stack.pop [ i32 ] stack in let+ _stack = Stack.pop (List.rev_map typ_of_pt pt) stack in [ any ] - | Return_call_ref (Bt_raw ((None | Some _), (pt, rt))) -> + | Return_call_ref (Bt_raw (_, (pt, rt))) -> if not (Stack.equal @@ -429,31 +437,37 @@ let rec typecheck_instr (env : env) (stack : stack) (instr : binary instr) : let* stack = Stack.pop_ref stack in let+ _stack = Stack.pop (List.rev_map typ_of_pt pt) stack in [ any ] - | Data_drop _i -> Ok stack + | Data_drop (Raw id) -> + let* () = check_data env.modul id in + Ok stack | Table_init (Raw ti, Raw ei) -> - let table_typ = Env.table_type_get ti env in - let elem_typ = Env.elem_type_get ei env in + let* table_typ = Env.table_type_get ti env.modul in + let* elem_typ = Env.elem_type_get ei env in if not @@ Stack.match_ref_type (snd table_typ) (snd elem_typ) then Error (`Type_mismatch "table_init") else Stack.pop [ i32; i32; i32 ] stack | Table_copy (Raw i, Raw i') -> - let typ = Env.table_type_get i env in - let typ' = Env.table_type_get i' env in + let* typ = Env.table_type_get i env.modul in + let* typ' = Env.table_type_get i' env.modul in if typ <> typ' then Error (`Type_mismatch "table_copy") else Stack.pop [ i32; i32; i32 ] stack | Table_fill (Raw i) -> - let _null, t = Env.table_type_get i env in + let* _null, t = Env.table_type_get i env.modul in Stack.pop [ i32; Ref_type t; i32 ] stack | Table_grow (Raw i) -> - let _null, t = Env.table_type_get i env in + let* _null, t = Env.table_type_get i env.modul in let* stack = Stack.pop [ i32; Ref_type t ] stack in Stack.push [ i32 ] stack - | Table_size _ -> Stack.push [ i32 ] stack + | Table_size (Raw i) -> + let* _null, _t = Env.table_type_get i env.modul in + Stack.push [ i32 ] stack | Ref_is_null -> let* stack = Stack.pop_ref stack in Stack.push [ i32 ] stack | Ref_null rt -> Stack.push [ Ref_type rt ] stack - | Elem_drop _ -> Ok stack + | Elem_drop (Raw id) -> + let* _elem_typ = Env.elem_type_get id env in + Ok stack | Select t -> let* stack = Stack.pop [ i32 ] stack in begin @@ -501,11 +515,11 @@ let rec typecheck_instr (env : env) (stack : stack) (instr : binary instr) : in Ok [ any ] | Table_get (Raw i) -> - let _null, t = Env.table_type_get i env in + let* _null, t = Env.table_type_get i env.modul in let* stack = Stack.pop [ i32 ] stack in Stack.push [ Ref_type t ] stack | Table_set (Raw i) -> - let _null, t = Env.table_type_get i env in + let* _null, t = Env.table_type_get i env.modul in Stack.pop [ Ref_type t; i32 ] stack | Array_len -> (* TODO: fixme, Something is not right *) @@ -530,7 +544,7 @@ and typecheck_expr env expr ~is_loop (block_type : binary block_type option) ~stack:previous_stack : stack Result.t = let pt, rt = Option.fold ~none:([], []) - ~some:(fun (Bt_raw ((None | Some _), (pt, rt)) : binary block_type) -> + ~some:(fun (Bt_raw (_, (pt, rt)) : binary block_type) -> (List.rev_map typ_of_pt pt, List.rev_map typ_of_val_type rt) ) block_type in @@ -551,11 +565,9 @@ let typecheck_function (modul : modul) func refs = match func with | Runtime.Imported _ -> Ok () | Local func -> - let (Bt_raw ((None | Some _), (params, result))) = func.type_f in + let (Bt_raw (_, (params, result))) = func.type_f in let env = - Env.make ~params ~funcs:modul.func ~locals:func.locals ~mem:modul.mem - ~globals:modul.global ~result_type:result ~tables:modul.table - ~elems:modul.elem ~refs + Env.make ~params ~modul ~locals:func.locals ~result_type:result ~refs in let* stack = typecheck_expr env func.body ~is_loop:false @@ -574,14 +586,20 @@ let typecheck_const_instr (modul : modul) refs stack = function | F64_const _ -> Stack.push [ f64 ] stack | Ref_null t -> Stack.push [ Ref_type t ] stack | Ref_func (Raw i) -> + let* _t = Env.func_get i modul in Hashtbl.add refs i (); Stack.push [ Ref_type Func_ht ] stack - | Global_get (Raw i) -> - let value = Indexed.get_at_exn i modul.global.values in - let* _mut, typ = + | Global_get (Raw i as idx) -> + let value = Indexed.get_at i modul.global.values in + let* mut, typ = match value with - | Local _ -> Error `Unknown_global - | Imported t -> Ok t.desc + | None | Some (Local _) -> Error (`Unknown_global idx) + | Some (Imported t) -> Ok t.desc + in + let* () = + match mut with + | Const -> Ok () + | Var -> Error `Constant_expression_required in Stack.push [ typ_of_val_type typ ] stack | I_binop (t, _op) -> @@ -596,7 +614,7 @@ let typecheck_const_instr (modul : modul) refs stack = function | Ref_i31 -> let* stack = Stack.pop [ i32 ] stack in Stack.push [ i31 ] stack - | _ -> assert false + | _ -> Error `Constant_expression_required let typecheck_const_expr (modul : modul) refs = list_fold_left (typecheck_const_instr modul refs) [] @@ -633,7 +651,7 @@ let typecheck_elem modul refs (elem : elem Indexed.t) = | Elem_passive | Elem_declarative -> Ok () | Elem_active (None, _e) -> assert false | Elem_active (Some tbl_i, e) -> ( - let _null, tbl_type = Env.table_type_get_from_module tbl_i modul in + let* _null, tbl_type = Env.table_type_get tbl_i modul in if tbl_type <> expected_type then Error (`Type_mismatch "typecheck elem 3") else let* t = typecheck_const_expr modul refs e in @@ -648,18 +666,66 @@ let typecheck_data modul refs (data : data Indexed.t) = let data = Indexed.get data in match data.mode with | Data_passive -> Ok () - | Data_active (_i, e) -> ( + | Data_active (n, e) -> ( + let* () = check_mem modul n in let* t = typecheck_const_expr modul refs e in match t with | [ _t ] -> Ok () | _whatever -> Error (`Type_mismatch "typecheck_data") ) +let typecheck_start { start; func; _ } = + match start with + | None -> Ok () + | Some idx -> ( + let* f = + match List.find_opt (Indexed.has_index idx) func.values with + | None -> Error (`Unknown_func (Raw idx)) + | Some f -> Ok f + in + let pt, rt = + match Indexed.get f with + | Local { type_f = Bt_raw (_, t); _ } + | Imported { desc = Bt_raw (_, t); _ } -> + t + in + match (pt, rt) with [], [] -> Ok () | _, _ -> Error `Start_function ) + +let validate_exports modul = + let* () = + list_iter + (fun { id; name = _ } -> + let* _t = Env.func_get id modul in + Ok () ) + modul.exports.func + in + let* () = + list_iter + (fun { id; name = _ } -> + let* _t = Env.table_type_get id modul in + Ok () ) + modul.exports.table + in + let* () = + list_iter + (fun { id; name = _ } -> + let* _t = Env.global_get id modul in + Ok () ) + modul.exports.global + in + list_iter + (fun { id; name = _ } -> + let* () = check_mem modul id in + Ok () ) + modul.exports.mem + let modul (modul : modul) = Log.debug0 "typechecking ...@\n"; let refs = Hashtbl.create 512 in let* () = list_iter (typecheck_global modul refs) modul.global.values in let* () = list_iter (typecheck_elem modul refs) modul.elem.values in let* () = list_iter (typecheck_data modul refs) modul.data.values in + let* () = typecheck_start modul in + let* () = validate_exports modul in List.iter (fun (export : export) -> Hashtbl.add refs export.id ()) modul.exports.func; diff --git a/test/script/gc.t b/test/script/gc.t index 12e04030b..6d9264edf 100644 --- a/test/script/gc.t +++ b/test/script/gc.t @@ -8,8 +8,8 @@ unknown operator unknown operator "any.convert_extern" [23] $ owi script --no-exhaustion reference/proposals/gc/call_ref.wast - unknown type - [49] + unknown type $ii + [52] $ owi script --no-exhaustion reference/proposals/gc/extern.wast unknown operator unknown operator "any.convert_extern" [23] @@ -21,15 +21,15 @@ [23] $ owi script --no-exhaustion reference/proposals/gc/ref_eq.wast owi: internal error, uncaught exception: - File "src/validate/typecheck.ml", line 527, characters 4-10: Assertion failed + File "src/validate/typecheck.ml", line 541, characters 4-10: Assertion failed [125] $ owi script --no-exhaustion reference/proposals/gc/ref_test.wast unknown operator unknown operator "any.convert_extern" [23] $ owi script --no-exhaustion reference/proposals/gc/return_call_ref.wast - unknown type - [49] + unknown type $i64-i64 + [52] $ owi script --no-exhaustion reference/proposals/gc/struct.wast unknown operator unknown operator "struct.get_u" [23] diff --git a/test/script/reference.t b/test/script/reference.t index 64b4651a7..1b89e21d9 100644 --- a/test/script/reference.t +++ b/test/script/reference.t @@ -3,7 +3,7 @@ $ owi script --no-exhaustion reference/binary-leb128.wast $ owi script --no-exhaustion reference/binary.wast expected END opcode expected but got (unexpected end of section or function) - [51] + [54] $ owi script --no-exhaustion reference/block.wast $ owi script --no-exhaustion reference/br_if.wast $ owi script --no-exhaustion reference/br_table.wast diff --git a/test/script/reference_opt.t b/test/script/reference_opt.t index 82424706a..a79fa4fb5 100644 --- a/test/script/reference_opt.t +++ b/test/script/reference_opt.t @@ -3,7 +3,7 @@ $ owi script --no-exhaustion --optimize reference/binary-leb128.wast $ owi script --no-exhaustion --optimize reference/binary.wast expected END opcode expected but got (unexpected end of section or function) - [51] + [54] $ owi script --no-exhaustion --optimize reference/block.wast $ owi script --no-exhaustion --optimize reference/br_if.wast $ owi script --no-exhaustion --optimize reference/br_table.wast diff --git a/test/validate/unknown_label.t b/test/validate/unknown_label.t index 3fdeee09d..d6e54899c 100644 --- a/test/validate/unknown_label.t +++ b/test/validate/unknown_label.t @@ -1,3 +1,3 @@ $ owi validate ./unknown_label.wat - unknown label - [44] + unknown label 2 + [46] diff --git a/test/wasm2wat/not_exists.t b/test/wasm2wat/not_exists.t index 3e836d9b5..cc6c2a43f 100644 --- a/test/wasm2wat/not_exists.t +++ b/test/wasm2wat/not_exists.t @@ -5,4 +5,4 @@ [124] $ owi wasm2wat bad.ext unsupported file_extension ".ext" - [50] + [53] diff --git a/test/wat2wasm/not_exists.t b/test/wat2wasm/not_exists.t index c060644bc..b09f42d15 100644 --- a/test/wat2wasm/not_exists.t +++ b/test/wat2wasm/not_exists.t @@ -5,4 +5,4 @@ [124] $ owi wat2wasm bad.ext unsupported file_extension ".ext" - [50] + [53]