From f298fd1f8800e57aa18f232bacd5cf59b34e66ea Mon Sep 17 00:00:00 2001 From: Nora Sandler Date: Wed, 30 Nov 2022 10:52:16 -0800 Subject: [PATCH] chapter 20a: register allocation without coalescing --- bin/main.ml | 23 +- lib/assembly.ml | 21 + lib/backend/assembly_symbols.ml | 49 ++- lib/backend/assembly_symbols.mli | 8 +- lib/backend/codegen.ml | 182 +++++---- lib/backend/instruction_fixup.ml | 40 +- lib/backend/regalloc.ml | 569 +++++++++++++++++++++++++++ lib/backend/regalloc.mli | 2 + lib/backend/replace_pseudos.ml | 1 + lib/backward_dataflow.ml | 45 +++ lib/backward_dataflow.mli | 12 + lib/cfg.ml | 52 ++- lib/cfg.mli | 12 +- lib/compile.ml | 18 +- lib/context.ml | 3 +- lib/dune | 2 + lib/emit.ml | 22 ++ lib/optimizations/address_taken.ml | 10 +- lib/optimizations/address_taken.mli | 3 +- lib/optimizations/dead_store_elim.ml | 49 +-- lib/settings.ml | 20 + lib/settings.mli | 11 + 22 files changed, 1016 insertions(+), 138 deletions(-) create mode 100644 lib/backend/regalloc.ml create mode 100644 lib/backend/regalloc.mli create mode 100644 lib/backward_dataflow.ml create mode 100644 lib/backward_dataflow.mli diff --git a/bin/main.ml b/bin/main.ml index bd679dd..39c3d31 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -184,6 +184,14 @@ let debug = in Arg.(value & flag & info [ "dump-unreachable-elim" ] ~doc) in + let dump_gp_regalloc_opt = + let doc = "Verbosity level when allocating general-purpose registers" in + Arg.(value & opt ~vopt:1 int 0 & info [ "dump-gp-regalloc" ] ~doc) + in + let dump_xmm_regalloc_opt = + let doc = "Verbosity level when allocating XMM registers" in + Arg.(value & opt ~vopt:1 int 0 & info [ "dump-xmm-regalloc" ] ~doc) + in let dump_fun_opt = let doc = "Function to dump extra optimization details for (if not specified, dump \ @@ -194,7 +202,16 @@ let debug = in let set_debug_options dump_asm dump_tacky constant_folding dead_store_elimination copy_propagation unreachable_code_elimination - dump_fun = + dump_gp_regalloc_lvl dump_xmm_regalloc_lvl dump_fun = + let mk_regalloc_opts dump_lvl = + Settings. + { + spill_info = dump_lvl >= 1; + interference_ncol = dump_lvl >= 2; + interference_graphviz = dump_lvl >= 3; + liveness = dump_lvl >= 4; + } + in Settings. { dump_asm; @@ -206,6 +223,8 @@ let debug = unreachable_code_elimination; copy_propagation; }; + dump_gp_regalloc = mk_regalloc_opts dump_gp_regalloc_lvl; + dump_xmm_regalloc = mk_regalloc_opts dump_xmm_regalloc_lvl; dump_fun; } in @@ -217,6 +236,8 @@ let debug = $ dump_dse_opt $ dump_copy_prop_opt $ dump_unreachable_elim_opt + $ dump_gp_regalloc_opt + $ dump_xmm_regalloc_opt $ dump_fun_opt) let src_file = diff --git a/lib/assembly.ml b/lib/assembly.ml index 1424f72..3b68b62 100644 --- a/lib/assembly.ml +++ b/lib/assembly.ml @@ -1,5 +1,8 @@ +[@@@coverage exclude_file] + type reg = | AX + | BX | CX | DX | DI @@ -8,6 +11,10 @@ type reg = | R9 | R10 | R11 + | R12 + | R13 + | R14 + | R15 | SP | BP | XMM0 @@ -18,8 +25,15 @@ type reg = | XMM5 | XMM6 | XMM7 + | XMM8 + | XMM9 + | XMM10 + | XMM11 + | XMM12 + | XMM13 | XMM14 | XMM15 +[@@deriving show { with_path = false }] type operand = | Imm of int64 @@ -29,8 +43,10 @@ type operand = | Data of string * int | PseudoMem of string * int | Indexed of { base : reg; index : reg; scale : int } +[@@deriving show { with_path = false }] type unary_operator = Neg | Not | ShrOneOp +[@@deriving show { with_path = false }] type binary_operator = | Add @@ -44,8 +60,10 @@ type binary_operator = | Sar | Shr | Shl +[@@deriving show { with_path = false }] type cond_code = E | NE | G | GE | L | LE | A | AE | B | BE | P | NP +[@@deriving show { with_path = false }] type asm_type = | Byte @@ -53,6 +71,7 @@ type asm_type = | Quadword | Double | ByteArray of { size : int; alignment : int } +[@@deriving show { with_path = false }] type instruction = | Mov of asm_type * operand * operand @@ -87,8 +106,10 @@ type instruction = | SetCC of cond_code * operand | Label of string | Push of operand + | Pop of reg | Call of string | Ret +[@@deriving show { with_path = false }] type top_level = | Function of { diff --git a/lib/backend/assembly_symbols.ml b/lib/backend/assembly_symbols.ml index d7b71cb..7f74760 100644 --- a/lib/backend/assembly_symbols.ml +++ b/lib/backend/assembly_symbols.ml @@ -1,14 +1,29 @@ open Batteries type entry = - | Fun of { defined : bool; bytes_required : int; return_on_stack : bool } + | Fun of { + defined : bool; + bytes_required : int; + return_on_stack : bool; + param_regs : Assembly.reg list; + return_regs : Assembly.reg list; + callee_saved_regs_used : Assembly.reg Set.t; + } | Obj of { t : Assembly.asm_type; is_static : bool; constant : bool } let (symbol_table : (string, entry) Hashtbl.t) = Hashtbl.create 20 -let add_fun fun_name defined return_on_stack = +let add_fun fun_name defined return_on_stack param_regs return_regs = Hashtbl.replace symbol_table fun_name - (Fun { defined; bytes_required = 0; return_on_stack }) + (Fun + { + defined; + bytes_required = 0; + callee_saved_regs_used = Set.empty; + return_on_stack; + param_regs; + return_regs; + }) let add_var var_name t is_static = Hashtbl.replace symbol_table var_name (Obj { t; is_static; constant = false }) @@ -29,6 +44,24 @@ let get_bytes_required fun_name = | Fun f -> f.bytes_required | Obj _ -> failwith "Internal error: not a function" [@coverage off] +let add_callee_saved_regs_used fun_name regs = + let set_regs = function + | Fun f -> + Fun + { + f with + callee_saved_regs_used = Set.union f.callee_saved_regs_used regs; + } + | Obj _ -> failwith "Internal error: not a function" [@coverage off] + in + + Hashtbl.modify fun_name set_regs symbol_table + +let get_callee_saved_regs_used fun_name = + match Hashtbl.find symbol_table fun_name with + | Fun f -> f.callee_saved_regs_used + | Obj _ -> failwith "Internal error: not a function" [@coverage off] + let get_size var_name = match Hashtbl.find symbol_table var_name with | Obj { t = Byte; _ } -> 1 @@ -82,3 +115,13 @@ let returns_on_stack fun_name = | Obj _ -> failwith "Internal error: this is an object, not a function" [@coverage off] + +let param_regs_used fun_name = + match Hashtbl.find symbol_table fun_name with + | Fun f -> f.param_regs + | Obj _ -> failwith "Internal error: not a function" [@coverage off] + +let return_regs_used fun_name = + match Hashtbl.find symbol_table fun_name with + | Fun f -> f.return_regs + | Obj _ -> failwith "Internal error: not a function" [@coverage off] diff --git a/lib/backend/assembly_symbols.mli b/lib/backend/assembly_symbols.mli index 863c945..3f72ba3 100644 --- a/lib/backend/assembly_symbols.mli +++ b/lib/backend/assembly_symbols.mli @@ -1,8 +1,12 @@ -val add_fun : string -> bool -> bool -> unit +val add_fun : + string -> bool -> bool -> Assembly.reg list -> Assembly.reg list -> unit + val add_var : string -> Assembly.asm_type -> bool -> unit val add_constant : string -> Assembly.asm_type -> unit val set_bytes_required : string -> int -> unit val get_bytes_required : string -> int +val add_callee_saved_regs_used : string -> Assembly.reg Batteries.Set.t -> unit +val get_callee_saved_regs_used : string -> Assembly.reg Batteries.Set.t val get_type : string -> Assembly.asm_type val get_size : string -> int val get_alignment : string -> int @@ -10,3 +14,5 @@ val is_defined : string -> bool val is_constant : string -> bool val is_static : string -> bool val returns_on_stack : string -> bool +val param_regs_used : string -> Assembly.reg list +val return_regs_used : string -> Assembly.reg list diff --git a/lib/backend/codegen.ml b/lib/backend/codegen.ml index 74f8fa3..f9583e1 100644 --- a/lib/backend/codegen.ml +++ b/lib/backend/codegen.ml @@ -259,39 +259,31 @@ let classify_type tag = Hashtbl.add classified_types tag classes; classes -let classify_tacky_val v = - match Tacky.type_of_val v with - | Structure tag -> classify_type tag - | Union tag -> classify_type tag +let get_tag t = + match t with + | Types.Structure tag -> tag + | Union tag -> tag | _ -> - failwith "Internal error: trying to classify non-structure or union type" + failwith + "Internal error: trying to get tag for non-structure or union type" [@coverage off] -let classify_parameters tacky_vals return_on_stack = +let classify_params_helper typed_asm_vals return_on_stack = let int_regs_available = if return_on_stack then 5 else 6 in - let process_one_param (int_reg_args, dbl_reg_args, stack_args) v = - let operand = convert_val v in - let t = asm_type v in + let process_one_param (int_reg_args, dbl_reg_args, stack_args) + (tacky_t, operand) = + let t = convert_type tacky_t in let typed_operand = (t, operand) in - match t with - | Double -> - if List.length dbl_reg_args < 8 then - (int_reg_args, operand :: dbl_reg_args, stack_args) - else (int_reg_args, dbl_reg_args, typed_operand :: stack_args) - | Byte | Longword | Quadword -> - if List.length int_reg_args < int_regs_available then - (typed_operand :: int_reg_args, dbl_reg_args, stack_args) - else (int_reg_args, dbl_reg_args, typed_operand :: stack_args) - | ByteArray _ -> + match tacky_t with + | Structure _ | Union _ -> (* it's a structure or union *) let var_name = - match v with - | Tacky.Var n -> n - | Constant _ -> - failwith "Internal error: constant byte array" [@coverage off] + match operand with + | Assembly.PseudoMem (n, 0) -> n + | _ -> failwith "Bad structure operand" [@coverage off] in - let var_size = Type_utils.get_size (Tacky.type_of_val v) in - let classes = classify_tacky_val v in + let var_size = Type_utils.get_size tacky_t in + let classes = classify_type (get_tag tacky_t) in let updated_int, updated_dbl, use_stack = if List.hd classes = Mem then (* all eightbytes go on the stack*) @@ -338,56 +330,92 @@ let classify_parameters tacky_vals return_on_stack = else stack_args in (updated_int, updated_dbl, updated_stack_args) + | Double -> + if List.length dbl_reg_args < 8 then + (int_reg_args, operand :: dbl_reg_args, stack_args) + else (int_reg_args, dbl_reg_args, typed_operand :: stack_args) + | _ -> + if List.length int_reg_args < int_regs_available then + (typed_operand :: int_reg_args, dbl_reg_args, stack_args) + else (int_reg_args, dbl_reg_args, typed_operand :: stack_args) in let reversed_int, reversed_dbl, reversed_stack = - List.fold_left process_one_param ([], [], []) tacky_vals + List.fold_left process_one_param ([], [], []) typed_asm_vals in (List.rev reversed_int, List.rev reversed_dbl, List.rev reversed_stack) -let classify_return_value retval = +let classify_parameters params return_on_stack = + let f v = (Tacky.type_of_val v, convert_val v) in + classify_params_helper (List.map f params) return_on_stack + +let classify_param_types type_list return_on_stack = + let f t = + if Type_utils.is_scalar t then (t, Assembly.Pseudo "dummy") + else (t, Assembly.PseudoMem ("dummy", 0)) + in + let ints, dbls, _ = + classify_params_helper (List.map f type_list) return_on_stack + in + let int_regs = List.take (List.length ints) int_param_passing_regs in + let dbl_regs = List.take (List.length dbls) dbl_param_passing_regs in + int_regs @ dbl_regs + +let classify_return_helper ret_type asm_retval = let open Assembly in - let retval_type = Tacky.type_of_val retval in - let classify_return_val_helper tag = - let classes = classify_type tag in - let var_name = - match retval with - | Tacky.Var n -> n - | Constant _ -> - failwith "Internal error: constant with structure type" - [@coverage off] - in - if List.hd classes = Mem then ([], [], true) - else - (* return in registers, can move everything w/ quadword operands *) - let process_quadword (ints, dbls) i cls = - let operand = PseudoMem (var_name, i * 8) in - match cls with - | SSE -> (ints, dbls @ [ operand ]) - | Integer -> - let eightbyte_type = - get_eightbyte_type ~eightbyte_idx:i - ~total_var_size:(Type_utils.get_size retval_type) - in - (ints @ [ (eightbyte_type, operand) ], dbls) - | Mem -> + match ret_type with + | Types.Structure _ | Union _ -> + let classes = classify_type (get_tag ret_type) in + let var_name = + match asm_retval with + | PseudoMem (n, 0) -> n + | _ -> failwith - "Internal error: found eightbyte in Mem class, but first \ - eighbyte wasn't Mem" [@coverage off] + "Internal error: invalid assembly operand for structure type" + [@coverage off] in - let i, d = List.fold_lefti process_quadword ([], []) classes in - (i, d, false) - in - match retval_type with - | Types.Structure tag -> classify_return_val_helper tag - | Types.Union tag -> classify_return_val_helper tag - | Double -> - let asm_val = convert_val retval in - ([], [ asm_val ], false) - | _ -> - let typed_operand = (asm_type retval, convert_val retval) in + if List.hd classes = Mem then ([], [], true) + else + (* return in registers, can move everything w/ quadword operands *) + let process_quadword (ints, dbls) i cls = + let operand = PseudoMem (var_name, i * 8) in + match cls with + | SSE -> (ints, dbls @ [ operand ]) + | Integer -> + let eightbyte_type = + get_eightbyte_type ~eightbyte_idx:i + ~total_var_size:(Type_utils.get_size ret_type) + in + (ints @ [ (eightbyte_type, operand) ], dbls) + | Mem -> + failwith + "Internal error: found eightbyte in Mem class, but first \ + eighbyte wasn't Mem" [@coverage off] + in + let i, d = List.fold_lefti process_quadword ([], []) classes in + (i, d, false) + | Double -> ([], [ asm_retval ], false) + | t -> + let typed_operand = (convert_type t, asm_retval) in ([ typed_operand ], [], false) +let classify_return_value retval = + classify_return_helper (Tacky.type_of_val retval) (convert_val retval) + +let classify_return_type = function + | Types.Void -> ([], false) + | t -> + let asm_val = + if Type_utils.is_scalar t then Assembly.Pseudo "dummy" + else Assembly.PseudoMem ("dummy", 0) + in + let ints, dbls, return_on_stack = classify_return_helper t asm_val in + if return_on_stack then ([ Assembly.AX ], true) + else + let int_regs = List.take (List.length ints) Assembly.[ AX; DX ] in + let dbl_regs = List.take (List.length dbls) Assembly.[ XMM0; XMM1 ] in + (int_regs @ dbl_regs, false) + let convert_function_call f args dst = let int_retvals, dbl_retvals, return_on_stack = Option.map_default classify_return_value ([], [], false) dst @@ -953,15 +981,23 @@ let convert_constant (key, (name, alignment)) = (* convert each symbol table entry to assembly symbol table equivalent*) let convert_symbol name = function | Symbols. - { t = Types.FunType { ret_type; _ }; attrs = FunAttr { defined; _ } } -> - (* If this function has incomplete return type (implying we don't define or call it in this translation unit) - * use a dummy value for fun_returns_on_stack *) - let fun_returns_on_stack = - if Type_utils.is_complete ret_type || ret_type = Void then - returns_on_stack name - else false - in - Assembly_symbols.add_fun name defined fun_returns_on_stack + { + t = Types.FunType { param_types; ret_type }; + attrs = FunAttr { defined; _ }; + } + when (Type_utils.is_complete ret_type || ret_type = Void) + && List.for_all Type_utils.is_complete param_types -> + let ret_regs, return_on_stack = classify_return_type ret_type in + + let param_regs = classify_param_types param_types return_on_stack in + Assembly_symbols.add_fun name defined (returns_on_stack name) param_regs + ret_regs + | Symbols.{ t = Types.FunType _; attrs = FunAttr { defined; _ } } -> + (* If this function has incomplete return type besides void, or any incomplete + * param type (implying we don't define or call it in this translation unit) + * use dummy values *) + assert (not defined); + Assembly_symbols.add_fun name defined false [] [] | { t; attrs = ConstAttr _ } -> Assembly_symbols.add_constant name (convert_type t) (* use dummy type for static variables of incomplete type *) diff --git a/lib/backend/instruction_fixup.ml b/lib/backend/instruction_fixup.ml index 8bc4429..7176560 100644 --- a/lib/backend/instruction_fixup.ml +++ b/lib/backend/instruction_fixup.ml @@ -21,11 +21,12 @@ let is_memory = function | _ -> false let is_xmm = function - | XMM0 | XMM1 | XMM2 | XMM3 | XMM4 | XMM5 | XMM6 | XMM7 | XMM14 | XMM15 -> + | XMM0 | XMM1 | XMM2 | XMM3 | XMM4 | XMM5 | XMM6 | XMM7 | XMM8 | XMM9 | XMM10 + | XMM11 | XMM12 | XMM13 | XMM14 | XMM15 -> true | _ -> false -let fixup_instruction = function +let fixup_instruction callee_saved_regs = function (* Mov can't move a value from one memory address to another *) | Mov (t, ((Memory _ | Data _) as src), ((Memory _ | Data _) as dst)) -> let scratch = if t = Double then Reg XMM14 else Reg R10 in @@ -200,22 +201,45 @@ let fixup_instruction = function else if is_memory dst then [ Cvtsi2sd (t, src, Reg XMM15); Mov (Double, Reg XMM15, dst) ] else [ i ] + | Ret -> + let restore_reg r = Pop r in + let restore_regs = List.rev_map restore_reg callee_saved_regs in + restore_regs @ [ Ret ] | other -> [ other ] +let emit_stack_adjustment bytes_for_locals callee_saved_count = + let callee_saved_bytes = 8 * callee_saved_count in + let total_stack_bytes = callee_saved_bytes + bytes_for_locals in + let adjusted_stack_bytes = + Rounding.round_away_from_zero 16 total_stack_bytes + in + let stack_adjustment = + Int64.of_int (adjusted_stack_bytes - callee_saved_bytes) + in + Binary { op = Sub; t = Quadword; src = Imm stack_adjustment; dst = Reg SP } + let fixup_tl = function | Function { name; global; instructions } -> - let stack_bytes = - Rounding.round_away_from_zero 16 - (-Assembly_symbols.get_bytes_required name) + (* TODO bytes_required should be positive (fix this in replace_pseudos) *) + let stack_bytes = -Assembly_symbols.get_bytes_required name in + let callee_saved_regs = + Assembly_symbols.get_callee_saved_regs_used name |> Set.to_list + in + + let save_reg r = Push (Reg r) in + let adjust_rsp = + emit_stack_adjustment stack_bytes (List.length callee_saved_regs) + in + let setup_instructions = + adjust_rsp :: List.map save_reg callee_saved_regs in - let stack_byte_op = Imm (Int64.of_int stack_bytes) in Function { name; global; instructions = - Binary { op = Sub; t = Quadword; src = stack_byte_op; dst = Reg SP } - :: List.concat_map fixup_instruction instructions; + setup_instructions + @ List.concat_map (fixup_instruction callee_saved_regs) instructions; } | static_var -> static_var diff --git a/lib/backend/regalloc.ml b/lib/backend/regalloc.ml new file mode 100644 index 0000000..20e50d5 --- /dev/null +++ b/lib/backend/regalloc.ml @@ -0,0 +1,569 @@ +open Batteries +open Assembly + +module AsmCfg = struct + include Cfg.AsmCfg +end + +(* extract all operands from an instruction. + * NOTE: don't need to include implicit operands (e.g. ax/dx for cdq) + * because we only use this to find pseudos + *) +let get_operands = function + | Mov (_, src, dst) -> [ src; dst ] + | Movsx i -> [ i.src; i.dst ] + | MovZeroExtend zx -> [ zx.src; zx.dst ] + | Lea (src, dst) -> [ src; dst ] + | Cvttsd2si (_, src, dst) -> [ src; dst ] + | Cvtsi2sd (_, src, dst) -> [ src; dst ] + | Unary (_, _, op) -> [ op ] + | Binary b -> [ b.src; b.dst ] + | Cmp (_, v1, v2) -> [ v1; v2 ] + | Idiv (_, op) -> [ op ] + | Div (_, op) -> [ op ] + | SetCC (_, op) -> [ op ] + | Push op -> [ op ] + | Label _ | Call _ | Ret | Cdq _ | JmpCC _ | Jmp _ -> [] + | Pop _ -> failwith "Internal error" [@coverage off] + +let replace_ops f i = + match i with + | Mov (t, src, dst) -> Mov (t, f src, f dst) + | Movsx sx -> Movsx { sx with dst = f sx.dst; src = f sx.src } + | MovZeroExtend zx -> MovZeroExtend { zx with dst = f zx.dst; src = f zx.src } + | Lea (src, dst) -> Lea (f src, f dst) + | Cvttsd2si (t, src, dst) -> Cvttsd2si (t, f src, f dst) + | Cvtsi2sd (t, src, dst) -> Cvtsi2sd (t, f src, f dst) + | Unary (operator, t, operand) -> Unary (operator, t, f operand) + | Binary b -> Binary { b with dst = f b.dst; src = f b.src } + | Cmp (code, v1, v2) -> Cmp (code, f v1, f v2) + | Idiv (t, v) -> Idiv (t, f v) + | Div (t, v) -> Div (t, f v) + | SetCC (code, dst) -> SetCC (code, f dst) + | Push v -> Push (f v) + | Label _ | Call _ | Ret | Cdq _ | Jmp _ | JmpCC _ -> i + | Pop _ -> failwith "We shouldn't use this yet" [@coverage off] + +let cleanup_movs instructions = + let is_redundant_mov = function + | Mov (_, src, dst) when src = dst -> true + | _ -> false + in + List.filter (not % is_redundant_mov) instructions + +module type REG_TYPE = sig + val suffix : string + val all_hardregs : Assembly.reg list + val caller_saved_regs : Assembly.reg list + val pseudo_is_current_type : string -> bool + val debug_settings : unit -> Settings.regalloc_debug_options +end + +module Allocator (R : REG_TYPE) = struct + (* convenience function : convert set of regs to set of operands *) + let regs_to_operands = Set.map (fun r -> Reg r) + + (* values derived from R *) + let all_hardregs = R.all_hardregs |> Set.of_list |> regs_to_operands + let caller_saved_regs = R.caller_saved_regs |> Set.of_list |> regs_to_operands + + (* Interference graph definition and helpers for manipulating graph *) + type node_id = Assembly.operand + + type node = { + id : node_id; + mutable neighbors : node_id Set.t; + spill_cost : Float.t; + color : int option; + pruned : bool; + } + + type graph = (node_id, node) Map.t + + let show_node_id nd = + let s = + match nd with + | Reg r -> show_reg r + | Pseudo p -> p + | _ -> + failwith "Internal error: malformed interference graph" + [@coverage off] + in + String.replace_chars (function '.' -> "_" | c -> String.of_char c) s + + let k = Set.cardinal all_hardregs + + let add_edge g nd_id1 nd_id2 = + let nd1 = Map.find nd_id1 g in + let nd2 = Map.find nd_id2 g in + nd1.neighbors <- Set.add nd_id2 nd1.neighbors; + nd2.neighbors <- Set.add nd_id1 nd2.neighbors + + (* utility function *) + + let regs_used_and_written i = + let ops_used, ops_written = + match i with + | Mov (_, src, dst) -> ([ src ], [ dst ]) + | MovZeroExtend zx -> ([ zx.src ], [ zx.dst ]) + | Movsx sx -> ([ sx.src ], [ sx.dst ]) + | Cvtsi2sd (_, src, dst) -> ([ src ], [ dst ]) + | Cvttsd2si (_, src, dst) -> ([ src ], [ dst ]) + (* dst of binary or unary instruction is both read and written *) + | Binary b -> ([ b.src; b.dst ], [ b.dst ]) + | Unary (_, _, op) -> ([ op ], [ op ]) + | Cmp (_, v1, v2) -> ([ v1; v2 ], []) + | SetCC (_, op) -> ([], [ op ]) + | Push v -> ([ v ], []) + | Idiv (_, op) -> ([ op; Reg AX; Reg DX ], [ Reg AX; Reg DX ]) + | Div (_, op) -> ([ op; Reg AX; Reg DX ], [ Reg AX; Reg DX ]) + | Cdq _ -> ([ Reg AX ], [ Reg DX ]) + | Call f -> + (* function call updates caller-saved regs, uses param-passing registers *) + let used = + Assembly_symbols.param_regs_used f + |> List.filter (fun r -> List.mem r R.all_hardregs) + |> List.map (fun r -> Reg r) + in + (used, Set.to_list caller_saved_regs) + (* if src is a pseudo, lea won't actually generate it, + * but we've excluded it from the graph anyway + * if it's a memory address or indexed operand, we _do_ want to generate + * hardregs used in address calculations + *) + | Lea (src, dst) -> ([ src ], [ dst ]) + | Jmp _ | JmpCC _ | Label _ | Ret -> ([], []) + | Pop _ -> failwith "Internal error" [@coverage off] + in + (* convert list of operands read into list of hard/pseudoregs read *) + let regs_used_to_read opr = + match opr with + | Pseudo _ | Reg _ -> [ opr ] + | Memory (r, _) -> [ Reg r ] + | Indexed x -> [ Reg x.base; Reg x.index ] + | Imm _ | Data _ | PseudoMem _ -> [] + in + let regs_read1 = List.concat_map regs_used_to_read ops_used in + (* now convert list of operands written into lists of hard/pseudoregs + * read _or_ written, accounting for the fact that writing to a memory address + * may require reading a pointer *) + let regs_used_to_update opr = + match opr with + | Pseudo _ | Reg _ -> ([], [ opr ]) + | Memory (r, _) -> ([ Reg r ], []) + | Indexed x -> ([ Reg x.base; Reg x.index ], []) + | Imm _ | Data _ | PseudoMem _ -> ([], []) + in + let regs_read2, regs_written = + List.map regs_used_to_update ops_written + |> List.split + |> Tuple2.mapn List.concat + in + (Set.of_list (regs_read1 @ regs_read2), Set.of_list regs_written) + + (* Functions to dump the interference graph *) + module DumpGraph = struct + (* common logic for printing in ncol and graphviz format *) + let dump_helper ?start_graph ?end_graph file_ext edge_printer post_processor + ctx g = + let filename = + Debug.mk_filename (R.suffix ^ ".interference") ctx file_ext + in + let path = Filename.concat (Sys.getcwd ()) filename in + let chan = open_out path in + let print_edges nd { neighbors; pruned; _ } = + if pruned then () + else + let print_edge nghbor_id = + edge_printer chan (show_node_id nd) (show_node_id nghbor_id) + in + let _, later_neighbors = Set.split_le nd neighbors in + (* todo avoid copypasta of not_pruned helper*) + let not_pruned nd_id = not (Map.find nd_id g).pruned in + let unpruned_later_neighbors = + Set.filter not_pruned later_neighbors + in + Set.iter print_edge unpruned_later_neighbors + in + Option.may (Printf.fprintf chan "%s") start_graph; + Map.iter print_edges g; + Option.may (Printf.fprintf chan "%s") end_graph; + close_out chan; + post_processor filename + + let dump_graphviz ctx g = + if (R.debug_settings ()).interference_graphviz && Debug.is_dump_target ctx + then + let start_graph = "graph {\n" in + let end_graph = "\t}\n" in + let edge_printer chan = Printf.fprintf chan "\t%s -- %s\n" in + let post_processor filename = + (* convert DOT file to png*) + let cmd = + Printf.sprintf "circo -Tpng %s -o %s" filename + (Filename.chop_extension filename ^ ".png") + in + if Sys.command cmd <> 0 then failwith ("graphviz fail: " ^ cmd) + else if Sys.command ("rm " ^ filename) <> 0 then + failwith "failed to remove DOT file" + in + dump_helper ~start_graph ~end_graph ".dot" edge_printer post_processor + ctx g + + let dump_ncol ctx g = + if (R.debug_settings ()).interference_ncol && Debug.is_dump_target ctx + then + let edge_printer chan = Printf.fprintf chan "%s %s\n" in + let post_processor _ = () in + dump_helper ".ncol" edge_printer post_processor ctx g + end + + module LivenessAnalysis = struct + open AsmCfg + module Iterative = Backward_dataflow.Dataflow (AsmCfg) + + let debug_print_cfg extra_tag cfg = + if (R.debug_settings ()).liveness && Debug.is_dump_target AsmCfg.(cfg.ctx) + then + let livevar_printer fmt live_vars = + Format.pp_open_box fmt 0; + Format.pp_print_list + ~pp_sep:(fun fmt () -> Format.pp_print_string fmt ", ") + Assembly.pp_operand fmt (Set.to_list live_vars); + Format.pp_close_box fmt () + in + let tag = "regalloc_liveness." ^ extra_tag in + AsmCfg.(print_graphviz tag livevar_printer cfg) + + let meet cfg block = + let live_at_exit = + let all_return_regs = + Assembly_symbols.return_regs_used AsmCfg.(cfg.ctx.fun_name) + |> Set.of_list + |> regs_to_operands + in + let return_regs = Set.intersect all_hardregs all_return_regs in + return_regs + in + + let update_live live = function + | Entry -> + failwith "Internal error: malformed interference graph" + [@coverage off] + | Exit -> Set.union live live_at_exit + | Block n -> Set.union live (get_block_value n cfg) + in + List.fold update_live Set.empty block.succs + + let transfer block end_live_regs = + let process_instr current_live_regs (_, i) = + let annotated_instr = (current_live_regs, i) in + let new_live_regs = + let regs_used, regs_written = regs_used_and_written i in + let without_killed = Set.diff current_live_regs regs_written in + Set.union without_killed regs_used + in + (new_live_regs, annotated_instr) + in + let incoming_live_regs, annotated_reversed_instrutions = + block.instructions + |> List.rev + |> List.fold_left_map process_instr end_live_regs + in + { + block with + instructions = List.rev annotated_reversed_instrutions; + value = incoming_live_regs; + } + + let analyze = + Iterative.analyze (debug_print_cfg "in_progress") meet transfer + end + + let mk_base_graph () = + let add_node g r = + Map.add r + { + id = r; + neighbors = Set.remove r all_hardregs; + spill_cost = Float.infinity; + color = None; + pruned = false; + } + g + in + List.fold add_node Map.empty (Set.to_list all_hardregs) + + let get_pseudo_nodes aliased_pseudos instructions = + (* convert list of operands to list of pseudoregisters - note that we don't need to include hardregs because they're already in the base graph + * and some operands e.g. constants, pseudos with static storage duration, and regs of the wrong type) should be excluded *) + let operands_to_pseudos = function + | Assembly.Pseudo r -> + if + R.pseudo_is_current_type r + && not (Assembly_symbols.is_static r || Set.mem r aliased_pseudos) + then Some r + else None + | _ -> None + in + let get_pseudos i = get_operands i |> List.filter_map operands_to_pseudos in + let initialize_node pseudo = + { + id = Pseudo pseudo; + neighbors = Set.empty; + spill_cost = 0.0; + color = None; + pruned = false; + } + in + List.concat_map get_pseudos instructions + |> List.unique + |> List.map initialize_node + + let add_pseudo_nodes aliased_pseudos graph instructions = + let nds = get_pseudo_nodes aliased_pseudos instructions in + let add_node g nd = Map.add nd.id nd g in + List.fold add_node graph nds + + let add_edges liveness_cfg interference_graph = + let handle_instr (live_after_instr, i) = + let _, updated_regs = regs_used_and_written i in + + let handle_livereg l = + match i with + | Mov (_, src, _) when src = l -> () + | _ -> + let handle_update u = + if + u <> l + && Map.mem l interference_graph + && Map.mem u interference_graph + then add_edge interference_graph l u + else () + in + Set.iter handle_update updated_regs + in + Set.iter handle_livereg live_after_instr + in + + let all_instructions = + let open AsmCfg in + List.concat_map + (fun (_, blk) -> blk.instructions) + liveness_cfg.basic_blocks + in + List.iter handle_instr all_instructions + + let build_interference_graph ctx aliased_pseudos instructions = + let base_graph = mk_base_graph () in + let graph = add_pseudo_nodes aliased_pseudos base_graph instructions in + let cfg = AsmCfg.instructions_to_cfg ctx instructions in + let liveness_cfg = LivenessAnalysis.analyze cfg in + LivenessAnalysis.debug_print_cfg "annotated" liveness_cfg; + add_edges liveness_cfg graph; + graph + + let add_spill_costs graph instructions = + (* given map from pseudo names to counts, incremene map entry for pseudo, or set to 1 if not already present *) + let incr_count (counts : (string, int) Map.t) pseudo = + Map.modify_def 0 pseudo (( + ) 1) counts + in + (* get list of all operands in the function, filter out all but pseudoregs *) + let operands = List.concat_map get_operands instructions in + let get_pseudo = function Assembly.Pseudo r -> Some r | _ -> None in + let pseudos = List.filter_map get_pseudo operands in + (* create map from pseudoregs to counts - note that this may include pseudos that aren't in the interference graph, we'll ignore them *) + let count_map = List.fold incr_count Map.empty pseudos in + (* set each node's spill cost to the count from count_map *) + let set_spill_cost nd = + match nd.id with + | Pseudo r -> { nd with spill_cost = Float.of_int (Map.find r count_map) } + | _ -> nd + in + Map.map set_spill_cost graph + + let rec color_graph ctx graph = + let remaining = + graph + |> Map.bindings + |> List.split + |> snd + |> List.filter (fun nd -> not nd.pruned) + in + match remaining with + | [] -> + (* we've pruned the whole graph, so we're done *) + graph + | _ -> ( + let not_pruned nd_id = not (Map.find nd_id graph).pruned in + (* find next node to prune *) + let degree nd = + let unpruned_neighbors = Set.filter not_pruned nd.neighbors in + Set.cardinal unpruned_neighbors + in + let is_low_degree nd = degree nd < k in + let next_node = + try List.find is_low_degree remaining + with Not_found -> + (* Need to choose a spill candidat! *) + + (* choose next node by spill metric *) + let spill_metric nd = nd.spill_cost /. Float.of_int (degree nd) in + let cmp nd1 nd2 = + Float.compare (spill_metric nd1) (spill_metric nd2) + in + let spilled = List.min ~cmp remaining in + + (* debug printing *) + let spill_info_printer fmt = + Printf.ksprintf + (fun msg -> + if + (R.debug_settings ()).spill_info && Debug.is_dump_target ctx + then Printf.printf "%s" msg) + fmt + in + spill_info_printer "================================\n"; + List.iter + (fun nd -> + spill_info_printer + "Node %s has degree %d, spill cost %f, and spill metric %f\n" + (show_node_id nd.id) (degree nd) nd.spill_cost + (spill_metric nd)) + (List.sort cmp remaining); + + spill_info_printer "Spill candidate: %s\n" (show_node_id spilled.id); + (* return the spill candidate *) + spilled + in + let pruned_graph = + Map.modify next_node.id (fun nd -> { nd with pruned = true }) graph + in + let partly_colored = color_graph ctx pruned_graph in + let all_colors = List.range 0 `To (k - 1) in + let remove_neighbor_color neighbor_id remaining_colors = + let neighbor_nd = Map.find neighbor_id partly_colored in + match neighbor_nd.color with + | Some c -> List.remove remaining_colors c + | None -> remaining_colors + in + let available_colors = + Set.fold remove_neighbor_color next_node.neighbors all_colors + in + match available_colors with + (* no available colors, leave this node uncolored *) + | [] -> partly_colored + (* we found an available color! *) + | _ :: _ -> + (* If this is a callee-saved reg, give it the highest-numbered color; otherwise give it the lowest (implementation tip)*) + let c = + match next_node.id with + | Reg r when not (List.mem r R.caller_saved_regs) -> + List.max available_colors + | _ -> List.min available_colors + in + Map.modify next_node.id + (fun nd -> { nd with pruned = false; color = Some c }) + partly_colored) + + let make_register_map ctx graph = + (* first build map from colors to hardregs *) + let add_color nd_id { color; _ } color_map = + match nd_id with + | Reg r -> Map.add (Option.get color) r color_map + | _ -> color_map + in + let colors_to_regs = Map.foldi add_color graph Map.empty in + + (* then build map from pseudoregisters to hard registers *) + let add_mapping nd (used_callee_saved, reg_map) = + match nd with + | { id = Pseudo p; color = Some c; _ } -> + let hardreg = Map.find c colors_to_regs in + let used_callee_saved = + if List.mem hardreg R.caller_saved_regs then used_callee_saved + else Set.add hardreg used_callee_saved + in + (used_callee_saved, Map.add p hardreg reg_map) + | _ -> (used_callee_saved, reg_map) + in + let callee_saved_regs_used, reg_map = + Map.fold add_mapping graph (Set.empty, Map.empty) + in + + let fn_name = Context.(ctx.fun_name) in + Assembly_symbols.add_callee_saved_regs_used fn_name callee_saved_regs_used; + reg_map + + let replace_pseudoregs instructions reg_map = + let f = function + (* replace pseudoregister w/ corresponding hardreg. If operand isn't a pseudo or isn't colored, don't replace it*) + | Assembly.Pseudo p as op -> ( + try Reg (Map.find p reg_map) with Not_found -> op) + | op -> op + in + cleanup_movs (List.map (replace_ops f) instructions) + + let allocate ctx aliased_pseudos instructions = + let graph : graph = + build_interference_graph ctx aliased_pseudos instructions + in + DumpGraph.dump_graphviz ctx graph; + DumpGraph.dump_ncol ctx graph; + let graph_with_spill_costs = add_spill_costs graph instructions in + let colored_graph = color_graph ctx graph_with_spill_costs in + + let register_map = make_register_map ctx colored_graph in + replace_pseudoregs instructions register_map +end + +module GP = Allocator (struct + let suffix = "gp" + let all_hardregs = [ AX; BX; CX; DX; DI; SI; R8; R9; R12; R13; R14; R15 ] + let caller_saved_regs = [ AX; CX; DX; DI; SI; R8; R9 ] + let pseudo_is_current_type p = Assembly_symbols.get_type p <> Double + let debug_settings () = !Settings.debug.dump_gp_regalloc +end) + +module XMM = Allocator (struct + let suffix = "xmm" + + let all_hardregs = + [ + XMM0; + XMM1; + XMM2; + XMM3; + XMM4; + XMM5; + XMM6; + XMM7; + XMM8; + XMM9; + XMM10; + XMM11; + XMM12; + XMM13; + ] + + let caller_saved_regs = all_hardregs + let pseudo_is_current_type p = Assembly_symbols.get_type p = Double + let debug_settings () = !Settings.debug.dump_xmm_regalloc +end) + +let allocate_registers src_file aliased_pseudos (Program tls) = + let allocate_regs_for_fun fn_name instructions = + instructions + |> GP.allocate fn_name aliased_pseudos + |> XMM.allocate fn_name aliased_pseudos + in + let alloc_in_tl = function + | Function f -> + let ctx = + Context.{ filename = src_file; fun_name = f.name; params = [] } + in + Function + { f with instructions = allocate_regs_for_fun ctx f.instructions } + | tl -> tl + in + Program (List.map alloc_in_tl tls) diff --git a/lib/backend/regalloc.mli b/lib/backend/regalloc.mli new file mode 100644 index 0000000..b072512 --- /dev/null +++ b/lib/backend/regalloc.mli @@ -0,0 +1,2 @@ +val allocate_registers : + string -> string Batteries.Set.t -> Assembly.t -> Assembly.t diff --git a/lib/backend/replace_pseudos.ml b/lib/backend/replace_pseudos.ml index 151998a..bde49f5 100644 --- a/lib/backend/replace_pseudos.ml +++ b/lib/backend/replace_pseudos.ml @@ -110,6 +110,7 @@ let replace_pseudos_in_instruction state = function let new_cvt = Cvtsi2sd (t, new_src, new_dst) in (state2, new_cvt) | (Ret | Cdq _ | Label _ | JmpCC _ | Jmp _ | Call _) as other -> (state, other) + | Pop _ -> failwith "Internal error" [@coverage off] let replace_pseudos_in_tl = function | Function { name; global; instructions } -> diff --git a/lib/backward_dataflow.ml b/lib/backward_dataflow.ml new file mode 100644 index 0000000..0db47f6 --- /dev/null +++ b/lib/backward_dataflow.ml @@ -0,0 +1,45 @@ +open Batteries + +module Dataflow (G : Cfg.CFG) = struct + type 'var annotation = 'var Batteries.Set.t + type 'var annotated_block = 'var annotation G.basic_block + type 'var annotated_graph = 'var annotation G.t + + let analyze debug_printer meet_fn transfer_fn cfg = + let starting_cfg = G.initialize_annotation cfg Set.empty in + let rec process_worklist current_cfg + (worklist : (int * 'var annotated_block) list) = + debug_printer current_cfg; + match worklist with + | [] -> current_cfg (* we're done*) + | (block_idx, blk) :: rest -> + let old_annotation = blk.value in + let live_vars_at_exit = meet_fn current_cfg blk in + let block' = transfer_fn blk live_vars_at_exit in + let updated_cfg = + G. + { + current_cfg with + basic_blocks = + List.modify block_idx + (fun _ -> block') + current_cfg.basic_blocks; + } + in + let new_worklist = + if Set.equal old_annotation block'.value then rest + else + List.fold + (fun wklist -> function + | G.Entry -> wklist + | Exit -> + failwith "Internal error: malformed CFG" [@coverage off] + | Block n -> + if List.mem_assoc n wklist then wklist + else (n, List.assoc n updated_cfg.basic_blocks) :: wklist) + rest block'.preds + in + process_worklist updated_cfg new_worklist + in + process_worklist starting_cfg (List.rev starting_cfg.basic_blocks) +end diff --git a/lib/backward_dataflow.mli b/lib/backward_dataflow.mli new file mode 100644 index 0000000..f4b64a6 --- /dev/null +++ b/lib/backward_dataflow.mli @@ -0,0 +1,12 @@ +module Dataflow : functor (G : Cfg.CFG) -> sig + type 'var annotation = 'var Batteries.Set.t + type 'var annotated_block = 'var annotation G.basic_block + type 'var annotated_graph = 'var annotation G.t + + val analyze : + ('var annotation G.t -> unit) -> + ('var annotated_graph -> 'var annotated_block -> 'var annotation) -> + ('var annotated_block -> 'var annotation -> 'var annotated_block) -> + unit G.t -> + 'var annotated_graph +end diff --git a/lib/cfg.ml b/lib/cfg.ml index ac226cf..9f69a66 100644 --- a/lib/cfg.ml +++ b/lib/cfg.ml @@ -16,7 +16,44 @@ module type INSTR = sig val pp_instr : Format.formatter -> instr -> unit end -module Cfg (Instr : INSTR) = struct +module type CFG = sig + type instr + type node_id = Entry | Block of int | Exit + + (* Cfg is parameterized by type of val we compute and type of instruction (we use both Tacky and assembly instructions )*) + + type 'v basic_block = { + id : node_id; + instructions : ('v * instr) list; + mutable preds : node_id list; + mutable succs : node_id list; + value : 'v; + } + + type 'v t = { + (* store basic blocks in association list, indexed by block # *) + basic_blocks : (int * 'v basic_block) list; + mutable entry_succs : node_id list; + mutable exit_preds : node_id list; + ctx : Context.t; + } + + val instructions_to_cfg : Context.t -> instr list -> unit t + val cfg_to_instructions : 'v t -> instr list + val get_succs : node_id -> 'v t -> node_id list + val get_block_value : int -> 'v t -> 'v + val add_edge : node_id -> node_id -> 'v t -> unit + val remove_edge : node_id -> node_id -> 'v t -> unit + val initialize_annotation : 'a t -> 'b -> 'b t + val strip_annotations : 'a t -> unit t + + (* debugging *) + val print_graphviz : + string -> (Format.formatter -> 'v -> unit) -> 'v t -> unit +end + +module Cfg (Instr : INSTR) : CFG with type instr = Instr.instr = struct + type instr = Instr.instr type node_id = Entry | Block of int | Exit (* Cfg is parameterized by type of val we compute and type of instruction (we use both Tacky and assembly instructions )*) @@ -276,3 +313,16 @@ module TackyCfg = Cfg (struct let pp_instr = Tacky_print.pp_instruction ~escape_brackets:true [@@coverage off] end) + +module AsmCfg = Cfg (struct + type instr = Assembly.instruction + + let simplify = function + | Assembly.Label l -> Label l + | Jmp target -> UnconditionalJump target + | JmpCC (_, target) -> ConditionalJump target + | Ret -> Return + | _ -> Other + + let pp_instr = Assembly.pp_instruction +end) diff --git a/lib/cfg.mli b/lib/cfg.mli index 1711a51..0fa20f2 100644 --- a/lib/cfg.mli +++ b/lib/cfg.mli @@ -1,11 +1,12 @@ -module TackyCfg : sig +module type CFG = sig + type instr type node_id = Entry | Block of int | Exit (* Cfg is parameterized by type of val we compute and type of instruction (we use both Tacky and assembly instructions )*) type 'v basic_block = { id : node_id; - instructions : ('v * Tacky.instruction) list; + instructions : ('v * instr) list; mutable preds : node_id list; mutable succs : node_id list; value : 'v; @@ -19,8 +20,8 @@ module TackyCfg : sig ctx : Context.t; } - val instructions_to_cfg : Context.t -> Tacky.instruction list -> unit t - val cfg_to_instructions : 'v t -> Tacky.instruction list + val instructions_to_cfg : Context.t -> instr list -> unit t + val cfg_to_instructions : 'v t -> instr list val get_succs : node_id -> 'v t -> node_id list val get_block_value : int -> 'v t -> 'v val add_edge : node_id -> node_id -> 'v t -> unit @@ -32,3 +33,6 @@ module TackyCfg : sig val print_graphviz : string -> (Format.formatter -> 'v -> unit) -> 'v t -> unit end + +module TackyCfg : CFG with type instr = Tacky.instruction +module AsmCfg : CFG with type instr = Assembly.instruction diff --git a/lib/compile.ml b/lib/compile.ml index d4dae3f..5a3f3ec 100644 --- a/lib/compile.ml +++ b/lib/compile.ml @@ -39,6 +39,8 @@ let compile stage optimizations src_file = optimized_tacky; if stage = Settings.Tacky then () else + (* start by getting all aliased vars, we'll need them for register allocation *) + let aliased_vars = Address_taken.analyze_program optimized_tacky in (* Assembly generation has three steps: * 1. convert TACKY to assembly *) let asm_ast = Codegen.gen optimized_tacky in @@ -48,11 +50,19 @@ let compile stage optimizations src_file = Filename.chop_extension src_file ^ ".prealloc.debug.s" in Emit.emit prealloc_filename asm_ast); - (* replace pseudoregisters with Stack operands *) - let asm_ast1 = Replace_pseudos.replace_pseudos asm_ast in + (* replace remaining pseudoregisters with Data/Stack operands *) + let asm_ast1 = + Regalloc.allocate_registers src_file aliased_vars asm_ast + in + (if !Settings.debug.dump_asm then + let postalloc_filename = + Filename.chop_extension src_file ^ ".postalloc.debug.s" + in + Emit.emit postalloc_filename asm_ast1); + let asm_ast2 = Replace_pseudos.replace_pseudos asm_ast1 in (* fix up instructions *) - let asm_ast2 = Instruction_fixup.fixup_program asm_ast1 in + let asm_ast3 = Instruction_fixup.fixup_program asm_ast2 in if stage = Settings.Codegen then () else let asm_filename = Filename.chop_extension src_file ^ ".s" in - Emit.emit asm_filename asm_ast2 + Emit.emit asm_filename asm_ast3 diff --git a/lib/context.ml b/lib/context.ml index 0128f31..894c454 100644 --- a/lib/context.ml +++ b/lib/context.ml @@ -1,2 +1,3 @@ -(* Extra context about a TACKY function; used primarily for debugging *) +(* Extra context about a TACKY or assembly function; + * used for debugging, looking things up in symbol table, etc *) type t = { filename : string; fun_name : string; params : string list } diff --git a/lib/dune b/lib/dune index ee56aef..557c4ad 100644 --- a/lib/dune +++ b/lib/dune @@ -8,6 +8,7 @@ ast assembly assembly_symbols + backward_dataflow cfg cnums codegen @@ -31,6 +32,7 @@ num_interfaces optimize parse + regalloc replace_pseudos rounding symbols diff --git a/lib/emit.ml b/lib/emit.ml index f5cd527..771ff9f 100644 --- a/lib/emit.ml +++ b/lib/emit.ml @@ -27,6 +27,7 @@ let show_fun_name f = let show_long_reg = function | AX -> "%eax" + | BX -> "%ebx" | CX -> "%ecx" | DX -> "%edx" | DI -> "%edi" @@ -35,6 +36,10 @@ let show_long_reg = function | R9 -> "%r9d" | R10 -> "%r10d" | R11 -> "%r11d" + | R12 -> "%r12d" + | R13 -> "%r13d" + | R14 -> "%r14d" + | R15 -> "%r15d" | SP -> failwith "Internal error: no 32-bit RSP" [@coverage off] | BP -> failwith "Internal error: no 32-bit RBP" [@coverage off] | _ -> @@ -43,6 +48,7 @@ let show_long_reg = function let show_quadword_reg = function | AX -> "%rax" + | BX -> "%rbx" | CX -> "%rcx" | DX -> "%rdx" | DI -> "%rdi" @@ -51,6 +57,10 @@ let show_quadword_reg = function | R9 -> "%r9" | R10 -> "%r10" | R11 -> "%r11" + | R12 -> "%r12" + | R13 -> "%r13" + | R14 -> "%r14" + | R15 -> "%r15" | SP -> "%rsp" | BP -> "%rbp" | _ -> @@ -66,6 +76,12 @@ let show_double_reg = function | XMM5 -> "%xmm5" | XMM6 -> "%xmm6" | XMM7 -> "%xmm7" + | XMM8 -> "%xmm8" + | XMM9 -> "%xmm9" + | XMM10 -> "%xmm10" + | XMM11 -> "%xmm11" + | XMM12 -> "%xmm12" + | XMM13 -> "%xmm13" | XMM14 -> "%xmm14" | XMM15 -> "%xmm15" | _ -> @@ -75,6 +91,7 @@ let show_double_reg = function let show_byte_reg = function | AX -> "%al" + | BX -> "%bl" | CX -> "%cl" | DX -> "%dl" | DI -> "%dil" @@ -83,6 +100,10 @@ let show_byte_reg = function | R9 -> "%r9b" | R10 -> "%r10b" | R11 -> "%r11b" + | R12 -> "%r12b" + | R13 -> "%r13b" + | R14 -> "%r14b" + | R15 -> "%r15b" | SP -> failwith "Internal error: no one-byte RSP" [@coverage off] | BP -> failwith "Internal error: no one-byte RBP" [@coverage off] | _ -> @@ -202,6 +223,7 @@ let emit_instruction chan = function (show_byte_operand operand) | Label lbl -> Printf.fprintf chan "%s:\n" (show_local_label lbl) | Push op -> Printf.fprintf chan "\tpushq %s\n" (show_operand Quadword op) + | Pop r -> Printf.fprintf chan "\tpopq %s\n" (show_quadword_reg r) | Call f -> Printf.fprintf chan "\tcall %s\n" (show_fun_name f) | Movsx { src_type; dst_type; src; dst } -> Printf.fprintf chan "\tmovs%s%s %s, %s\n" (suffix src_type) diff --git a/lib/optimizations/address_taken.ml b/lib/optimizations/address_taken.ml index 8e32f60..452c0cd 100644 --- a/lib/optimizations/address_taken.ml +++ b/lib/optimizations/address_taken.ml @@ -6,4 +6,12 @@ let analyze instrs = | _ -> None in - Set.of_list (List.filter_map addr_taken instrs) \ No newline at end of file + Set.of_list (List.filter_map addr_taken instrs) + +let analyze_program (Tacky.Program tls) = + let analyze_tl = function + | Tacky.Function f -> analyze f.body + | _ -> Set.empty + in + let aliased_vars_per_fun = List.map analyze_tl tls in + List.fold Set.union Set.empty aliased_vars_per_fun diff --git a/lib/optimizations/address_taken.mli b/lib/optimizations/address_taken.mli index 688015c..e0855d2 100644 --- a/lib/optimizations/address_taken.mli +++ b/lib/optimizations/address_taken.mli @@ -1 +1,2 @@ -val analyze : Tacky.instruction list -> string Batteries.Set.t \ No newline at end of file +val analyze : Tacky.instruction list -> string Batteries.Set.t +val analyze_program : Tacky.t -> string Batteries.Set.t \ No newline at end of file diff --git a/lib/optimizations/dead_store_elim.ml b/lib/optimizations/dead_store_elim.ml index b507faf..3234f94 100644 --- a/lib/optimizations/dead_store_elim.ml +++ b/lib/optimizations/dead_store_elim.ml @@ -4,7 +4,7 @@ module G = struct include Cfg.TackyCfg end -type annotated_block = string Set.t G.basic_block +module Liveness = Backward_dataflow.Dataflow (G) let debug_print ~extra_tag cfg = if @@ -21,8 +21,7 @@ let debug_print ~extra_tag cfg = let tag = "dse." ^ extra_tag in G.(print_graphviz tag livevar_printer cfg) -let transfer static_and_aliased_vars (block : annotated_block) - (end_live_variables : string Set.t) = +let transfer static_and_aliased_vars block (end_live_variables : string Set.t) = let remove_var var var_set = match var with | Tacky.Var v -> Set.remove v var_set @@ -87,7 +86,7 @@ let transfer static_and_aliased_vars (block : annotated_block) (new_live_vars, annotated_instr) in let incoming_live_vars, annotated_reversed_instructions = - block.instructions + block.G.instructions |> List.rev |> List.fold_left_map process_instr end_live_variables in @@ -97,50 +96,20 @@ let transfer static_and_aliased_vars (block : annotated_block) value = incoming_live_vars; } -let meet static_vars cfg (block : annotated_block) = +let meet static_vars cfg block = let update_live live = function | G.Entry -> failwith "Internal error: malformed CFG" [@coverage off] | Exit -> Set.union live static_vars | Block n -> Set.union live (G.get_block_value n cfg) in - List.fold update_live Set.empty block.succs + List.fold update_live Set.empty block.G.succs let find_live_variables static_vars aliased_vars (cfg : unit G.t) = - let starting_cfg = G.initialize_annotation cfg Set.empty in let static_and_aliased_vars = Set.union static_vars aliased_vars in - let rec process_worklist current_cfg (worklist : (int * annotated_block) list) - = - debug_print ~extra_tag:"in_progress" current_cfg; - match worklist with - | [] -> current_cfg (* we're done*) - | (block_idx, blk) :: rest -> - let old_annotation = blk.value in - let live_vars_at_exit = meet static_vars current_cfg blk in - let block' = transfer static_and_aliased_vars blk live_vars_at_exit in - let updated_cfg = - G. - { - current_cfg with - basic_blocks = - List.modify block_idx (fun _ -> block') current_cfg.basic_blocks; - } - in - let new_worklist = - if Set.equal old_annotation block'.value then rest - else - List.fold - (fun wklist -> function - | G.Entry -> wklist - | Exit -> - failwith "Internal error: malformed CFG" [@coverage off] - | Block n -> - if List.mem_assoc n wklist then wklist - else (n, List.assoc n updated_cfg.basic_blocks) :: wklist) - rest block'.preds - in - process_worklist updated_cfg new_worklist - in - process_worklist starting_cfg (List.rev starting_cfg.basic_blocks) + let meet_f = meet static_vars in + let transfer_f = transfer static_and_aliased_vars in + let in_progress_debug_printer = debug_print ~extra_tag:"in_progress" in + Liveness.analyze in_progress_debug_printer meet_f transfer_f cfg let is_dead_store (live_vars, i) = match i with diff --git a/lib/settings.ml b/lib/settings.ml index 27a9e1e..5b25942 100644 --- a/lib/settings.ml +++ b/lib/settings.ml @@ -30,12 +30,22 @@ type optimizations = { copy_propagation : bool; } +type regalloc_debug_options = { + spill_info : bool; + interference_ncol : bool; + interference_graphviz : bool; + liveness : bool; +} + type debug_options = { (* dumping intermediate representations *) dump_tacky : bool; dump_asm : bool; (* dumping extra info about specific optimizations*) dump_optimizations : optimizations; + (* dumping extra info during register allocation *) + dump_gp_regalloc : regalloc_debug_options; + dump_xmm_regalloc : regalloc_debug_options; (* If specified, we dump optimization info only for this function; * otherwise dump for all functions * doesn't impact dump_tacky/dump_asm, which always dump the whole program *) @@ -43,6 +53,14 @@ type debug_options = { } let debug = + let regalloc_default = + { + spill_info = false; + interference_ncol = false; + interference_graphviz = false; + liveness = false; + } + in ref { dump_tacky = false; @@ -54,5 +72,7 @@ let debug = unreachable_code_elimination = false; copy_propagation = false; }; + dump_gp_regalloc = regalloc_default; + dump_xmm_regalloc = regalloc_default; dump_fun = None; } diff --git a/lib/settings.mli b/lib/settings.mli index c6ccf9a..f9372d6 100644 --- a/lib/settings.mli +++ b/lib/settings.mli @@ -7,12 +7,23 @@ type optimizations = { unreachable_code_elimination : bool; copy_propagation : bool; } + +type regalloc_debug_options = { + spill_info : bool; + interference_ncol : bool; + interference_graphviz : bool; + liveness : bool; +} + type debug_options = { (* dumping intermediate representations *) dump_tacky : bool; dump_asm : bool; (* dumping extra info about specific optimizations*) dump_optimizations : optimizations; + (* dumping extra info during register allocation *) + dump_gp_regalloc : regalloc_debug_options; + dump_xmm_regalloc : regalloc_debug_options; (* If specified, we dump optimization info only for this function; * otherwise dump for all functions * doesn't impact dump_tacky/dump_asm, which always dump the whole program *)