diff --git a/crates/polkavm-common/src/assembler.rs b/crates/polkavm-common/src/assembler.rs index 8dec2693..8149774a 100644 --- a/crates/polkavm-common/src/assembler.rs +++ b/crates/polkavm-common/src/assembler.rs @@ -1,5 +1,6 @@ use crate::program::{Instruction, Reg}; use crate::utils::{parse_imm, parse_reg}; +use crate::writer::{InstructionOrBytes, RawInstruction}; use alloc::borrow::ToOwned; use alloc::collections::BTreeMap; use alloc::format; @@ -71,6 +72,22 @@ fn parse_load_imm_and_jump_indirect_with_tmp(line: &str) -> Option<(Reg, Reg, i3 } } +/// Parses an offset with an explicitly required sign, e.g., `+10` or `-10`. +pub fn parse_explicitly_signed_offset(text: &str) -> Option { + let text = text.trim(); + if text.starts_with('+') || text.starts_with('-') { + let (sign, text) = text.split_at(1); + let parsed_value = parse_imm(text)?; + if sign == "-" { + Some(-parsed_value) + } else { + Some(parsed_value) + } + } else { + None + } +} + #[derive(Copy, Clone)] pub enum LoadKind { I8, @@ -155,13 +172,18 @@ fn parse_condition(text: &str) -> Option { } pub fn assemble(code: &str) -> Result, String> { + enum TargetKind { + Label(String), + Offset(i32), + } + enum MaybeInstruction { Instruction(Instruction), Jump(String), Branch(String, ConditionKind, Reg, Reg), BranchImm(String, ConditionKind, Reg, i32), LoadLabelAddress(Reg, String), - LoadImmAndJump(Reg, i32, String), + LoadImmAndJump(Reg, i32, TargetKind), } impl MaybeInstruction { @@ -379,7 +401,9 @@ pub fn assemble(code: &str) -> Result, String> { if let Some(value) = parse_imm(&rhs[..index]) { if let Some(line) = rhs[index + 1..].trim().strip_prefix("jump") { if let Some(label) = line.trim().strip_prefix('@') { - emit_and_continue!(MaybeInstruction::LoadImmAndJump(dst, value, label.to_owned())); + emit_and_continue!(MaybeInstruction::LoadImmAndJump(dst, value, TargetKind::Label(label.to_owned()))); + } else if let Some(offset) = parse_explicitly_signed_offset(line) { + emit_and_continue!(MaybeInstruction::LoadImmAndJump(dst, value, TargetKind::Offset(offset))); } if let Some((base, offset)) = parse_indirect_memory_access(line) { let instruction = @@ -686,12 +710,12 @@ pub fn assemble(code: &str) -> Result, String> { return Err(format!("cannot parse line {nth_line}: \"{original_line}\"")); } - let mut code = Vec::new(); + let mut code: Vec = Vec::new(); let mut jump_table = Vec::new(); for instruction in instructions { match instruction { MaybeInstruction::Instruction(instruction) => { - code.push(instruction); + code.push(instruction.into()); } MaybeInstruction::LoadLabelAddress(dst, label) => { let Some(&target_index) = label_to_index.get(&*label) else { @@ -699,23 +723,25 @@ pub fn assemble(code: &str) -> Result, String> { }; jump_table.push(target_index); - code.push(Instruction::load_imm( - dst.into(), - (jump_table.len() as u32) * crate::abi::VM_CODE_ADDRESS_ALIGNMENT, - )); - } - MaybeInstruction::LoadImmAndJump(dst, value, label) => { - let Some(&target_index) = label_to_index.get(&*label) else { - return Err(format!("label is not defined: \"{label}\"")); - }; - - code.push(Instruction::load_imm_and_jump(dst.into(), value as u32, target_index)); + code.push(Instruction::load_imm(dst.into(), (jump_table.len() as u32) * crate::abi::VM_CODE_ADDRESS_ALIGNMENT).into()); } + MaybeInstruction::LoadImmAndJump(dst, value, target) => match target { + TargetKind::Label(label) => { + let Some(&target_index) = label_to_index.get(&*label) else { + return Err(format!("label is not defined: \"{label}\"")); + }; + code.push(Instruction::load_imm_and_jump(dst.into(), value as u32, target_index).into()); + } + TargetKind::Offset(offset) => { + let instruction = Instruction::load_imm_and_jump(dst.into(), value as u32, offset as u32); + code.push(RawInstruction::from((0, 2, instruction)).into()); + } + }, MaybeInstruction::Jump(label) => { let Some(&target_index) = label_to_index.get(&*label) else { return Err(format!("label is not defined: \"{label}\"")); }; - code.push(Instruction::jump(target_index)); + code.push(Instruction::jump(target_index).into()); } MaybeInstruction::Branch(label, kind, lhs, rhs) => { let Some(&target_index) = label_to_index.get(&*label) else { @@ -737,7 +763,7 @@ pub fn assemble(code: &str) -> Result, String> { ConditionKind::GreaterSigned => Instruction::branch_less_signed(rhs, lhs, target_index), ConditionKind::GreaterUnsigned => Instruction::branch_less_unsigned(rhs, lhs, target_index), }; - code.push(instruction); + code.push(instruction.into()); } MaybeInstruction::BranchImm(label, kind, lhs, rhs) => { let Some(&target_index) = label_to_index.get(&*label) else { @@ -758,7 +784,7 @@ pub fn assemble(code: &str) -> Result, String> { ConditionKind::GreaterSigned => Instruction::branch_greater_signed_imm(lhs, rhs, target_index), ConditionKind::GreaterUnsigned => Instruction::branch_greater_unsigned_imm(lhs, rhs, target_index), }; - code.push(instruction); + code.push(instruction.into()); } }; } diff --git a/crates/polkavm-common/src/writer.rs b/crates/polkavm-common/src/writer.rs index aa2b33ef..52f5c4ed 100644 --- a/crates/polkavm-common/src/writer.rs +++ b/crates/polkavm-common/src/writer.rs @@ -3,7 +3,7 @@ use alloc::boxed::Box; use alloc::vec::Vec; use core::ops::Range; -#[derive(Copy, Clone, Default)] +#[derive(Copy, Clone, Debug, Default)] struct InstructionBuffer { bytes: [u8; program::MAX_INSTRUCTION_LENGTH], length: u8, @@ -71,9 +71,42 @@ impl Instruction { } } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] +pub struct RawInstruction { + buffer: InstructionBuffer, + starts_new_basic_block: bool, +} + +impl From<(u32, u8, Instruction)> for RawInstruction { + fn from((position, minimum_size, instruction): (u32, u8, Instruction)) -> Self { + Self { + buffer: InstructionBuffer::new(position, minimum_size, instruction), + starts_new_basic_block: instruction.opcode().starts_new_basic_block(), + } + } +} + +#[derive(Copy, Clone, Debug)] +pub enum InstructionOrBytes { + Instruction(Instruction), + Raw(RawInstruction), +} + +impl From for InstructionOrBytes { + fn from(value: Instruction) -> Self { + Self::Instruction(value) + } +} + +impl From for InstructionOrBytes { + fn from(value: RawInstruction) -> Self { + Self::Raw(value) + } +} + +#[derive(Copy, Clone, Debug)] struct SerializedInstruction { - instruction: Instruction, + instruction: InstructionOrBytes, bytes: InstructionBuffer, target_nth_instruction: Option, position: u32, @@ -89,7 +122,7 @@ pub struct ProgramBlobBuilder { rw_data: Vec, imports: Vec>>, exports: Vec<(u32, ProgramSymbol>)>, - code: Vec, + code: Vec, jump_table: Vec, custom: Vec<(u8, Vec)>, dispatch_table: Vec>, @@ -137,12 +170,15 @@ impl ProgramBlobBuilder { self.exports.push((target_basic_block, ProgramSymbol::new(symbol.into()))); } + // pub fn set_code(&mut self, code: &[impl Into + Copy], jump_table: &[u32]) { + // let code: Vec = code.iter().map(|inst| (*inst).into()).collect(); + pub fn add_dispatch_table_entry(&mut self, symbol: impl Into>) { self.dispatch_table.push(symbol.into()); } - pub fn set_code(&mut self, code: &[Instruction], jump_table: &[u32]) { - self.code = code.to_vec(); + pub fn set_code(&mut self, code: &[impl Into + Copy], jump_table: &[u32]) { + self.code = code.iter().map(|inst| (*inst).into()).collect(); self.jump_table = jump_table.to_vec(); } @@ -177,7 +213,7 @@ impl ProgramBlobBuilder { }; instructions.push(SerializedInstruction { - instruction: Instruction::jump(target_basic_block + basic_block_shift), + instruction: Instruction::jump(target_basic_block + basic_block_shift).into(), bytes: InstructionBuffer::default(), target_nth_instruction: None, position: 0, @@ -187,40 +223,65 @@ impl ProgramBlobBuilder { for instruction in &self.code { let mut instruction = *instruction; - if let Some(target_basic_block) = instruction.target_mut() { - *target_basic_block += basic_block_shift; - } - instructions.push(SerializedInstruction { - instruction, - bytes: InstructionBuffer::default(), - target_nth_instruction: None, - position: 0, - minimum_size: 0, - }); + match instruction { + InstructionOrBytes::Instruction(ref mut inst) => { + if let Some(target_basic_block) = inst.target_mut() { + *target_basic_block += basic_block_shift; + } + + instructions.push(SerializedInstruction { + instruction, + bytes: InstructionBuffer::default(), + target_nth_instruction: None, + position: 0, + minimum_size: 0, + }); + } + + // The instruction in the form of raw bytes, that should only be appended, as we want to + // be able to slip in invalid instructions, e.g., jump instruction with an invalid offset + InstructionOrBytes::Raw(raw_inst) => { + instructions.push(SerializedInstruction { + instruction, + bytes: raw_inst.buffer, + target_nth_instruction: None, + position: 0, + minimum_size: 0, + }); + } + } } let mut basic_block_to_instruction_index = Vec::with_capacity(self.code.len()); basic_block_to_instruction_index.push(0); for (nth_instruction, entry) in instructions.iter().enumerate() { - if entry.instruction.opcode().starts_new_basic_block() { + let start_new_basic_block = match entry.instruction { + InstructionOrBytes::Instruction(inst) => inst.opcode().starts_new_basic_block(), + InstructionOrBytes::Raw(inst) => inst.starts_new_basic_block, + }; + + if start_new_basic_block { basic_block_to_instruction_index.push(nth_instruction + 1); } } let mut position: u32 = 0; + for (nth_instruction, entry) in instructions.iter_mut().enumerate() { - entry.target_nth_instruction = entry.instruction.target_mut().map(|target| { - let target_nth_instruction = basic_block_to_instruction_index[*target as usize]; - // Here we change the target from a basic block index into a byte offset. - // This is completely inaccurate, but that's fine. This is just a guess, and we'll correct it in the next loop. - *target = position.wrapping_add((target_nth_instruction as i32 - nth_instruction as i32) as u32); - target_nth_instruction - }); + if let InstructionOrBytes::Instruction(ref mut inst) = entry.instruction { + entry.target_nth_instruction = inst.target_mut().map(|target| { + let target_nth_instruction = basic_block_to_instruction_index[*target as usize]; + // Here we change the target from a basic block index into a byte offset. + // This is completely inaccurate, but that's fine. This is just a guess, and we'll correct it in the next loop. + *target = position.wrapping_add((target_nth_instruction as i32 - nth_instruction as i32) as u32); + target_nth_instruction + }); + entry.bytes = InstructionBuffer::new(position, entry.minimum_size, *inst); + } entry.position = position; - entry.bytes = InstructionBuffer::new(position, entry.minimum_size, entry.instruction); position = position.checked_add(entry.bytes.len() as u32).expect("too many instructions"); } @@ -230,17 +291,18 @@ impl ProgramBlobBuilder { position = 0; for nth_instruction in 0..instructions.len() { let mut self_modified = mutate(&mut instructions[nth_instruction].position, position); + if let Some(target_nth_instruction) = instructions[nth_instruction].target_nth_instruction { let new_target = instructions[target_nth_instruction].position; - let old_target = instructions[nth_instruction].instruction.target_mut().unwrap(); - self_modified |= mutate(old_target, new_target); - - if self_modified { - instructions[nth_instruction].bytes = InstructionBuffer::new( - position, - instructions[nth_instruction].minimum_size, - instructions[nth_instruction].instruction, - ); + let minimum_size = instructions[nth_instruction].minimum_size; + + if let InstructionOrBytes::Instruction(ref mut inst) = instructions[nth_instruction].instruction { + let old_target = inst.target_mut().unwrap(); + self_modified |= mutate(old_target, new_target); + + if self_modified { + instructions[nth_instruction].bytes = InstructionBuffer::new(position, minimum_size, *inst); + } } } @@ -342,7 +404,16 @@ impl ProgramBlobBuilder { assert_eq!(parsed.len(), instructions.len()); for (nth_instruction, (mut parsed, entry)) in parsed.into_iter().zip(instructions.into_iter()).enumerate() { - if parsed.kind != entry.instruction || entry.position != parsed.offset.0 || u32::from(entry.bytes.length) != parsed.length { + let mut kind_check = false; + if let InstructionOrBytes::Instruction(inst) = entry.instruction { + kind_check = parsed.kind != inst; + + if let Some(target) = parsed.kind.target_mut() { + assert!(offsets.contains(&ProgramCounter(*target))); + } + } + + if kind_check || entry.position != parsed.offset.0 || u32::from(entry.bytes.length) != parsed.length { panic!( concat!( "Broken serialization for instruction #{}:\n", @@ -368,10 +439,6 @@ impl ProgramBlobBuilder { &output.code[parsed.offset.0 as usize..parsed.offset.0 as usize + parsed.length as usize], ); } - - if let Some(target) = parsed.kind.target_mut() { - assert!(offsets.contains(&ProgramCounter(*target))); - } } } diff --git a/tools/spectool/spec/src/inst_load_imm_and_jump_using_offset_nok.txt b/tools/spectool/spec/src/inst_load_imm_and_jump_using_offset_nok.txt new file mode 100644 index 00000000..2be06daf --- /dev/null +++ b/tools/spectool/spec/src/inst_load_imm_and_jump_using_offset_nok.txt @@ -0,0 +1,5 @@ +pub @main: +pub @expected_exit: + a0 = 1234, jump -25 + trap + a1 = 0xdeadbeef diff --git a/tools/spectool/spec/src/inst_load_imm_and_jump_using_offset_ok.txt b/tools/spectool/spec/src/inst_load_imm_and_jump_using_offset_ok.txt new file mode 100644 index 00000000..7433e651 --- /dev/null +++ b/tools/spectool/spec/src/inst_load_imm_and_jump_using_offset_ok.txt @@ -0,0 +1,4 @@ +pub @main: + a0 = 1234, jump +6 + trap + a1 = 0xdeadbeef diff --git a/tools/spectool/spec/src/inst_load_imm_and_jump.txt b/tools/spectool/spec/src/inst_load_imm_and_jump_using_target_ok.txt similarity index 100% rename from tools/spectool/spec/src/inst_load_imm_and_jump.txt rename to tools/spectool/spec/src/inst_load_imm_and_jump_using_target_ok.txt