diff --git a/sway-ir/src/optimize/memcpyopt.rs b/sway-ir/src/optimize/memcpyopt.rs index 90b1f327e1e..446fe6f61af 100644 --- a/sway-ir/src/optimize/memcpyopt.rs +++ b/sway-ir/src/optimize/memcpyopt.rs @@ -1121,10 +1121,6 @@ fn copy_prop_reverse( _ => continue, }; - if dst_sym.get_type(context) != src_sym.get_type(context) { - continue; - } - // We don't deal with partial memcpys if dst_sym .get_type(context) @@ -1172,7 +1168,7 @@ fn copy_prop_reverse( let mut to_delete: FxHashSet<Value> = FxHashSet::default(); let mut src_to_dst: FxHashMap<Symbol, Symbol> = FxHashMap::default(); - for (inst, dst_sym, src_sym) in candidates { + for (_inst, dst_sym, src_sym) in candidates { match src_sym { Symbol::Arg(_) => { // Args are mostly copied to locals before actually being used. @@ -1201,7 +1197,6 @@ fn copy_prop_reverse( } } } - to_delete.insert(inst); } } } @@ -1230,16 +1225,38 @@ fn copy_prop_reverse( } } + let mut repl_locals: Vec<(Value, Symbol)> = vec![]; + let mut value_replacements = FxHashMap::default(); + // Gather the get_local instructions that need to be replaced. - let mut repl_locals = vec![]; - for (_block, inst) in function.instruction_iter(context) { - match inst.get_instruction(context).unwrap() { + for (_block, inst) in function.instruction_iter(context).collect::<Vec<_>>() { + match inst.get_instruction(context).cloned().unwrap() { Instruction { op: InstOp::GetLocal(sym), + parent, .. } => { - if let Some(dst) = src_to_dst.get(&Symbol::Local(*sym)) { - repl_locals.push((inst, *dst)); + if let Some(dst) = src_to_dst.get(&Symbol::Local(sym)) { + let sym_type = sym.get_type(context); + let dst_type = dst.get_type(context); + + // TODO we are comparing strings here + let sym_type_str = sym_type.as_string(context); + let dst_type_str = dst_type.as_string(context); + if sym_type_str == dst_type_str { + repl_locals.push((inst, *dst)); + //to_delete.insert(inst); // TODO what to do with this? 
+ } else { + let original_ptr = match dst { + Symbol::Local(_) => { + continue; + } + Symbol::Arg(block_argument) => block_argument.as_value(context), + }; + + let cast_ptr = InstOp::CastPtr(original_ptr, sym_type); + value_replacements.insert(inst, (parent, cast_ptr)); + } } } _ => { @@ -1249,12 +1266,25 @@ fn copy_prop_reverse( } } - if repl_locals.is_empty() { + if repl_locals.is_empty() && value_replacements.is_empty() { return Ok(modified); } + modified = true; - let mut value_replacements = FxHashMap::default(); + let mut value_replacements = value_replacements + .into_iter() + .map(|(old, (block, instruction))| { + let v = Value::new_instruction(context, block, instruction); + + let mut inserter = + InstructionInserter::new(context, block, crate::InsertionPosition::Before(old)); + inserter.insert(v); + + (old, v) + }) + .collect::<FxHashMap<_, _>>(); + for (to_repl, repl_with) in repl_locals { let Instruction { op: InstOp::GetLocal(sym), diff --git a/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/dbg_release/stdout.snap b/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/dbg_release/stdout.snap index c25729eaec8..72a814f3b60 100644 --- a/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/dbg_release/stdout.snap +++ b/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/dbg_release/stdout.snap @@ -1,6 +1,5 @@ --- source: test/src/snapshot/mod.rs -assertion_line: 162 --- > forc build --path test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/dbg_release --asm final | sub ecal ecal $r1 $r0 $zero $zero ; ecal id fd zero zero diff --git a/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute/snapshot.toml b/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute/snapshot.toml index c613fa3ab43..d291408834d 100644 --- a/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute/snapshot.toml +++ 
b/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute/snapshot.toml @@ -1,3 +1,3 @@ cmds = [ - "forc build --path {root} --ir final --asm final | filter-fn {name} transmute_by_reference_7", + "forc build --path {root} --ir final --asm final --release | filter-fn {name} transmute_by_reference_7,transmute_optimisation_13", ] \ No newline at end of file diff --git a/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute/src/main.sw b/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute/src/main.sw index 8ac4aeaaddf..f580bd9dc60 100644 --- a/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute/src/main.sw +++ b/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute/src/main.sw @@ -49,6 +49,16 @@ fn const_transmute() { let _ = transmute_by_reference(); } + +#[inline(never)] +fn transmute_optimisation() -> raw_slice { + let ptr = asm(size: 0) { + aloc size; + hp: raw_ptr + }; + __transmute::<(raw_ptr, u64), raw_slice>((ptr, 0)) +} + fn main() { const_transmute(); @@ -129,4 +139,6 @@ fn main() { let some_struct = __transmute::(some_enum); assert(some_struct.tag == 1); assert(some_struct.value == 1); + + let _ = transmute_optimisation(); } diff --git a/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute/stdout.snap b/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute/stdout.snap index 243a5b271bb..5e9105f8bd4 100644 --- a/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute/stdout.snap +++ b/test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute/stdout.snap @@ -1,45 +1,55 @@ --- source: test/src/snapshot/mod.rs --- -> forc build --path test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute --ir final --asm final | filter-fn transmute transmute_by_reference_7 +> forc build --path 
test/src/e2e_vm_tests/test_programs/should_pass/language/intrinsics/transmute --ir final --asm final --release | filter-fn transmute transmute_by_reference_7,transmute_optimisation_13 fn transmute_by_reference_7(__ret_value: __ptr u256) -> () { - local [u8; 32] __anon_0 - local mut [u8; 32] bytes - local __ptr u256 v - entry(__ret_value: __ptr u256): - v0 = get_local __ptr [u8; 32], __anon_0 + v0 = cast_ptr __ret_value to __ptr [u8; 32] mem_clear_val v0 - v1 = get_local __ptr [u8; 32], bytes - mem_copy_val v1, v0 - v2 = get_local __ptr [u8; 32], bytes - v3 = cast_ptr v2 to __ptr u256 - v4 = get_local __ptr __ptr u256, v - store v3 to v4 - v5 = get_local __ptr __ptr u256, v - v6 = load v5 - mem_copy_val __ret_value, v6 - v7 = const unit () - ret () v7 + v1 = const unit () + ret () v1 +} + + +fn transmute_optimisation_13(__ret_value: __ptr slice) -> () { + entry(__ret_value: __ptr slice): + v0 = const u64 0 + v1 = asm(size: v0) -> ptr hp { + aloc size + } + v2 = cast_ptr __ret_value to __ptr { ptr, u64 } + v3 = const u64 0 + v4 = get_elem_ptr v2, __ptr ptr, v3 + store v1 to v4 + v5 = const u64 1 + v6 = get_elem_ptr v2, __ptr u64, v5 + v7 = const u64 0 + store v7 to v6 + v8 = const unit () + ret () v8 } -pshl i7 ; save registers 16..40 +pshl i1 ; save registers 16..40 pshh i524288 ; save registers 40..64 move $$locbase $sp ; save locals base register for function transmute_by_reference_7 -cfei i72 ; allocate 72 bytes for locals and 0 slots for call arguments +move $r0 $$reta ; save return address +mcli $$arg0 i32 ; clear memory [u8; 32], 32 bytes +move $$reta $r0 ; restore return address +poph i524288 ; restore registers 40..64 +popl i1 ; restore registers 16..40 +jal $zero $$reta i0 ; return from call + +pshl i3 ; save registers 16..40 +pshh i524288 ; save registers 40..64 +move $$locbase $sp ; save locals base register for function transmute_optimisation_13 move $r0 $$arg0 ; save argument 0 (__ret_value) move $r1 $$reta ; save return address -mcli $$locbase i32 ; 
clear memory [u8; 32], 32 bytes -addi $r2 $$locbase i32 ; get offset to local __ptr [u8; 32] -mcpi $r2 $$locbase i32 ; copy memory -addi $r2 $$locbase i32 ; get offset to local __ptr [u8; 32] -sw $$locbase $r2 i8 ; store word -lw $r2 $$locbase i8 ; load word -mcpi $r0 $r2 i32 ; copy memory -cfsi i72 ; free 72 bytes for locals and 0 slots for extra call arguments +aloc $zero ; aloc size +sw $r0 $hp i0 ; store word +sw $r0 $zero i1 ; store word move $$reta $r1 ; restore return address poph i524288 ; restore registers 40..64 -popl i7 ; restore registers 16..40 +popl i3 ; restore registers 16..40 jal $zero $$reta i0 ; return from call diff --git a/test/src/e2e_vm_tests/test_programs/should_pass/language/panic_expression/panicking_lib/stdout.snap b/test/src/e2e_vm_tests/test_programs/should_pass/language/panic_expression/panicking_lib/stdout.snap index 29c31e2d26a..f2fb87bc7ec 100644 --- a/test/src/e2e_vm_tests/test_programs/should_pass/language/panic_expression/panicking_lib/stdout.snap +++ b/test/src/e2e_vm_tests/test_programs/should_pass/language/panic_expression/panicking_lib/stdout.snap @@ -1,6 +1,5 @@ --- source: test/src/snapshot/mod.rs -assertion_line: 101 --- > forc test --path test/src/e2e_vm_tests/test_programs/should_pass/language/panic_expression/panicking_lib --test-threads 1 --logs --reverts exit status: 0