97 changes: 75 additions & 22 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2452,11 +2452,10 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
   return false;
 }
 
-Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
-                                               int &FrameIndex) const {
-  switch (MI.getOpcode()) {
+static bool isFrameLoadOpcode(int Opcode) {
+  switch (Opcode) {
   default:
-    break;
+    return false;
   case AArch64::LDRWui:
   case AArch64::LDRXui:
   case AArch64::LDRBui:
@@ -2465,22 +2464,27 @@ Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
   case AArch64::LDRDui:
   case AArch64::LDRQui:
   case AArch64::LDR_PXI:
-    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
-        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
-      FrameIndex = MI.getOperand(1).getIndex();
-      return MI.getOperand(0).getReg();
-    }
-    break;
+    return true;
   }
+}
 
-  return 0;
+Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+                                               int &FrameIndex) const {
+  if (!isFrameLoadOpcode(MI.getOpcode()))
+    return Register();
+
+  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
+      MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
+    FrameIndex = MI.getOperand(1).getIndex();
+    return MI.getOperand(0).getReg();
+  }
+  return Register();
 }

-Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
-                                              int &FrameIndex) const {
-  switch (MI.getOpcode()) {
+static bool isFrameStoreOpcode(int Opcode) {
+  switch (Opcode) {
   default:
-    break;
+    return false;
   case AArch64::STRWui:
   case AArch64::STRXui:
   case AArch64::STRBui:
@@ -2489,14 +2493,63 @@ Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
   case AArch64::STRDui:
   case AArch64::STRQui:
   case AArch64::STR_PXI:
-    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
-        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
-      FrameIndex = MI.getOperand(1).getIndex();
-      return MI.getOperand(0).getReg();
-    }
-    break;
+    return true;
   }
+}
 
+Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+                                              int &FrameIndex) const {
+  if (!isFrameStoreOpcode(MI.getOpcode()))
+    return Register();
+
+  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
+      MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
+    FrameIndex = MI.getOperand(1).getIndex();
+    return MI.getOperand(0).getReg();
+  }
+  return Register();
+}

+Register AArch64InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
+                                                    int &FrameIndex) const {
+  if (!isFrameStoreOpcode(MI.getOpcode()))
+    return Register();
+
+  if (Register Reg = isStoreToStackSlot(MI, FrameIndex))
+    return Reg;
+
+  SmallVector<const MachineMemOperand *, 1> Accesses;
+  if (hasStoreToStackSlot(MI, Accesses)) {
+    if (Accesses.size() > 1)
+      return Register();
+
+    FrameIndex =
+        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+            ->getFrameIndex();
+    return MI.getOperand(0).getReg();
+  }
-  return 0;
+  return Register();
 }

+Register AArch64InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
+                                                     int &FrameIndex) const {
+  if (!isFrameLoadOpcode(MI.getOpcode()))
+    return Register();
+
+  if (Register Reg = isLoadFromStackSlot(MI, FrameIndex))
+    return Reg;
+
+  SmallVector<const MachineMemOperand *, 1> Accesses;
+  if (hasLoadFromStackSlot(MI, Accesses)) {
+    if (Accesses.size() > 1)
+      return Register();
+
+    FrameIndex =
+        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+            ->getFrameIndex();
+    return MI.getOperand(0).getReg();
+  }
+  return Register();
+}

Review thread on the `if (hasLoadFromStackSlot(MI, Accesses))` line:

Collaborator: Is hasLoadFromStackSlot guaranteed to place an element in Accesses when it returns true?

Contributor (author): Yes. hasLoadFromStackSlot only returns true if Accesses.size() changes, and Accesses is only ever push_back()ed to; therefore, if hasLoadFromStackSlot returns true, it definitely placed an element in Accesses.
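
For reference, the default implementation in TargetInstrInfo is shaped roughly like the sketch below (a paraphrase from memory of llvm/lib/CodeGen/TargetInstrInfo.cpp, not the verbatim upstream source), which is what makes the invariant above hold: the only way it returns true is by appending at least one element.

// Paraphrased sketch of TargetInstrInfo::hasLoadFromStackSlot;
// details may differ from the current upstream source.
bool TargetInstrInfo::hasLoadFromStackSlot(
    const MachineInstr &MI,
    SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
  size_t StartSize = Accesses.size();
  for (const MachineMemOperand *MMO : MI.memoperands()) {
    // Collect every memory operand that loads from a fixed stack slot.
    if (MMO->isLoad() &&
        dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue()))
      Accesses.push_back(MMO);
  }
  // True only if the loop above appended something.
  return Accesses.size() != StartSize;
}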

 /// Check all MachineMemOperands for a hint to suppress pairing.
9 changes: 9 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -205,6 +205,15 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
   Register isStoreToStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
 
+  /// Check for post-frame ptr elimination stack locations as well. This uses a
+  /// heuristic so it isn't reliable for correctness.
+  Register isStoreToStackSlotPostFE(const MachineInstr &MI,
+                                    int &FrameIndex) const override;
+  /// Check for post-frame ptr elimination stack locations as well. This uses a
+  /// heuristic so it isn't reliable for correctness.
+  Register isLoadFromStackSlotPostFE(const MachineInstr &MI,
+                                     int &FrameIndex) const override;
+
   /// Does this instruction set its full destination register to zero?
   static bool isGPRZero(const MachineInstr &MI);
 
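These overrides are what drive the test-comment churn below: AsmPrinter's comment emission tries the PostFE hooks first, and only falls back to the "Folded" wording when they fail. A minimal sketch of that consumer logic follows (simplified and paraphrased; the real AsmPrinter code also reports access sizes and handles reloads symmetrically, and the helper name here is made up for illustration):

// Hypothetical helper mirroring the spill-comment decision in
// llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (simplified, not verbatim).
static void commentSpill(const MachineInstr &MI, const TargetInstrInfo &TII,
                         const MachineFrameInfo &MFI, raw_ostream &OS) {
  int FI = 0;
  SmallVector<const MachineMemOperand *, 2> Accesses;
  if (TII.isStoreToStackSlotPostFE(MI, FI) && MFI.isSpillSlotObjectIndex(FI))
    OS << "Spill";        // the instruction is a plain store to a spill slot
  else if (TII.hasStoreToStackSlot(MI, Accesses))
    OS << "Folded Spill"; // the store is folded into another operation
}

Before this patch, AArch64 provided no PostFE implementations, so the first branch could never fire and every spill or reload was commented as "Folded".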
72 changes: 36 additions & 36 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
@@ -89,33 +89,33 @@ define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
 ; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap:
 ; CHECK-OUTLINE-LLSC-O0: // %bb.0:
 ; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32
-; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Spill
Review thread on the first "Folded Spill"-to-"Spill" comment change:

Collaborator: It might be easier to land an NFC PR that just regenerates all the comments ahead of time; then the follow-up patch becomes much smaller.

Contributor (author): Thanks for the review! I am confused about this comment, however: how do I land a patch with all the comments changed without landing the code changes first? The code changes are the reason the comments are needed, right? I can put up a separate PR for the comments, but I still need the code changes to land first; otherwise the tests will fail until the code changes are landed. This is why I split the PR into two commits, to make reviewing easier.

Contributor: As far as I understand it, the massive comment updates are just a result of adding these functions, such that the assembly printing passes have more information available to them. In which case there's no way to split out the test changes from the implementation; it would be possible to add the hooks in separate commits so that the size of the individual commits is minimized, but I think that landing it all as one commit is better in this case: the main threat of changing so many files is if this commit has to be reverted after changes are placed on top of it, or if it similarly interferes with another revert; if that does happen, splitting this into multiple commits would make the problem worse. Better to get it all done in one push imo, ymmv etc.

Contributor (author): Yes, the comment updates are because some "Folded Spills" have become "Spills" and "Folded Reloads" have become "Reloads". We would not be able to land the test changes unless we upstream the code changes first, but I think I want to keep it as one commit, so everything goes together in the case of a revert. I will squash the patch before I submit the PR. Does everything look good to you otherwise, @SLTozer?

Contributor: Looks fine to me; ideally I'd want to be sure that @adrian-prantl is happy with the above explanation as to why this should all land in one patch, though since it was just a suggestion, maybe a final confirmation isn't necessary.

Contributor (author): Yep, I would also want another final comment from @adrian-prantl. Thanks!

Collaborator: Works for me; the end result is desirable, and if the updates come from adding the function it makes sense to land it all in one!
 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32
 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16
-; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Spill
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5
 ; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x8, x0
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x8
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1
 ; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x0]
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32
 ; CHECK-OUTLINE-LLSC-O0-NEXT: ret
 ;
 ; CHECK-CAS-O0-LABEL: val_compare_and_swap:
 ; CHECK-CAS-O0: // %bb.0:
 ; CHECK-CAS-O0-NEXT: sub sp, sp, #16
 ; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 16
-; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Folded Spill
+; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Spill
 ; CHECK-CAS-O0-NEXT: mov x1, x5
-; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Folded Reload
+; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Reload
 ; CHECK-CAS-O0-NEXT: // kill: def $x2 killed $x2 def $x2_x3
 ; CHECK-CAS-O0-NEXT: mov x3, x5
 ; CHECK-CAS-O0-NEXT: // kill: def $x4 killed $x4 def $x4_x5
@@ -216,33 +216,33 @@ define void @val_compare_and_swap_monotonic_seqcst(ptr %p, i128 %oldval, i128 %newval) {
 ; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap_monotonic_seqcst:
 ; CHECK-OUTLINE-LLSC-O0: // %bb.0:
 ; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32
-; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Spill
 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32
 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16
-; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Spill
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5
 ; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq_rel
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x8, x0
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x8
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1
 ; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x0]
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32
 ; CHECK-OUTLINE-LLSC-O0-NEXT: ret
 ;
 ; CHECK-CAS-O0-LABEL: val_compare_and_swap_monotonic_seqcst:
 ; CHECK-CAS-O0: // %bb.0:
 ; CHECK-CAS-O0-NEXT: sub sp, sp, #16
 ; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 16
-; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Folded Spill
+; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Spill
 ; CHECK-CAS-O0-NEXT: mov x1, x5
-; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Folded Reload
+; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Reload
 ; CHECK-CAS-O0-NEXT: // kill: def $x2 killed $x2 def $x2_x3
 ; CHECK-CAS-O0-NEXT: mov x3, x5
 ; CHECK-CAS-O0-NEXT: // kill: def $x4 killed $x4 def $x4_x5
@@ -343,33 +343,33 @@ define void @val_compare_and_swap_release_acquire(ptr %p, i128 %oldval, i128 %newval) {
 ; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap_release_acquire:
 ; CHECK-OUTLINE-LLSC-O0: // %bb.0:
 ; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32
-; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Spill
 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32
 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16
-; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Spill
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5
 ; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq_rel
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x8, x0
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x8
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1
 ; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x0]
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32
 ; CHECK-OUTLINE-LLSC-O0-NEXT: ret
 ;
 ; CHECK-CAS-O0-LABEL: val_compare_and_swap_release_acquire:
 ; CHECK-CAS-O0: // %bb.0:
 ; CHECK-CAS-O0-NEXT: sub sp, sp, #16
 ; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 16
-; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Folded Spill
+; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Spill
 ; CHECK-CAS-O0-NEXT: mov x1, x5
-; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Folded Reload
+; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Reload
 ; CHECK-CAS-O0-NEXT: // kill: def $x2 killed $x2 def $x2_x3
 ; CHECK-CAS-O0-NEXT: mov x3, x5
 ; CHECK-CAS-O0-NEXT: // kill: def $x4 killed $x4 def $x4_x5
@@ -470,33 +470,33 @@ define void @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval) {
 ; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap_monotonic:
 ; CHECK-OUTLINE-LLSC-O0: // %bb.0:
 ; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32
-; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Spill
 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32
 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16
-; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Spill
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5
 ; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq_rel
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x8, x0
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x8
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1
 ; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x0]
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32
 ; CHECK-OUTLINE-LLSC-O0-NEXT: ret
 ;
 ; CHECK-CAS-O0-LABEL: val_compare_and_swap_monotonic:
 ; CHECK-CAS-O0: // %bb.0:
 ; CHECK-CAS-O0-NEXT: sub sp, sp, #16
 ; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 16
-; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Folded Spill
+; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Spill
 ; CHECK-CAS-O0-NEXT: mov x1, x5
-; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Folded Reload
+; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Reload
 ; CHECK-CAS-O0-NEXT: // kill: def $x2 killed $x2 def $x2_x3
 ; CHECK-CAS-O0-NEXT: mov x3, x5
 ; CHECK-CAS-O0-NEXT: // kill: def $x4 killed $x4 def $x4_x5
@@ -580,22 +580,22 @@ define void @atomic_load_relaxed(i64, i64, ptr %p, ptr %p2) {
 ; CHECK-OUTLINE-LLSC-O0-LABEL: atomic_load_relaxed:
 ; CHECK-OUTLINE-LLSC-O0: // %bb.0:
 ; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32
-; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Spill
 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32
 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x4, x2
-; CHECK-OUTLINE-LLSC-O0-NEXT: str x3, [sp, #8] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x3, [sp, #8] // 8-byte Spill
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, xzr
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x3
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x3
 ; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_relax
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x3, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x3, [sp, #8] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x0
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1
 ; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x3]
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32
 ; CHECK-OUTLINE-LLSC-O0-NEXT: ret
 ;
@@ -690,17 +690,17 @@ define i128 @val_compare_and_swap_return(ptr %p, i128 %oldval, i128 %newval) {
 ; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap_return:
 ; CHECK-OUTLINE-LLSC-O0: // %bb.0:
 ; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32
-; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Spill
 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32
 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16
-; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Spill
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5
 ; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq
-; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload
 ; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32
 ; CHECK-OUTLINE-LLSC-O0-NEXT: ret
 ;