Skip to content

Commit 3ecf780

Browse files
[AArch64] Treat inline asm v0-v31 as scalar registers when the value is 64 bits wide or narrower.
If 32-bit (or narrower) "v0" registers coming from inline asm are treated as vector registers, codegen might produce incorrect vector<->scalar conversions. This causes type-mismatch assertion failures later at compile time. The fix treats AArch64 v0-v31 registers holding 32 bits or less as scalar, in the same way as 64-bit ones. Fixes the clang crash reported in #153442.
1 parent 870f581 commit 3ecf780

File tree

2 files changed

+137
-1
lines changed

2 files changed

+137
-1
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13365,7 +13365,7 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
1336513365
// v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
1336613366
// By default we'll emit v0-v31 for this unless there's a modifier where
1336713367
// we'll emit the correct register as well.
13368-
if (VT != MVT::Other && VT.getSizeInBits() == 64) {
13368+
if (VT != MVT::Other && VT.getSizeInBits() <= 64) {
1336913369
Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
1337013370
Res.second = &AArch64::FPR64RegClass;
1337113371
} else {
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
; RUN: llc -O1 -mtriple=aarch64-linux-gnu %s -o - 2>&1 | FileCheck %s
2+
3+
; This test checks that code containing a "nop" inline assembler instruction
4+
; with a 16/32/64-bit FP "v0" register operand compiles successfully,
5+
; and that the generated code contains one optimized nop instruction
6+
; per function.
7+
;
8+
; IR for this test was generated from the following source code:
9+
;
10+
; #define _FP16 _Float16
11+
; #define _FP32 float
12+
; #define _FP64 double
13+
;
14+
; #define FOO(BITS) \
15+
; int foo##BITS(void) { \
16+
; register _FP##BITS a0 asm("v0"); \
17+
; for (int i = 0; i < 2; ++i) { \
18+
; __asm__ volatile("nop" : [a0] "+w"(a0)::); \
19+
; } \
20+
; return 0; \
21+
; }
22+
;
23+
; FOO(16)
24+
; FOO(32)
25+
; FOO(64)
26+
27+
28+
; test nop_fp16_reg
29+
; CHECK-LABEL: foo16:
30+
; CHECK: nop
31+
; CHECK-NOT: nop
32+
define dso_local i32 @foo16() #0 {
33+
%1 = alloca half, align 2
34+
%2 = alloca i32, align 4
35+
store i32 0, ptr %2, align 4
36+
br label %3
37+
38+
3: ; preds = %9, %0
39+
%4 = load i32, ptr %2, align 4
40+
%5 = icmp slt i32 %4, 2
41+
br i1 %5, label %6, label %12
42+
43+
6: ; preds = %3
44+
%7 = load half, ptr %1, align 2
45+
%8 = call half asm sideeffect "nop", "={v0},{v0}"(half %7) #1, !srcloc !6
46+
store half %8, ptr %1, align 2
47+
br label %9
48+
49+
9: ; preds = %6
50+
%10 = load i32, ptr %2, align 4
51+
%11 = add nsw i32 %10, 1
52+
store i32 %11, ptr %2, align 4
53+
br label %3, !llvm.loop !7
54+
55+
12: ; preds = %3
56+
ret i32 0
57+
}
58+
59+
; test nop_fp32_reg
60+
; CHECK-LABEL: foo32:
61+
; CHECK: nop
62+
; CHECK-NOT: nop
63+
define dso_local i32 @foo32() #0 {
64+
%1 = alloca float, align 4
65+
%2 = alloca i32, align 4
66+
store i32 0, ptr %2, align 4
67+
br label %3
68+
69+
3: ; preds = %9, %0
70+
%4 = load i32, ptr %2, align 4
71+
%5 = icmp slt i32 %4, 2
72+
br i1 %5, label %6, label %12
73+
74+
6: ; preds = %3
75+
%7 = load float, ptr %1, align 4
76+
%8 = call float asm sideeffect "nop", "={v0},{v0}"(float %7) #1, !srcloc !9
77+
store float %8, ptr %1, align 4
78+
br label %9
79+
80+
9: ; preds = %6
81+
%10 = load i32, ptr %2, align 4
82+
%11 = add nsw i32 %10, 1
83+
store i32 %11, ptr %2, align 4
84+
br label %3, !llvm.loop !10
85+
86+
12: ; preds = %3
87+
ret i32 0
88+
}
89+
90+
; test nop_fp64_reg
91+
; CHECK-LABEL: foo64:
92+
; CHECK: nop
93+
; CHECK-NOT: nop
94+
define dso_local i32 @foo64() #0 {
95+
%1 = alloca double, align 8
96+
%2 = alloca i32, align 4
97+
store i32 0, ptr %2, align 4
98+
br label %3
99+
100+
3: ; preds = %9, %0
101+
%4 = load i32, ptr %2, align 4
102+
%5 = icmp slt i32 %4, 2
103+
br i1 %5, label %6, label %12
104+
105+
6: ; preds = %3
106+
%7 = load double, ptr %1, align 8
107+
%8 = call double asm sideeffect "nop", "={v0},{v0}"(double %7) #1, !srcloc !11
108+
store double %8, ptr %1, align 8
109+
br label %9
110+
111+
9: ; preds = %6
112+
%10 = load i32, ptr %2, align 4
113+
%11 = add nsw i32 %10, 1
114+
store i32 %11, ptr %2, align 4
115+
br label %3, !llvm.loop !12
116+
117+
12: ; preds = %3
118+
ret i32 0
119+
}
120+
121+
!llvm.module.flags = !{!0, !1, !2, !3, !4}
122+
!llvm.ident = !{!5}
123+
124+
!0 = !{i32 1, !"wchar_size", i32 4}
125+
!1 = !{i32 8, !"PIC Level", i32 2}
126+
!2 = !{i32 7, !"PIE Level", i32 2}
127+
!3 = !{i32 7, !"uwtable", i32 2}
128+
!4 = !{i32 7, !"frame-pointer", i32 1}
129+
!5 = !{!"clang version 22.0.0git"}
130+
!6 = !{i64 2147502427}
131+
!7 = distinct !{!7, !8}
132+
!8 = !{!"llvm.loop.mustprogress"}
133+
!9 = !{i64 2147502622}
134+
!10 = distinct !{!10, !8}
135+
!11 = !{i64 2147502814}
136+
!12 = distinct !{!12, !8}

0 commit comments

Comments
 (0)