[X86] Add constexpr support for addsub and select intrinsics #167512
Conversation
@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-clang

Author: Ahmed Nour (ahmednoursphinx)

Changes

Recent commits (7fe0691, 53ddeb4) marked several x86 intrinsics as constexpr in the headers without providing the corresponding constant-evaluation support in Clang's constant evaluators. This caused compilation failures when these intrinsics were used in constant expressions.

Resolves #166814

Full diff: https://github.com/llvm/llvm-project/pull/167512.diff

8 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index cd5f2c3012712..7d110fc71e15d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -93,7 +93,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
- let Features = "sse3" in {
+ let Features = "sse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
foreach Op = ["addsub"] in {
def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
@@ -121,7 +121,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
// AVX
-let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in {
+let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>], Features = "avx" in {
foreach Op = ["addsub", "max", "min"] in {
def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0ef130c0a55df..8e73ec24902b2 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4279,6 +4279,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
F.subtract(RHS, RM);
return F;
});
+ case clang::X86::BI__builtin_ia32_addsubpd:
+ case clang::X86::BI__builtin_ia32_addsubps:
+ case clang::X86::BI__builtin_ia32_addsubpd256:
+ case clang::X86::BI__builtin_ia32_addsubps256: {
+ // Addsub: alternates between subtraction and addition
+ // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i])
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
+ llvm::RoundingMode RM = getRoundingMode(FPO);
+ const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+ unsigned NumElts = VT->getNumElements();
+
+ using T = PrimConv<PT_Float>::T;
+ for (unsigned I = 0; I < NumElts; ++I) {
+ APFloat LElem = LHS.elem<T>(I).getAPFloat();
+ APFloat RElem = RHS.elem<T>(I).getAPFloat();
+ if (I % 2 == 0) {
+ // Even indices: subtract
+ LElem.subtract(RElem, RM);
+ } else {
+ // Odd indices: add
+ LElem.add(RElem, RM);
+ }
+ Dst.elem<T>(I) = static_cast<T>(LElem);
+ }
+ Dst.initializeAllElements();
+ return true;
+ }
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 972d9fe3b5e4f..3ba7520adf195 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13383,6 +13383,35 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
}
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case clang::X86::BI__builtin_ia32_addsubpd:
+ case clang::X86::BI__builtin_ia32_addsubps:
+ case clang::X86::BI__builtin_ia32_addsubpd256:
+ case clang::X86::BI__builtin_ia32_addsubps256: {
+ // Addsub: alternates between subtraction and addition
+ // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i])
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+ unsigned NumElts = SourceLHS.getVectorLength();
+ SmallVector<APValue, 8> ResultElements;
+ ResultElements.reserve(NumElts);
+ llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E);
+
+ for (unsigned I = 0; I < NumElts; ++I) {
+ APFloat LHS = SourceLHS.getVectorElt(I).getFloat();
+ APFloat RHS = SourceRHS.getVectorElt(I).getFloat();
+ if (I % 2 == 0) {
+ // Even indices: subtract
+ LHS.subtract(RHS, RM);
+ } else {
+ // Odd indices: add
+ LHS.add(RHS, RM);
+ }
+ ResultElements.push_back(APValue(LHS));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case Builtin::BI__builtin_elementwise_fshl:
case Builtin::BI__builtin_elementwise_fshr: {
APValue SourceHi, SourceLo, SourceShift;
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 997e9608e112f..39a5c2d4c218c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8383,23 +8383,23 @@ _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) {
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
_mm_setzero_ps());
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
_mm_setzero_pd());
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 4aef9245323fb..33b8eaec1f99a 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -147,7 +147,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a,
/// A 256-bit vector of [4 x double] containing the right source operand.
/// \returns A 256-bit vector of [4 x double] containing the alternating sums
/// and differences between both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_addsub_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
@@ -166,7 +166,7 @@ _mm256_addsub_pd(__m256d __a, __m256d __b)
/// A 256-bit vector of [8 x float] containing the right source operand.
/// \returns A 256-bit vector of [8 x float] containing the alternating sums and
/// differences between both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_addsub_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h
index 6b152bde29fc1..4b284c41181ca 100644
--- a/clang/lib/Headers/pmmintrin.h
+++ b/clang/lib/Headers/pmmintrin.h
@@ -60,7 +60,7 @@ _mm_lddqu_si128(__m128i_u const *__p)
/// A 128-bit vector of [4 x float] containing the right source operand.
/// \returns A 128-bit vector of [4 x float] containing the alternating sums and
/// differences of both operands.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_addsub_ps(__m128 __a, __m128 __b)
{
return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
@@ -166,7 +166,7 @@ _mm_moveldup_ps(__m128 __a)
/// A 128-bit vector of [2 x double] containing the right source operand.
/// \returns A 128-bit vector of [2 x double] containing the alternating sums
/// and differences of both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_addsub_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 737febbc7fef6..46bc28b85d8db 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -45,12 +45,14 @@ __m256d test_mm256_addsub_pd(__m256d A, __m256d B) {
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_addsub_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_addsub_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+1.0, +1.0, +1.0, +1.0}), +0.0, +3.0, +2.0, +5.0));
__m256 test_mm256_addsub_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_addsub_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_addsub_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_addsub_ps((__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m256){+1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f}), +0.0f, +3.0f, +2.0f, +5.0f, +4.0f, +7.0f, +6.0f, +9.0f));
__m256d test_mm256_and_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_and_pd
diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c
index a82dd4080670b..44389fbdc6f77 100644
--- a/clang/test/CodeGen/X86/sse3-builtins.c
+++ b/clang/test/CodeGen/X86/sse3-builtins.c
@@ -19,12 +19,14 @@ __m128d test_mm_addsub_pd(__m128d A, __m128d B) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_addsub_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_addsub_pd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +2.0}), +1.0, +4.0));
__m128 test_mm_addsub_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_addsub_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_addsub_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_addsub_ps((__m128){+3.0f, +4.0f, +5.0f, +6.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +2.0f, +6.0f, +2.0f, +10.0f));
__m128d test_mm_hadd_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_hadd_pd
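As a quick hand-check of the new SSE3 expectations: _mm_addsub_pd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +2.0}) subtracts in the even lane (2.0 - 1.0 = 1.0) and adds in the odd lane (2.0 + 2.0 = 4.0), matching the asserted {+1.0, +4.0}.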
✅ With the latest revision this PR passed the C/C++ code formatter.
    case clang::X86::BI__builtin_ia32_addsubpd:
    case clang::X86::BI__builtin_ia32_addsubps:
    case clang::X86::BI__builtin_ia32_addsubpd256:
    case clang::X86::BI__builtin_ia32_addsubps256: {
Use a static function like everything else.
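For reference, a minimal sketch of that refactor (the helper name, exact parameter list, and call-site wiring are assumptions), hoisting this PR's case body into a file-local function and folding in the rename and loop-condition suggestions from the other review comments:

```cpp
// Hypothetical helper; mirrors the addsub case body from this PR, following
// the static-function convention used by neighboring builtins.
static bool interpretX86AddSub(InterpState &S, const CallExpr *Call) {
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();
  FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
  llvm::RoundingMode RM = getRoundingMode(FPO);
  unsigned NumElems =
      Call->getArg(0)->getType()->castAs<VectorType>()->getNumElements();

  using T = PrimConv<PT_Float>::T;
  for (unsigned I = 0; I != NumElems; ++I) {
    APFloat LElem = LHS.elem<T>(I).getAPFloat();
    APFloat RElem = RHS.elem<T>(I).getAPFloat();
    // Even lanes subtract, odd lanes add.
    if (I % 2 == 0)
      LElem.subtract(RElem, RM);
    else
      LElem.add(RElem, RM);
    Dst.elem<T>(I) = static_cast<T>(LElem);
  }
  Dst.initializeAllElements();
  return true;
}
```

The switch case would then collapse to `return interpretX86AddSub(S, Call);`.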
    FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
    llvm::RoundingMode RM = getRoundingMode(FPO);
    const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
    unsigned NumElts = VT->getNumElements();
Suggested change:
-  unsigned NumElts = VT->getNumElements();
+  unsigned NumElems = VT->getNumElements();
    using T = PrimConv<PT_Float>::T;
    for (unsigned I = 0; I < NumElts; ++I) {
Suggested change:
-  for (unsigned I = 0; I < NumElts; ++I) {
+  for (unsigned I = 0; I != NumElts; ++I) {
    -let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in {
    +let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>],
    +    Features = "avx" in {
       foreach Op = ["addsub", "max", "min"] in {
this also affects the max/min methods - you need to split off the addsub builtins before you make them constexpr (maybe add them further down to the block with movmskpd256 etc?)
    static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U,
                                                                      __m128d __A,
                                                                      __m128d __B) {
      return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
pull this out into its own PR
Please do not @ someone in the PR description.

Thanks all, will address feedback soon :)

Please ignore it. It seems that the requirement will be removed. https://discourse.llvm.org/t/forbidding-username-in-commits/86997/18 https://github.blog/changelog/2025-11-07-removing-notifications-for-mentions-in-commit-messages/
Recent commits (7fe0691, 53ddeb4) marked several x86 intrinsics as constexpr in the headers without providing the corresponding constant-evaluation support in Clang's constant evaluators. This caused compilation failures when these intrinsics were used in constant expressions.

Resolves #166814
Resolves #161203

Thanks to @ykhatav for syncing and collaborating on a solution for this issue.
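For completeness, a small standalone check of the fix (a sketch, not from the PR; assumes C++ mode with the SSE3 header available, e.g. clang -std=c++17 -msse3, and uses the input values from the tests above):

```cpp
#include <immintrin.h>

// With this patch, the constexpr-marked addsub intrinsics fold at compile
// time instead of failing constant evaluation.
constexpr __m128 v = _mm_addsub_ps((__m128){+3.0f, +4.0f, +5.0f, +6.0f},
                                   (__m128){+1.0f, +2.0f, +3.0f, +4.0f});

// Even lanes subtract, odd lanes add: {2, 6, 2, 10}.
static_assert(v[0] == 2.0f && v[1] == 6.0f && v[2] == 2.0f && v[3] == 10.0f,
              "addsub evaluated in a constant expression");
```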