[X86] Add constexpr support for addsub and select intrinsics #167512
Conversation
@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-clang

Author: Ahmed Nour (ahmednoursphinx)

Changes

Recent commits (7fe0691, 53ddeb4) marked several x86 intrinsics as constexpr in the headers without providing the corresponding constant-evaluation support in Clang's constant evaluators. This caused compilation failures when these intrinsics were used in constant expressions.

Resolves #166814

Full diff: https://github.com/llvm/llvm-project/pull/167512.diff

8 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index cd5f2c3012712..7d110fc71e15d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -93,7 +93,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
- let Features = "sse3" in {
+ let Features = "sse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
foreach Op = ["addsub"] in {
def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
@@ -121,7 +121,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
// AVX
-let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in {
+let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>], Features = "avx" in {
foreach Op = ["addsub", "max", "min"] in {
def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0ef130c0a55df..8e73ec24902b2 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4279,6 +4279,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
F.subtract(RHS, RM);
return F;
});
+ case clang::X86::BI__builtin_ia32_addsubpd:
+ case clang::X86::BI__builtin_ia32_addsubps:
+ case clang::X86::BI__builtin_ia32_addsubpd256:
+ case clang::X86::BI__builtin_ia32_addsubps256: {
+ // Addsub: alternates between subtraction and addition
+ // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i])
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
+ llvm::RoundingMode RM = getRoundingMode(FPO);
+ const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+ unsigned NumElts = VT->getNumElements();
+
+ using T = PrimConv<PT_Float>::T;
+ for (unsigned I = 0; I < NumElts; ++I) {
+ APFloat LElem = LHS.elem<T>(I).getAPFloat();
+ APFloat RElem = RHS.elem<T>(I).getAPFloat();
+ if (I % 2 == 0) {
+ // Even indices: subtract
+ LElem.subtract(RElem, RM);
+ } else {
+ // Odd indices: add
+ LElem.add(RElem, RM);
+ }
+ Dst.elem<T>(I) = static_cast<T>(LElem);
+ }
+ Dst.initializeAllElements();
+ return true;
+ }
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 972d9fe3b5e4f..3ba7520adf195 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13383,6 +13383,35 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
}
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case clang::X86::BI__builtin_ia32_addsubpd:
+ case clang::X86::BI__builtin_ia32_addsubps:
+ case clang::X86::BI__builtin_ia32_addsubpd256:
+ case clang::X86::BI__builtin_ia32_addsubps256: {
+ // Addsub: alternates between subtraction and addition
+ // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i])
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+ unsigned NumElts = SourceLHS.getVectorLength();
+ SmallVector<APValue, 8> ResultElements;
+ ResultElements.reserve(NumElts);
+ llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E);
+
+ for (unsigned I = 0; I < NumElts; ++I) {
+ APFloat LHS = SourceLHS.getVectorElt(I).getFloat();
+ APFloat RHS = SourceRHS.getVectorElt(I).getFloat();
+ if (I % 2 == 0) {
+ // Even indices: subtract
+ LHS.subtract(RHS, RM);
+ } else {
+ // Odd indices: add
+ LHS.add(RHS, RM);
+ }
+ ResultElements.push_back(APValue(LHS));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case Builtin::BI__builtin_elementwise_fshl:
case Builtin::BI__builtin_elementwise_fshr: {
APValue SourceHi, SourceLo, SourceShift;
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 997e9608e112f..39a5c2d4c218c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8383,23 +8383,23 @@ _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) {
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
_mm_setzero_ps());
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
_mm_setzero_pd());
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 4aef9245323fb..33b8eaec1f99a 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -147,7 +147,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a,
/// A 256-bit vector of [4 x double] containing the right source operand.
/// \returns A 256-bit vector of [4 x double] containing the alternating sums
/// and differences between both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_addsub_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
@@ -166,7 +166,7 @@ _mm256_addsub_pd(__m256d __a, __m256d __b)
/// A 256-bit vector of [8 x float] containing the right source operand.
/// \returns A 256-bit vector of [8 x float] containing the alternating sums and
/// differences between both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_addsub_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h
index 6b152bde29fc1..4b284c41181ca 100644
--- a/clang/lib/Headers/pmmintrin.h
+++ b/clang/lib/Headers/pmmintrin.h
@@ -60,7 +60,7 @@ _mm_lddqu_si128(__m128i_u const *__p)
/// A 128-bit vector of [4 x float] containing the right source operand.
/// \returns A 128-bit vector of [4 x float] containing the alternating sums and
/// differences of both operands.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_addsub_ps(__m128 __a, __m128 __b)
{
return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
@@ -166,7 +166,7 @@ _mm_moveldup_ps(__m128 __a)
/// A 128-bit vector of [2 x double] containing the right source operand.
/// \returns A 128-bit vector of [2 x double] containing the alternating sums
/// and differences of both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_addsub_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 737febbc7fef6..46bc28b85d8db 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -45,12 +45,14 @@ __m256d test_mm256_addsub_pd(__m256d A, __m256d B) {
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_addsub_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_addsub_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+1.0, +1.0, +1.0, +1.0}), +0.0, +3.0, +2.0, +5.0));
__m256 test_mm256_addsub_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_addsub_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_addsub_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_addsub_ps((__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m256){+1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f}), +0.0f, +3.0f, +2.0f, +5.0f, +4.0f, +7.0f, +6.0f, +9.0f));
__m256d test_mm256_and_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_and_pd
diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c
index a82dd4080670b..44389fbdc6f77 100644
--- a/clang/test/CodeGen/X86/sse3-builtins.c
+++ b/clang/test/CodeGen/X86/sse3-builtins.c
@@ -19,12 +19,14 @@ __m128d test_mm_addsub_pd(__m128d A, __m128d B) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_addsub_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_addsub_pd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +2.0}), +1.0, +4.0));
__m128 test_mm_addsub_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_addsub_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_addsub_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_addsub_ps((__m128){+3.0f, +4.0f, +5.0f, +6.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +2.0f, +6.0f, +2.0f, +10.0f));
__m128d test_mm_hadd_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_hadd_pd
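As a quick hand-check of the new SSE3 expectations: _mm_addsub_pd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +2.0}) subtracts in the even lane (2.0 - 1.0 = 1.0) and adds in the odd lane (2.0 + 2.0 = 4.0), matching the asserted {+1.0, +4.0}.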
✅ With the latest revision this PR passed the C/C++ code formatter.
    case clang::X86::BI__builtin_ia32_addsubpd:
    case clang::X86::BI__builtin_ia32_addsubps:
    case clang::X86::BI__builtin_ia32_addsubpd256:
    case clang::X86::BI__builtin_ia32_addsubps256: {
Use a static function like everything else.
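For reference, a minimal sketch of that refactor (the helper name, exact parameter list, and call-site wiring are assumptions), hoisting this PR's case body into a file-local function and folding in the rename and loop-condition suggestions from the other review comments:

```cpp
// Hypothetical helper; mirrors the addsub case body from this PR, following
// the static-function convention used by neighboring builtins.
static bool interpretX86AddSub(InterpState &S, const CallExpr *Call) {
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();
  FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
  llvm::RoundingMode RM = getRoundingMode(FPO);
  unsigned NumElems =
      Call->getArg(0)->getType()->castAs<VectorType>()->getNumElements();

  using T = PrimConv<PT_Float>::T;
  for (unsigned I = 0; I != NumElems; ++I) {
    APFloat LElem = LHS.elem<T>(I).getAPFloat();
    APFloat RElem = RHS.elem<T>(I).getAPFloat();
    // Even lanes subtract, odd lanes add.
    if (I % 2 == 0)
      LElem.subtract(RElem, RM);
    else
      LElem.add(RElem, RM);
    Dst.elem<T>(I) = static_cast<T>(LElem);
  }
  Dst.initializeAllElements();
  return true;
}
```

The switch case would then collapse to `return interpretX86AddSub(S, Call);`.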
    FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
    llvm::RoundingMode RM = getRoundingMode(FPO);
    const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
    unsigned NumElts = VT->getNumElements();
Suggested change:
-  unsigned NumElts = VT->getNumElements();
+  unsigned NumElems = VT->getNumElements();
    using T = PrimConv<PT_Float>::T;
    for (unsigned I = 0; I < NumElts; ++I) {
Suggested change:
-  for (unsigned I = 0; I < NumElts; ++I) {
+  for (unsigned I = 0; I != NumElts; ++I) {
    -let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in {
    +let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>],
    +    Features = "avx" in {
       foreach Op = ["addsub", "max", "min"] in {
this also affects the max/min methods - you need to split off the addsub builtins before you make them constexpr (maybe add them further down to the block with movmskpd256 etc?)
    static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U,
                                                                      __m128d __A,
                                                                      __m128d __B) {
      return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
pull this out into its own PR
Please do not @ someone in the PR description.

Thanks all, will address feedback soon :)

Please ignore it. It seems that the requirement will be removed. https://discourse.llvm.org/t/forbidding-username-in-commits/86997/18 https://github.blog/changelog/2025-11-07-removing-notifications-for-mentions-in-commit-messages/
Recent commits (7fe0691, 53ddeb4) marked several x86 intrinsics as constexpr in the headers without providing the corresponding constant-evaluation support in Clang's constant evaluators. This caused compilation failures when these intrinsics were used in constant expressions.

Resolves #166814
Resolves #161203

Thanks to @ykhatav for syncing and collaborating on a solution for this issue.
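For completeness, a small standalone check of the fix (a sketch, not from the PR; assumes C++ mode with the SSE3 header available, e.g. clang -std=c++17 -msse3, and uses the input values from the tests above):

```cpp
#include <immintrin.h>

// With this patch, the constexpr-marked addsub intrinsics fold at compile
// time instead of failing constant evaluation.
constexpr __m128 v = _mm_addsub_ps((__m128){+3.0f, +4.0f, +5.0f, +6.0f},
                                   (__m128){+1.0f, +2.0f, +3.0f, +4.0f});

// Even lanes subtract, odd lanes add: {2, 6, 2, 10}.
static_assert(v[0] == 2.0f && v[1] == 6.0f && v[2] == 2.0f && v[3] == 10.0f,
              "addsub evaluated in a constant expression");
```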