@@ -114,6 +114,33 @@ void checkResult(const string &FuncName, const vector<__nv_bfloat162> &Inputs,
114114 checkResult (FuncName, FInputs, Expect, Result, precision);
115115}
116116
// Prints one test-case line for a function returning a pair of floats and
// hands the pass/fail verdict to check() (defined elsewhere in this file).
//
//   FuncName  - label printed at the start of the line.
//   Inputs    - argument values echoed inside the parentheses; may be empty.
//   Expect    - reference value for each of the two components.
//   Result    - value actually produced for each of the two components.
//   precision - forwarded to setprecision() for the printed numbers and also
//               used to derive the tolerance 10^-precision.
//
// A case is accepted only when BOTH components differ from their reference
// by strictly less than the tolerance.
void checkResult(const string &FuncName, const vector<float> &Inputs,
                 const float2 &Expect, const float2 &Result,
                 const int precision) {
  // Half-width of the acceptance window; computed once instead of at every
  // use. `auto` keeps whatever type pow() yields for these arguments.
  const auto Tolerance = pow(10, -precision);

  cout << FuncName << " (";
  // Guard the first element: indexing an empty vector is undefined behaviour.
  if (!Inputs.empty()) {
    cout << Inputs[0];
  }
  cout << " ";
  for (size_t i = 1; i < Inputs.size(); ++i) {
    cout << " , " << Inputs[i];
  }

  // The expected range is printed as {xLow ~ xHigh , yLow ~ yHigh}; the
  // closing brace was previously missing from the output.
  cout << " ) = " << fixed << setprecision(precision) << " {" << Result.x << " , "
       << Result.y << " } (expect {" << Expect.x - Tolerance << " ~ "
       << Expect.x + Tolerance << " , "
       << Expect.y - Tolerance << " ~ "
       << Expect.y + Tolerance << " })";
  // Drop the fixed-notation flag so later output is not forced into it.
  cout.unsetf(ios::fixed);

  check(abs(Result.x - Expect.x) < Tolerance &&
        abs(Result.y - Expect.y) < Tolerance);
}
133+
// Overload for test cases whose inputs and reference value are bfloat16.
// Every input and both components of Expect are converted to float with
// __bfloat162float(), then the work is delegated to the
// vector<float> / float2 overload of checkResult().
void checkResult(const string &FuncName, const vector<__nv_bfloat16> &Inputs,
                 const __nv_bfloat162 &Expect, const float2 &Result,
                 const int precision) {
  // Converted copy of the inputs, in the same order.
  vector<float> Widened;
  for (size_t Idx = 0; Idx < Inputs.size(); ++Idx) {
    Widened.push_back(__bfloat162float(Inputs[Idx]));
  }

  // Converted copy of the two-component reference value.
  float2 ExpectAsFloat;
  ExpectAsFloat.x = __bfloat162float(Expect.x);
  ExpectAsFloat.y = __bfloat162float(Expect.y);

  checkResult(FuncName, Widened, ExpectAsFloat, Result, precision);
}
143+
117144__global__ void bfloat1622float2 (float *const Result, __nv_bfloat162 Input1) {
118145 auto ret = __bfloat1622float2 (Input1);
119146 Result[0 ] = ret.x ;
@@ -1073,6 +1100,25 @@ void testUshort_as_bfloat16Cases(
10731100 }
10741101}
10751102
// Test kernel: builds a __nv_bfloat162 from Input1 and Input2 via the
// two-argument make_bfloat162() and stores its .x and .y components,
// converted to float, in Result[0] and Result[1].
//
// There is no per-thread indexing: every launched thread writes the same two
// slots. Result must have room for at least two floats.
__global__ void make_bfloat162(float *const Result, __nv_bfloat16 Input1, __nv_bfloat16 Input2) {
  const __nv_bfloat162 Packed = make_bfloat162(Input1, Input2);
  Result[0] = __bfloat162float(Packed.x);
  Result[1] = __bfloat162float(Packed.y);
}
1108+
// Runs the make_bfloat162 kernel once per test case and verifies the output.
//
// Each element of TestCases is
//   { {Input1, Input2}, {ExpectedPair, precision} }
// where the inputs are passed to the kernel and ExpectedPair / precision are
// forwarded to checkResult().
//
// CUDA failures (allocation, launch, execution) are reported and recorded
// through check(false) rather than being ignored, and the managed result
// buffer is released before returning.
// NOTE(review): check() is defined elsewhere in this file; it is assumed that
// calling it with `false` registers a failed case - confirm.
void testMake_bfloat162Cases(
    const vector<pair<pair<__nv_bfloat16, __nv_bfloat16>, pair<__nv_bfloat162, int>>> &TestCases) {
  // Two floats: one per component of the __nv_bfloat162 produced on device.
  float *Result = nullptr;
  cudaError_t Status = cudaMallocManaged(&Result, sizeof(*Result) * 2);
  if (Status != cudaSuccess) {
    // Without a valid buffer nothing below can run safely.
    cout << " make_bfloat162: cudaMallocManaged failed: "
         << cudaGetErrorString(Status);
    check(false);
    return;
  }

  for (const auto &TestCase : TestCases) {
    make_bfloat162<<<1, 1>>>(Result, TestCase.first.first, TestCase.first.second);
    // Launch errors surface through cudaGetLastError(); execution errors
    // surface at the synchronisation that must precede the host-side read.
    Status = cudaGetLastError();
    if (Status == cudaSuccess) {
      Status = cudaDeviceSynchronize();
    }
    if (Status != cudaSuccess) {
      cout << " make_bfloat162: kernel failed: " << cudaGetErrorString(Status);
      check(false);
      // Stop here instead of comparing stale buffer contents for later cases.
      break;
    }
    checkResult(" make_bfloat162", {TestCase.first.first, TestCase.first.second},
                TestCase.second.first, {Result[0], Result[1]},
                TestCase.second.second);
  }

  // Release the managed allocation; it was previously leaked.
  cudaFree(Result);
}
1121+
10761122int main () {
10771123 testBfloat1622float2Cases ({
10781124 {{-0.3 , -0.5 }, {{-0.30078125 , -0.5 }, 16 }},
@@ -1542,6 +1588,12 @@ int main() {
15421588 {1000 , {0.0000000000000000000000000000000000013635734469538535 , 52 }},
15431589 {62536 , {-63382530011411470074835160268800.0 , -16 }},
15441590 });
1591+ testMake_bfloat162Cases ({
1592+ {{-0.3 , -0.4 }, {{-0.300048828125 , -0.39990234375 }, 16 }},
1593+ {{0 , 0.7 }, {{0 , 0.7001953125 }, 16 }},
1594+ {{1 , 100.6 }, {{1 , 100.625 }, 14 }},
1595+ {{100.6 , 1 }, {{100.625 , 1 }, 14 }},
1596+ });
15451597 cout << " passed " << passed << " /" << passed + failed << " cases!" << endl;
15461598 if (failed) {
15471599 cout << " failed!" << endl;
0 commit comments