@@ -268,6 +268,8 @@ LogicalResult AIETargetBackend::serializeExecutable(
268268 SmallVector<uint32_t > xclbinIndices (ordinalCount);
269269 SmallVector<uint32_t > asmInstrIndices (ordinalCount);
270270
271+ SmallVector<SmallString<128 >> xclbinPaths;
272+
271273 for (size_t i = 0 ; i < entryPointNames.size (); i++) {
272274 uint64_t ordinal = entryPointOrdinals.at (entryPointNames[i]);
273275
@@ -300,18 +302,34 @@ LogicalResult AIETargetBackend::serializeExecutable(
300302 llvm::sys::path::append (npuInstPath,
301303 entryPointNamesFb[ordinal] + " .npu.txt" );
302304
303- SmallVector<StringRef> cmdArgs{aie2xclbin,
304- inputMlirPath,
305- " --peano" ,
306- options.peanoInstallDir ,
307- " --xclbin-name" ,
308- xclbinPath,
309- " --npu-insts-name" ,
310- npuInstPath,
311- " --xclbin-kernel-name" ,
312- entryPointNamesFb[ordinal],
313- " --tmpdir" ,
314- entryPointWorkDir};
305+ // Convert ordinal to hexadecimal string for xclbin kern id
306+ std::stringstream ss;
307+ ss << " 0x" << std::hex << ordinal + 10 ;
308+ std::string ordinalHex = ss.str ();
309+
310+ SmallVector<StringRef> cmdArgs;
311+ SmallVector<StringRef> cmdArgsBase{aie2xclbin,
312+ inputMlirPath,
313+ " --peano" ,
314+ options.peanoInstallDir ,
315+ " --xclbin-name" ,
316+ xclbinPath,
317+ " --npu-insts-name" ,
318+ npuInstPath,
319+ " --xclbin-kernel-name" ,
320+ entryPointNamesFb[ordinal],
321+ " --tmpdir" ,
322+ entryPointWorkDir,
323+ " --xclbin-kernel-id" ,
324+ ordinalHex};
325+ cmdArgs = cmdArgsBase;
326+ bool AttemptingMerge = false ;
327+ if (i > 0 ) {
328+ cmdArgs.push_back (" --input-xclbin-name" );
329+ cmdArgs.push_back (xclbinPaths.back ());
330+ AttemptingMerge = true ;
331+ }
332+ xclbinPaths.push_back (xclbinPath);
315333
316334 auto addOpt = [&](StringRef arg, bool value) {
317335 if (value) cmdArgs.push_back (arg);
@@ -350,11 +368,24 @@ LogicalResult AIETargetBackend::serializeExecutable(
350368 {
351369 SmallVector<StringRef> cmdEnvRefs{cmdEnv.begin (), cmdEnv.end ()};
352370 int result = llvm::sys::ExecuteAndWait (cmdArgs[0 ], cmdArgs, cmdEnvRefs);
353- if (result != 0 )
371+ if (result != 0 && AttemptingMerge) {
372+ // We failed to create the xclbin, but maybe we failed because we were trying
373+ // to merge the kernel into the existing xclbin; try again to see if perhaps
374+ // we succeed if we don't try to merge.
375+ AttemptingMerge = false ;
376+ result =
377+ llvm::sys::ExecuteAndWait (cmdArgsBase[0 ], cmdArgsBase, cmdEnvRefs);
378+ xclbinPaths.push_back (xclbinPath);
379+ }
380+ if (result != 0 ) {
354381 return moduleOp.emitOpError (
355382 " Failed to produce an XCLBin with external tool." );
383+ }
384+ // delete the previous xclbin if we were able to merge as the new one now
385+ // will have all the kernels from the previous one.
386+ if (AttemptingMerge) xclbinPaths.erase (xclbinPaths.end () - 2 );
387+ xclbinIndices[ordinal] = xclbinPaths.size () - 1 ;
356388 }
357-
358389 std::ifstream instrFile (static_cast <std::string>(npuInstPath));
359390 std::string line;
360391 while (std::getline (instrFile, line)) {
@@ -369,7 +400,7 @@ LogicalResult AIETargetBackend::serializeExecutable(
369400 asmInstrIndices[ordinal] = asmInstrRefs.size ();
370401 asmInstrRefs.push_back (
371402 iree_amd_aie_hal_xrt_AsmInstDef_create (builder, npuInstrsVec));
372-
403+ /*
373404 xclbinIn = openInputFile(xclbinPath, &errorMessage);
374405 if (!xclbinIn) {
375406 moduleOp.emitOpError() << "Failed to open xclbin file: " << errorMessage;
@@ -378,7 +409,21 @@ LogicalResult AIETargetBackend::serializeExecutable(
378409 xclbinIndices[ordinal] = xclbinRefs.size();
379410 xclbinRefs.push_back(
380411 iree_amd_aie_hal_xrt_XclbinDef_create(builder, xclbinStringRef));
412+ */
413+ }
414+ // write out the final xclbins to flatbuffer
415+ for (auto xclbinPath : xclbinPaths) {
416+ llvm::outs () << " writing xclbin from path: " << xclbinPath << " \n " ;
417+ std::string errorMessage;
418+ xclbinIn = openInputFile (xclbinPath, &errorMessage);
419+ if (!xclbinIn) {
420+ moduleOp.emitOpError () << " Failed to open xclbin file: " << errorMessage;
421+ }
422+ auto xclbinStringRef = builder.createString (xclbinIn->getBuffer ());
423+ xclbinRefs.push_back (
424+ iree_amd_aie_hal_xrt_XclbinDef_create (builder, xclbinStringRef));
381425 }
426+
382427 // Serialize the executable to flatbuffer format
383428 auto entryPointsRef = builder.createStringVec (entryPointNamesFb);
384429
0 commit comments