Skip to content

Commit 30612fb

Browse files
author
Changming Sun
authored
Cherry-picks for 1.23.0 release (microsoft#25889)
- **Relax WeightBiasQuantization constraint for larger QDQ node group (microsoft#25673)** - **Add cuda graph implementation for NV TRT RTX EP (microsoft#25787)** - **python GPU IO Bindings for NVIDIA (microsoft#25776)** - **Fixes for DynamicQuantizeMatMul and Attention3D tests (microsoft#25814)** - **Fix a long standing bug on file memory mapping on windows. (microsoft#25833)** - **Add API for precompiled model compatibility check using just the compat info (microsoft#25841)** - **Enable ABSL_FLAGS flag registration for onnxruntime_perf_test for mobile build (microsoft#25849)** - **Add default constructor to Ort::Status. (microsoft#25860)** - microsoft#25871 - microsoft#25878 - microsoft#25884 - microsoft#25886 - microsoft#25866
1 parent ad45432 commit 30612fb

File tree

74 files changed

+2224
-751
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+2224
-751
lines changed

cmake/onnxruntime_unittests.cmake

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,6 +1226,12 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
12261226
${onnxruntime_perf_test_src_patterns}
12271227
)
12281228
onnxruntime_add_executable(onnxruntime_perf_test ${onnxruntime_perf_test_src} ${ONNXRUNTIME_ROOT}/core/platform/path_lib.cc)
1229+
1230+
# ABSL_FLAGS_STRIP_NAMES is set to 1 by default to disable flag registration when building for Android, iPhone, and "embedded devices".
1231+
# See the issue: https://github.com/abseil/abseil-cpp/issues/1875
1232+
# We set it to 0 for all builds to be able to use ABSL flags for onnxruntime_perf_test.
1233+
target_compile_definitions(onnxruntime_perf_test PRIVATE ABSL_FLAGS_STRIP_NAMES=0)
1234+
12291235
if(MSVC)
12301236
target_compile_options(onnxruntime_perf_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
12311237
"$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")

csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,88 @@ public struct OrtApi
368368
public IntPtr EpDevice_Device;
369369
public IntPtr GetEpApi;
370370
public IntPtr GetTensorSizeInBytes;
371+
372+
public IntPtr AllocatorGetStats;
373+
374+
public IntPtr CreateMemoryInfo_V2;
375+
public IntPtr MemoryInfoGetDeviceMemType;
376+
public IntPtr MemoryInfoGetVendorId;
377+
378+
public IntPtr ValueInfo_GetValueProducer;
379+
public IntPtr ValueInfo_GetValueNumConsumers;
380+
public IntPtr ValueInfo_GetValueConsumers;
381+
public IntPtr ValueInfo_GetInitializerValue;
382+
public IntPtr ValueInfo_GetExternalInitializerInfo;
383+
public IntPtr ValueInfo_IsRequiredGraphInput;
384+
public IntPtr ValueInfo_IsOptionalGraphInput;
385+
public IntPtr ValueInfo_IsGraphOutput;
386+
public IntPtr ValueInfo_IsConstantInitializer;
387+
public IntPtr ValueInfo_IsFromOuterScope;
388+
public IntPtr Graph_GetName;
389+
public IntPtr Graph_GetModelPath;
390+
public IntPtr Graph_GetOnnxIRVersion;
391+
public IntPtr Graph_GetNumOperatorSets;
392+
public IntPtr Graph_GetOperatorSets;
393+
public IntPtr Graph_GetNumInputs;
394+
public IntPtr Graph_GetInputs;
395+
public IntPtr Graph_GetNumOutputs;
396+
public IntPtr Graph_GetOutputs;
397+
public IntPtr Graph_GetNumInitializers;
398+
public IntPtr Graph_GetInitializers;
399+
public IntPtr Graph_GetNumNodes;
400+
public IntPtr Graph_GetNodes;
401+
public IntPtr Graph_GetParentNode;
402+
public IntPtr Graph_GetGraphView;
403+
public IntPtr Node_GetId;
404+
public IntPtr Node_GetName;
405+
public IntPtr Node_GetOperatorType;
406+
public IntPtr Node_GetDomain;
407+
public IntPtr Node_GetSinceVersion;
408+
public IntPtr Node_GetNumInputs;
409+
public IntPtr Node_GetInputs;
410+
public IntPtr Node_GetNumOutputs;
411+
public IntPtr Node_GetOutputs;
412+
public IntPtr Node_GetNumImplicitInputs;
413+
public IntPtr Node_GetImplicitInputs;
414+
public IntPtr Node_GetNumAttributes;
415+
public IntPtr Node_GetAttributes;
416+
public IntPtr Node_GetAttributeByName;
417+
public IntPtr Node_GetTensorAttributeAsOrtValue;
418+
public IntPtr OpAttr_GetType;
419+
public IntPtr OpAttr_GetName;
420+
public IntPtr Node_GetNumSubgraphs;
421+
public IntPtr Node_GetSubgraphs;
422+
public IntPtr Node_GetGraph;
423+
public IntPtr Node_GetEpName;
424+
public IntPtr ReleaseExternalInitializerInfo;
425+
public IntPtr ExternalInitializerInfo_GetFilePath;
426+
public IntPtr ExternalInitializerInfo_GetFileOffset;
427+
public IntPtr ExternalInitializerInfo_GetByteSize;
428+
429+
public IntPtr GetRunConfigEntry;
430+
431+
public IntPtr EpDevice_MemoryInfo;
432+
433+
public IntPtr CreateSharedAllocator;
434+
public IntPtr GetSharedAllocator;
435+
public IntPtr ReleaseSharedAllocator;
436+
437+
public IntPtr GetTensorData;
438+
439+
public IntPtr GetSessionOptionsConfigEntries;
440+
441+
public IntPtr SessionGetMemoryInfoForInputs;
442+
public IntPtr SessionGetMemoryInfoForOutputs;
443+
public IntPtr SessionGetEpDeviceForInputs;
444+
445+
public IntPtr CreateSyncStreamForEpDevice;
446+
public IntPtr SyncStream_GetHandle;
447+
public IntPtr ReleaseSyncStream;
448+
449+
public IntPtr CopyTensors;
450+
451+
public IntPtr Graph_GetModelMetadata;
452+
public IntPtr GetModelCompatibilityForEpDevices;
371453
}
372454

373455
internal static class NativeMethods
@@ -704,6 +786,10 @@ static NativeMethods()
704786
(DSessionOptionsSetEpSelectionPolicyDelegate)Marshal.GetDelegateForFunctionPointer(
705787
api_.SessionOptionsSetEpSelectionPolicyDelegate,
706788
typeof(DSessionOptionsSetEpSelectionPolicyDelegate));
789+
790+
OrtGetModelCompatibilityForEpDevices = (DOrtGetModelCompatibilityForEpDevices)Marshal.GetDelegateForFunctionPointer(
791+
api_.GetModelCompatibilityForEpDevices,
792+
typeof(DOrtGetModelCompatibilityForEpDevices));
707793
}
708794

709795
internal class NativeLib
@@ -2456,6 +2542,18 @@ public delegate void DOrtRemoveKeyValuePair(IntPtr /* OrtKeyValuePairs* */ kvps,
24562542

24572543
public static DOrtGetEpDevices OrtGetEpDevices;
24582544

2545+
/// <summary>
2546+
/// Validate compiled model compatibility for the provided EP devices.
2547+
/// </summary>
2548+
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
2549+
public delegate IntPtr /* OrtStatus* */ DOrtGetModelCompatibilityForEpDevices(
2550+
IntPtr[] /* const OrtEpDevice* const* */ ep_devices,
2551+
UIntPtr /* size_t */ num_ep_devices,
2552+
byte[] /* const char* */ compatibility_info,
2553+
out int /* OrtCompiledModelCompatibility */ out_status);
2554+
2555+
public static DOrtGetModelCompatibilityForEpDevices OrtGetModelCompatibilityForEpDevices;
2556+
24592557
/// <summary>
24602558
/// Add execution provider devices to the session options.
24612559
/// Priority is based on the order of the OrtEpDevice instances. Highest priority first.

csharp/src/Microsoft.ML.OnnxRuntime/OrtEnv.shared.cs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,21 @@
77

88
namespace Microsoft.ML.OnnxRuntime
99
{
10+
/// <summary>
11+
/// Represents the compatibility status of a pre-compiled model with one or more execution provider devices.
12+
/// </summary>
13+
/// <remarks>
14+
/// This enum is used to determine whether a pre-compiled model can be used with specific execution providers
15+
/// and devices, or if recompilation is needed.
16+
/// </remarks>
17+
public enum OrtCompiledModelCompatibility
18+
{
19+
EP_NOT_APPLICABLE = 0,
20+
EP_SUPPORTED_OPTIMAL = 1,
21+
EP_SUPPORTED_PREFER_RECOMPILATION = 2,
22+
EP_UNSUPPORTED = 3,
23+
}
24+
1025
/// <summary>
1126
/// Delegate for logging function callback.
1227
/// Supply your function and register it with the environment to receive logging callbacks via
@@ -361,6 +376,31 @@ public string[] GetAvailableProviders()
361376
}
362377
}
363378

379+
/// <summary>
380+
/// Validate a compiled model's compatibility information for one or more EP devices.
381+
/// </summary>
382+
/// <param name="epDevices">The list of EP devices to validate against.</param>
383+
/// <param name="compatibilityInfo">The compatibility string from the precompiled model to validate.</param>
384+
/// <returns>OrtCompiledModelCompatibility enum value denoting the compatibility status</returns>
385+
public OrtCompiledModelCompatibility GetModelCompatibilityForEpDevices(
386+
IReadOnlyList<OrtEpDevice> epDevices, string compatibilityInfo)
387+
{
388+
if (epDevices == null || epDevices.Count == 0)
389+
throw new ArgumentException("epDevices must be non-empty", nameof(epDevices));
390+
391+
var devicePtrs = new IntPtr[epDevices.Count];
392+
for (int i = 0; i < epDevices.Count; ++i)
393+
{
394+
devicePtrs[i] = epDevices[i].Handle;
395+
}
396+
397+
var infoUtf8 = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(compatibilityInfo);
398+
NativeApiStatus.VerifySuccess(
399+
NativeMethods.OrtGetModelCompatibilityForEpDevices(
400+
devicePtrs, (UIntPtr)devicePtrs.Length, infoUtf8, out int status));
401+
return (OrtCompiledModelCompatibility)status;
402+
}
403+
364404

365405
/// <summary>
366406
/// Get/Set log level property of OrtEnv instance
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
// not supported on mobile platforms
5+
#if !(ANDROID || IOS)
6+
7+
namespace Microsoft.ML.OnnxRuntime.Tests;
8+
9+
using System;
10+
using System.Linq;
11+
using Xunit;
12+
using System.Collections.Generic;
13+
14+
public class EpCompatibilityTests
15+
{
16+
private readonly OrtEnv ortEnvInstance = OrtEnv.Instance();
17+
18+
private IReadOnlyList<OrtEpDevice> GetDevices()
19+
{
20+
var epDevices = ortEnvInstance.GetEpDevices();
21+
Assert.NotNull(epDevices);
22+
Assert.NotEmpty(epDevices);
23+
return epDevices;
24+
}
25+
26+
[Fact]
27+
public void GetEpCompatibility_InvalidArgs()
28+
{
29+
Assert.Throws<ArgumentException>(() => ortEnvInstance.GetModelCompatibilityForEpDevices(null, "info"));
30+
Assert.Throws<ArgumentException>(() => ortEnvInstance.GetModelCompatibilityForEpDevices(new List<OrtEpDevice>(), "info"));
31+
}
32+
33+
[Fact]
34+
public void GetEpCompatibility_SingleDeviceCpuProvider()
35+
{
36+
var devices = GetDevices();
37+
var someInfo = "arbitrary-compat-string";
38+
39+
// Use CPU device
40+
var cpu = devices.First(d => d.EpName == "CPUExecutionProvider");
41+
Assert.NotNull(cpu);
42+
var selected = new List<OrtEpDevice> { cpu };
43+
var status = ortEnvInstance.GetModelCompatibilityForEpDevices(selected, someInfo);
44+
45+
// CPU defaults to not applicable in this scenario
46+
Assert.Equal(OrtCompiledModelCompatibility.EP_NOT_APPLICABLE, status);
47+
}
48+
}
49+
#endif

include/onnxruntime/core/graph/model_saving_options.h

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,36 +9,30 @@ class PrepackedWeightsForGraph;
99

1010
// These options affect how the model initializers are written to the external file.
1111
// This includes options to align external initializer offset.
12-
// For models running on CPU, ORT will try to use mmap to load external
13-
// initializers. To use mmap, external initializer need to be offset aligned.
12+
// ORT will try to use mmap to load external initializers.
13+
//
1414
// ORT saves external initializers into single data file, each initializer is
1515
// accessed with offset(start position of initializer) and length(byte length of
16-
// initializer) of the data file. To use mmap, each offset need to be aligned
17-
// which means offset need to divisible by allocation granularity(64KB for
18-
// windows and 4K for other OSes). With align_offset to true, ORT will align
19-
// offset for large initializer when save ONNX model with external data file.
16+
// initializer) of the data file. With align_offset set to true, ORT will align
17+
// offset for large initializer (larger than align_threshold)
18+
// when saving the ONNX model with an external data file. It will align them to the
19+
// on_disk_alignment value.
2020
struct ModelSavingOptions {
2121
explicit ModelSavingOptions(size_t size_threshold)
2222
: initializer_size_threshold(size_threshold) {}
2323

2424
// Minimal initializer size in bytes to be externalized on disk
2525
size_t initializer_size_threshold;
26-
// Offset will always be page aligned and allocation granularity aligned for
27-
// mmap support. This is done by padding previous tensor data with zeros
28-
// keeping same length.
26+
// Offset will always be aligned for mmap support.
27+
// This is done by padding previous tensor data with zeros keeping same length.
2928
bool align_offset = false;
3029
// Alignment threshold for size of data.
3130
// Having a low threshold will waste file space for small initializers.
3231
// Only when tensor's data size is > align_threshold will it be force
3332
// aligned. Default to 1MB.
3433
int64_t align_threshold = 1048576;
35-
// The allocation Granularity for mmap() support.
36-
// Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.
37-
#ifdef _WIN32
38-
int64_t allocation_granularity = 65536;
39-
#else
40-
int64_t allocation_granularity = 4096;
41-
#endif
34+
// Alignment factor for big tensors (bigger than align_threshold). Defaults to 4K.
35+
int64_t on_disk_alignment = 4096;
4236
// Force embed all external initializer into the Onnx file
4337
// Used for EPContext model generation while some nodes fallback on CPU which has external data dependency
4438
bool force_embed_external_ini = false;

include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ constexpr const char* kDetailedBuildLog = "nv_detailed_build_log";
3131
constexpr const char* kProfilesMinShapes = "nv_profile_min_shapes";
3232
constexpr const char* kProfilesMaxShapes = "nv_profile_max_shapes";
3333
constexpr const char* kProfilesOptShapes = "nv_profile_opt_shapes";
34-
constexpr const char* kCudaGraphEnable = "nv_cuda_graph_enable";
34+
constexpr const char* kCudaGraphEnable = "enable_cuda_graph";
3535
constexpr const char* kMultiProfileEnable = "nv_multi_profile_enable";
3636
constexpr const char* kUseExternalDataInitializer = "nv_use_external_data_initializer";
3737

include/onnxruntime/core/providers/utils/ort_graph_to_proto.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ static Ort::Status GetOrtValueInfoTensorTypeShape(const OrtValueInfo& ort_value_
232232
/*out*/ std::vector<int64_t>& dims,
233233
/*out*/ std::vector<std::string>& symbolic_dims);
234234
static Ort::Status OrtValueInfoToProto(const OrtValueInfo& ort_value_info, onnx::ValueInfoProto& value_info_proto);
235-
static Ort::Status OrtOpAttrToProto(const OrtNode& ort_node, const OrtOpAttr& ort_attr, onnx::AttributeProto& attr_proto);
235+
static Ort::Status OrtOpAttrToProto(const OrtOpAttr& ort_attr, onnx::AttributeProto& attr_proto);
236236

237237
Ort::Status OrtGraphToProto(const OrtGraph& ort_graph,
238238
onnx::GraphProto& graph_proto,
@@ -379,7 +379,7 @@ Ort::Status OrtGraphToProto(const OrtGraph& ort_graph,
379379
}
380380

381381
onnx::AttributeProto* attr_proto = node_proto->add_attribute();
382-
ORT_EP_UTILS_CXX_RETURN_IF_ERROR(OrtOpAttrToProto(*ort_node, *ort_attr, *attr_proto));
382+
ORT_EP_UTILS_CXX_RETURN_IF_ERROR(OrtOpAttrToProto(*ort_attr, *attr_proto));
383383
}
384384
}
385385

@@ -652,7 +652,7 @@ static Ort::Status OrtValueInfoToProto(const OrtValueInfo& ort_value_info,
652652
return Ort::Status{nullptr};
653653
}
654654

655-
static Ort::Status OrtOpAttrToProto(const OrtNode& ort_node, const OrtOpAttr& ort_attr, onnx::AttributeProto& attr_proto) {
655+
static Ort::Status OrtOpAttrToProto(const OrtOpAttr& ort_attr, onnx::AttributeProto& attr_proto) {
656656
const OrtApi& ort_api = Ort::GetApi();
657657

658658
const char* attr_name = nullptr;
@@ -766,7 +766,7 @@ static Ort::Status OrtOpAttrToProto(const OrtNode& ort_node, const OrtOpAttr& or
766766
// TensorProto as an attribute value doesn't require a name.
767767

768768
OrtValue* ort_value = nullptr;
769-
ORT_EP_UTILS_C_RETURN_IF_ERROR(ort_api.Node_GetTensorAttributeAsOrtValue(&ort_node, &ort_attr, &ort_value));
769+
ORT_EP_UTILS_C_RETURN_IF_ERROR(ort_api.OpAttr_GetTensorAttributeAsOrtValue(&ort_attr, &ort_value));
770770

771771
Ort::Value tensor(ort_value);
772772

include/onnxruntime/core/session/onnxruntime_c_api.h

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -902,6 +902,16 @@ typedef void (*RunAsyncCallbackFn)(void* user_data, OrtValue** outputs, size_t n
902902
*
903903
* \nosubgrouping
904904
*/
905+
/*
906+
* Public enum for compiled model compatibility across EPs.
907+
*/
908+
typedef enum OrtCompiledModelCompatibility {
909+
OrtCompiledModelCompatibility_EP_NOT_APPLICABLE = 0,
910+
OrtCompiledModelCompatibility_EP_SUPPORTED_OPTIMAL,
911+
OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION,
912+
OrtCompiledModelCompatibility_EP_UNSUPPORTED,
913+
} OrtCompiledModelCompatibility;
914+
905915
struct OrtApi {
906916
/// \name OrtStatus
907917
/// @{
@@ -6069,7 +6079,6 @@ struct OrtApi {
60696079

60706080
/** \brief Get the 'TENSOR' attribute held by an OrtOpAttr as an OrtValue.
60716081
*
6072-
* \param[in] node The OrtNode instance.
60736082
* \param[in] attribute The OrtOpAttr instance.
60746083
* \param[out] attr_tensor If successful, contains the 'TENSOR' attribute as a newly created OrtValue.
60756084
Must be freed with OrtApi::ReleaseValue.
@@ -6078,7 +6087,7 @@ struct OrtApi {
60786087
*
60796088
* \since Version 1.23.
60806089
*/
6081-
ORT_API2_STATUS(Node_GetTensorAttributeAsOrtValue, _In_ const OrtNode* node, _In_ const OrtOpAttr* attribute,
6090+
ORT_API2_STATUS(OpAttr_GetTensorAttributeAsOrtValue, _In_ const OrtOpAttr* attribute,
60826091
_Outptr_result_maybenull_ OrtValue** attr_tensor);
60836092

60846093
/** \brief Get the attribute type as OrtOpAttrType from an OrtOpAttr.
@@ -6480,6 +6489,24 @@ struct OrtApi {
64806489
* \since Version 1.23.
64816490
*/
64826491
ORT_API2_STATUS(Graph_GetModelMetadata, _In_ const OrtGraph* graph, _Outptr_ OrtModelMetadata** out);
6492+
6493+
/** \brief Validate a compiled model's compatibility information for one or more EP devices.
6494+
*
6495+
* \param[in] ep_devices The EP devices to validate against (e.g., from GetEpDevices).
6496+
* All devices must belong to the same execution provider.
6497+
* \param[in] num_ep_devices The number of EP devices provided.
6498+
* \param[in] compatibility_info The compatibility info string produced when the model was compiled.
6499+
* \param[out] out_status The resulting compatibility status for the EP devices.
6500+
*
6501+
* \snippet{doc} snippets.dox OrtStatus Return Value
6502+
*
6503+
* \since Version 1.23.
6504+
*/
6505+
ORT_API2_STATUS(GetModelCompatibilityForEpDevices,
6506+
_In_reads_(num_ep_devices) const OrtEpDevice* const* ep_devices,
6507+
_In_ size_t num_ep_devices,
6508+
_In_ const char* compatibility_info,
6509+
_Out_ OrtCompiledModelCompatibility* out_status);
64836510
};
64846511

64856512
/*

0 commit comments

Comments
 (0)