Skip to content

Commit fa3f6e3

Browse files
snnn, yuslepukhin, adrianlizarraga, hariharans29
authored
Cherry-pick release:1.23.0 PRs to rel-1.23.0 (microsoft#25985)
This PR cherry-picks the following PRs to the rel-1.23.0 branch: * microsoft#25938 * microsoft#25957 * microsoft#25960 * microsoft#25968 * microsoft#25971 --------- Co-authored-by: Dmitri Smirnov <[email protected]> Co-authored-by: Adrian Lizarraga <[email protected]> Co-authored-by: Hariharan Seshadri <[email protected]>
1 parent 491f0c1 commit fa3f6e3

File tree

22 files changed

+148
-54
lines changed

22 files changed

+148
-54
lines changed

.github/workflows/android.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ jobs:
7878
run: |
7979
set -e -x
8080
BINARY_SIZE_THRESHOLD_ARGS=""
81-
echo "Binary size threshold in bytes: 1306224"
82-
BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1306224"
81+
echo "Binary size threshold in bytes: 1436672"
82+
BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1436672"
8383
8484
# Ensure ANDROID_NDK_HOME is available and get its real path
8585
if [ -z "$ANDROID_NDK_HOME" ]; then

include/onnxruntime/core/framework/ortmemoryinfo.h

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,14 @@ struct OrtMemoryInfo {
1313
OrtMemoryInfo() = default; // to allow default construction of Tensor
1414

1515
// use string for name, so we could have customized allocator in execution provider.
16-
const char* name = nullptr;
16+
std::string name;
1717
OrtMemType mem_type = OrtMemTypeDefault;
1818
OrtAllocatorType alloc_type = OrtInvalidAllocator;
1919
OrtDevice device;
2020

21-
constexpr OrtMemoryInfo(const char* name_, OrtAllocatorType type_, OrtDevice device_ = OrtDevice(),
22-
OrtMemType mem_type_ = OrtMemTypeDefault)
23-
#if ((defined(__GNUC__) && __GNUC__ > 4) || defined(__clang__))
24-
// this causes a spurious error in CentOS gcc 4.8 build so disable if GCC version < 5
25-
__attribute__((nonnull))
26-
#endif
27-
: name(name_),
21+
OrtMemoryInfo(std::string name_, OrtAllocatorType type_, OrtDevice device_ = OrtDevice(),
22+
OrtMemType mem_type_ = OrtMemTypeDefault)
23+
: name(std::move(name_)),
2824
mem_type(mem_type_),
2925
alloc_type(type_),
3026
device(device_) {
@@ -39,7 +35,7 @@ struct OrtMemoryInfo {
3935
if (device != other.device)
4036
return device < other.device;
4137

42-
return strcmp(name, other.name) < 0;
38+
return name < other.name;
4339
}
4440

4541
// This is to make OrtMemoryInfo a valid key in hash tables
@@ -68,7 +64,7 @@ inline bool operator==(const OrtMemoryInfo& left, const OrtMemoryInfo& other) {
6864
return left.mem_type == other.mem_type &&
6965
left.alloc_type == other.alloc_type &&
7066
left.device == other.device &&
71-
strcmp(left.name, other.name) == 0;
67+
left.name == other.name;
7268
}
7369

7470
inline bool operator!=(const OrtMemoryInfo& lhs, const OrtMemoryInfo& rhs) { return !(lhs == rhs); }

onnxruntime/core/framework/allocator.cc

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "core/common/safeint.h"
77
#include "core/common/status.h"
88
#include "core/framework/allocator.h"
9+
#include "core/framework/error_code_helper.h"
910
#include "core/mlas/inc/mlas.h"
1011
#include "core/framework/utils.h"
1112
#include "core/session/ort_apis.h"
@@ -185,22 +186,32 @@ std::ostream& operator<<(std::ostream& out, const OrtMemoryInfo& info) { return
185186
#endif
186187
ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo, _In_ const char* name1, enum OrtAllocatorType type, int id1,
187188
enum OrtMemType mem_type1, _Outptr_ OrtMemoryInfo** out) {
189+
API_IMPL_BEGIN
190+
191+
if (name1 == nullptr) {
192+
return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "MemoryInfo name cannot be null.");
193+
}
194+
195+
if (out == nullptr) {
196+
return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Output memory info cannot be null.");
197+
}
198+
188199
auto device_id = static_cast<OrtDevice::DeviceId>(id1);
189200
if (strcmp(name1, onnxruntime::CPU) == 0) {
190201
*out = new OrtMemoryInfo(onnxruntime::CPU, type, OrtDevice(), mem_type1);
191202
} else if (strcmp(name1, onnxruntime::CUDA) == 0) {
192203
*out = new OrtMemoryInfo(
193-
name1, type,
204+
onnxruntime::CUDA, type,
194205
OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::NVIDIA, device_id),
195206
mem_type1);
196207
} else if (strcmp(name1, onnxruntime::OpenVINO_GPU) == 0) {
197208
*out = new OrtMemoryInfo(
198-
name1, type,
209+
onnxruntime::OpenVINO_GPU, type,
199210
OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::INTEL, device_id),
200211
mem_type1);
201212
} else if (strcmp(name1, onnxruntime::HIP) == 0) {
202213
*out = new OrtMemoryInfo(
203-
name1, type,
214+
onnxruntime::HIP, type,
204215
OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::AMD, device_id),
205216
mem_type1);
206217
} else if (strcmp(name1, onnxruntime::WEBGPU_BUFFER) == 0 ||
@@ -212,45 +223,56 @@ ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo, _In_ const char* name1, enum OrtA
212223

213224
} else if (strcmp(name1, onnxruntime::DML) == 0) {
214225
*out = new OrtMemoryInfo(
215-
name1, type,
226+
onnxruntime::DML, type,
216227
OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::MICROSOFT, device_id),
217228
mem_type1);
218229
} else if (strcmp(name1, onnxruntime::OpenVINO_RT_NPU) == 0) {
219230
*out = new OrtMemoryInfo(
220-
name1, type,
231+
onnxruntime::OpenVINO_RT_NPU, type,
221232
OrtDevice(OrtDevice::NPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::INTEL, device_id),
222233
mem_type1);
223234
} else if (strcmp(name1, onnxruntime::CUDA_PINNED) == 0) {
224235
*out = new OrtMemoryInfo(
225-
name1, type,
236+
onnxruntime::CUDA_PINNED, type,
226237
OrtDevice(OrtDevice::GPU, OrtDevice::MemType::HOST_ACCESSIBLE, OrtDevice::VendorIds::NVIDIA, device_id),
227238
mem_type1);
228239
} else if (strcmp(name1, onnxruntime::HIP_PINNED) == 0) {
229240
*out = new OrtMemoryInfo(
230-
name1, type,
241+
onnxruntime::HIP_PINNED, type,
231242
OrtDevice(OrtDevice::GPU, OrtDevice::MemType::HOST_ACCESSIBLE, OrtDevice::VendorIds::AMD, device_id),
232243
mem_type1);
233244
} else if (strcmp(name1, onnxruntime::QNN_HTP_SHARED) == 0) {
234245
*out = new OrtMemoryInfo(
235-
name1, type,
246+
onnxruntime::QNN_HTP_SHARED, type,
236247
OrtDevice(OrtDevice::CPU, OrtDevice::MemType::HOST_ACCESSIBLE, OrtDevice::VendorIds::QUALCOMM, device_id),
237248
mem_type1);
238249
} else if (strcmp(name1, onnxruntime::CPU_ALIGNED_4K) == 0) {
239250
*out = new OrtMemoryInfo(
240-
name1, type,
251+
onnxruntime::CPU_ALIGNED_4K, type,
241252
OrtDevice(OrtDevice::CPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::NONE, device_id,
242253
onnxruntime::kAlloc4KAlignment),
243254
mem_type1);
244255
} else {
245256
return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Specified device is not supported. Try CreateMemoryInfo_V2.");
246257
}
258+
API_IMPL_END
247259
return nullptr;
248260
}
249261

250262
ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo_V2, _In_ const char* name, _In_ enum OrtMemoryInfoDeviceType device_type,
251263
_In_ uint32_t vendor_id, _In_ int32_t device_id, _In_ enum OrtDeviceMemoryType mem_type,
252264
_In_ size_t alignment, enum OrtAllocatorType type,
253265
_Outptr_ OrtMemoryInfo** out) {
266+
API_IMPL_BEGIN
267+
268+
if (name == nullptr) {
269+
return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "MemoryInfo name cannot be null.");
270+
}
271+
272+
if (out == nullptr) {
273+
return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Output memory info cannot be null.");
274+
}
275+
254276
// map the public enum values to internal OrtDevice values
255277
OrtDevice::MemoryType mt = mem_type == OrtDeviceMemoryType_DEFAULT ? OrtDevice::MemType::DEFAULT
256278
: OrtDevice::MemType::HOST_ACCESSIBLE;
@@ -275,6 +297,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo_V2, _In_ const char* name, _In_ en
275297

276298
*out = new OrtMemoryInfo(name, type, OrtDevice{dt, mt, vendor_id, narrow<int16_t>(device_id), alignment},
277299
mem_type == OrtDeviceMemoryType_DEFAULT ? OrtMemTypeDefault : OrtMemTypeCPU);
300+
API_IMPL_END
278301
return nullptr;
279302
}
280303

@@ -283,7 +306,7 @@ ORT_API(void, OrtApis::ReleaseMemoryInfo, _Frees_ptr_opt_ OrtMemoryInfo* p) { de
283306
#pragma warning(pop)
284307
#endif
285308
ORT_API_STATUS_IMPL(OrtApis::MemoryInfoGetName, _In_ const OrtMemoryInfo* ptr, _Out_ const char** out) {
286-
*out = ptr->name;
309+
*out = ptr->name.c_str();
287310
return nullptr;
288311
}
289312

onnxruntime/core/framework/bfc_arena.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ BFCArena::BFCArena(std::unique_ptr<IAllocator> resource_allocator,
1313
int max_dead_bytes_per_chunk,
1414
int initial_growth_chunk_size_bytes,
1515
int64_t max_power_of_two_extend_bytes)
16-
: IAllocator(OrtMemoryInfo(resource_allocator->Info().name,
16+
: IAllocator(OrtMemoryInfo(resource_allocator->Info().name.c_str(),
1717
OrtAllocatorType::OrtArenaAllocator,
1818
resource_allocator->Info().device,
1919
resource_allocator->Info().mem_type)),

onnxruntime/core/mlas/lib/qnbitgemm.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,25 @@ struct PackedQuantBDataStruct {
5353
{
5454
const size_t PackedQuantBDataSize = N * BlockCountK * MlasQNBitBlkDataSizeInBytes(BlkBitWidth, BlkLen);
5555
size_t BlkSumSize = MlasDivRoundup(N, 16) * BlockCountK * 16 * sizeof(T);
56-
if constexpr (BlkBitWidth == 8) {
57-
PackedQuantBData = (std::byte*)MlasAlignAddress(PackedQuantBWorkspace, 32);
58-
} else {
5956
#if defined(MLAS_TARGET_AMD64_IX86)
6057
// avx512 requires alignment on a 64-byte boundary
6158
PackedQuantBData = (std::byte*)MlasAlignAddress(PackedQuantBWorkspace, 64);
59+
#elif defined (MLAS_TARGET_ARM64)
60+
// Only for 8-bit Gemms does `PackedQuantBData` need to be 32-byte aligned, and
61+
// there is enough memory allocated to support this alignment.
62+
// See QNBitGemmPackQuantBDataSize().
63+
// When bit width is 4, there is no alignment guarantee.
64+
// TODO(hasesh): Can we unify the alignment for 4-bit and 8-bit ARM64 Gemms so as to
65+
// simplify this logic and make the code here cleaner?
66+
if constexpr (BlkBitWidth == 8) {
67+
PackedQuantBData = (std::byte*)MlasAlignAddress(PackedQuantBWorkspace, 32);
68+
}
69+
else {
70+
PackedQuantBData = (std::byte*)PackedQuantBWorkspace;
71+
}
6272
#else
6373
PackedQuantBData = (std::byte*)PackedQuantBWorkspace;
6474
#endif
65-
}
6675

6776
QuantBBlkSum = (T*)(PackedQuantBData + PackedQuantBDataSize);
6877
QuantBBlkSum = (T*)MlasAlignAddress(QuantBBlkSum, MlasQNBitQuantBBlkSumAlignment());

onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -927,7 +927,7 @@ namespace Dml
927927

928928
bool IsGpuTensor(const onnxruntime::Tensor& tensor)
929929
{
930-
return strcmp(tensor.Location().name, onnxruntime::CPU) &&
930+
return strcmp(tensor.Location().name.c_str(), onnxruntime::CPU) &&
931931
!(tensor.Location().mem_type == ::OrtMemType::OrtMemTypeCPUOutput || tensor.Location().mem_type == ::OrtMemType::OrtMemTypeCPUInput);
932932
}
933933

onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ namespace Windows::AI::MachineLearning::Adapter
9898

9999
bool IsAllocationInterface(const ::OrtMemoryInfo& info)
100100
{
101-
return strcmp(info.name, onnxruntime::CPU) && !(info.mem_type == ::OrtMemType::OrtMemTypeCPUOutput || info.mem_type == ::OrtMemType::OrtMemTypeCPUInput);
101+
return strcmp(info.name.c_str(), onnxruntime::CPU) && !(info.mem_type == ::OrtMemType::OrtMemTypeCPUOutput || info.mem_type == ::OrtMemType::OrtMemTypeCPUInput);
102102
}
103103

104104
// Translate the data object stored in a tensor to the type which will be returned through
@@ -1774,7 +1774,9 @@ namespace Windows::AI::MachineLearning::Adapter
17741774
}
17751775

17761776
// tells caller whether this tensor is in CPU memory
1777-
return !strcmp(m_impl->Location().name, onnxruntime::CPU) || m_impl->Location().mem_type == ::OrtMemType::OrtMemTypeCPUOutput || m_impl->Location().mem_type == ::OrtMemType::OrtMemTypeCPUInput;
1777+
return !strcmp(m_impl->Location().name.c_str(), onnxruntime::CPU)
1778+
|| m_impl->Location().mem_type == ::OrtMemType::OrtMemTypeCPUOutput
1779+
|| m_impl->Location().mem_type == ::OrtMemType::OrtMemTypeCPUInput;
17781780
}
17791781

17801782
bool STDMETHODCALLTYPE TensorWrapper::IsDataInterface() const noexcept

onnxruntime/core/providers/qnn/qnn_provider_factory.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,11 @@ struct QnnEpFactory : OrtEpFactory {
219219
OrtKeyValuePairs* ep_options = nullptr;
220220
factory->ort_api.CreateKeyValuePairs(&ep_options);
221221
factory->ort_api.AddKeyValuePair(ep_options, "backend_path", factory->qnn_backend_path.c_str());
222-
ORT_API_RETURN_IF_ERROR(
223-
factory->ort_api.GetEpApi()->CreateEpDevice(factory, &device, nullptr, ep_options,
224-
&ep_devices[num_ep_devices++]));
222+
OrtStatus* status = factory->ort_api.GetEpApi()->CreateEpDevice(factory, &device, nullptr, ep_options,
223+
&ep_devices[num_ep_devices++]);
224+
225+
factory->ort_api.ReleaseKeyValuePairs(ep_options);
226+
ORT_API_RETURN_IF_ERROR(status);
225227
}
226228
}
227229

onnxruntime/core/providers/webgpu/webgpu_context.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ Status WebGpuContext::Run(ComputeContext& context, const ProgramBase& program) {
210210
return tensor != nullptr &&
211211
tensor->Location().mem_type == OrtMemType::OrtMemTypeDefault &&
212212
tensor->Location().device.Type() == OrtDevice::GPU &&
213-
!strcmp(tensor->Location().name, WEBGPU_BUFFER);
213+
!strcmp(tensor->Location().name.c_str(), WEBGPU_BUFFER);
214214
}),
215215
"All inputs must be tensors on WebGPU buffers.");
216216

@@ -219,7 +219,7 @@ Status WebGpuContext::Run(ComputeContext& context, const ProgramBase& program) {
219219
return tensor != nullptr &&
220220
tensor->Location().mem_type == OrtMemType::OrtMemTypeDefault &&
221221
tensor->Location().device.Type() == OrtDevice::GPU &&
222-
!strcmp(tensor->Location().name, WEBGPU_BUFFER);
222+
!strcmp(tensor->Location().name.c_str(), WEBGPU_BUFFER);
223223
}),
224224
"All outputs must be tensors on WebGPU buffers.");
225225
}

onnxruntime/core/session/environment.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ static bool AreOrtMemoryInfosEquivalent(
7979
bool ignore_alignment = false) {
8080
return left.mem_type == right.mem_type &&
8181
(ignore_alignment ? left.device.EqualIgnoringAlignment(right.device) : left.device == right.device) &&
82-
(!match_name || strcmp(left.name, right.name) == 0);
82+
(!match_name || left.name == right.name);
8383
}
8484

8585
std::vector<AllocatorPtr>::const_iterator FindExistingAllocator(const std::vector<AllocatorPtr>& allocators,

0 commit comments

Comments
 (0)