Skip to content

Commit 7078f40

Browse files
authored
enhance: add vector reserve to improve memory allocation in segcore (#45757)
This commit optimizes std::vector usage across segcore by adding reserve() calls where the size is known in advance, reducing memory reallocations during push_back operations.

Changes:
- TimestampIndex.cpp: Reserve space for prefix_sums and timestamp_barriers
- SegmentGrowingImpl.cpp: Reserve space for binlog info vectors
- ChunkedSegmentSealedImpl.cpp: Reserve space for futures and field data vectors
- storagev2translator/GroupChunkTranslator.cpp: Reserve space for metadata vectors
- thirdparty/tantivy/tantivy-wrapper.h: Reserve space for the C-string view vectors and conversion buffers

This improves performance by avoiding multiple memory reallocations when the vector size is predictable.

issue: #45679

---------

Signed-off-by: Buqian Zheng <[email protected]>
1 parent 346449d commit 7078f40

File tree

5 files changed

+29
-0
lines changed

5 files changed

+29
-0
lines changed

internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,7 @@ ChunkedSegmentSealedImpl::load_column_group_data_internal(
391391
// warmup will be disabled only when all columns are not in load list
392392
bool merged_in_load_list = false;
393393
std::vector<FieldId> milvus_field_ids;
394+
milvus_field_ids.reserve(field_id_list.size());
394395
for (int i = 0; i < field_id_list.size(); ++i) {
395396
milvus_field_ids.push_back(FieldId(field_id_list.Get(i)));
396397
merged_in_load_list = merged_in_load_list ||
@@ -2672,6 +2673,7 @@ ChunkedSegmentSealedImpl::Load(milvus::tracer::TraceContext& trace_ctx) {
26722673
// Step 2: Load indexes in parallel using thread pool
26732674
auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::LOW);
26742675
std::vector<std::future<void>> load_index_futures;
2676+
load_index_futures.reserve(field_id_to_index_info.size());
26752677

26762678
for (const auto& pair : field_id_to_index_info) {
26772679
auto field_id = pair.first;
@@ -2704,6 +2706,7 @@ ChunkedSegmentSealedImpl::Load(milvus::tracer::TraceContext& trace_ctx) {
27042706

27052707
// Wait for all index loading to complete and collect exceptions
27062708
std::vector<std::exception_ptr> index_exceptions;
2709+
index_exceptions.reserve(load_index_futures.size());
27072710
for (auto& future : load_index_futures) {
27082711
try {
27092712
future.get();
@@ -2754,6 +2757,10 @@ ChunkedSegmentSealedImpl::Load(milvus::tracer::TraceContext& trace_ctx) {
27542757

27552758
// Calculate total row count and collect binlog paths
27562759
int64_t total_entries = 0;
2760+
auto binlog_count = field_binlog.binlogs().size();
2761+
field_binlog_info.insert_files.reserve(binlog_count);
2762+
field_binlog_info.entries_nums.reserve(binlog_count);
2763+
field_binlog_info.memory_sizes.reserve(binlog_count);
27572764
for (const auto& binlog : field_binlog.binlogs()) {
27582765
field_binlog_info.insert_files.push_back(binlog.log_path());
27592766
field_binlog_info.entries_nums.push_back(binlog.entries_num());
@@ -2774,6 +2781,7 @@ ChunkedSegmentSealedImpl::Load(milvus::tracer::TraceContext& trace_ctx) {
27742781
field_data_to_load.size(),
27752782
id_);
27762783
std::vector<std::future<void>> load_field_futures;
2784+
load_field_futures.reserve(field_data_to_load.size());
27772785

27782786
for (const auto& [field_id, load_field_data_info] :
27792787
field_data_to_load) {
@@ -2787,6 +2795,7 @@ ChunkedSegmentSealedImpl::Load(milvus::tracer::TraceContext& trace_ctx) {
27872795

27882796
// Wait for all field data loading to complete and collect exceptions
27892797
std::vector<std::exception_ptr> field_exceptions;
2798+
field_exceptions.reserve(load_field_futures.size());
27902799
for (auto& future : load_field_futures) {
27912800
try {
27922801
future.get();

internal/core/src/segcore/SegmentGrowingImpl.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1344,6 +1344,10 @@ SegmentGrowingImpl::Load(milvus::tracer::TraceContext& trace_ctx) {
13441344

13451345
// Process each binlog
13461346
int64_t total_row_count = 0;
1347+
auto binlog_count = field_binlog.binlogs().size();
1348+
binlog_info.entries_nums.reserve(binlog_count);
1349+
binlog_info.insert_files.reserve(binlog_count);
1350+
binlog_info.memory_sizes.reserve(binlog_count);
13471351
for (const auto& binlog : field_binlog.binlogs()) {
13481352
binlog_info.entries_nums.push_back(binlog.entries_num());
13491353
binlog_info.insert_files.push_back(binlog.log_path());
@@ -1353,6 +1357,7 @@ SegmentGrowingImpl::Load(milvus::tracer::TraceContext& trace_ctx) {
13531357
binlog_info.row_count = total_row_count;
13541358

13551359
// Set child field ids
1360+
binlog_info.child_field_ids.reserve(field_binlog.child_fields().size());
13561361
for (const auto& child_field : field_binlog.child_fields()) {
13571362
binlog_info.child_field_ids.push_back(child_field);
13581363
}

internal/core/src/segcore/TimestampIndex.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@ TimestampIndex::build_with(const Timestamp* timestamps, int64_t size) {
2323
auto num_slice = lengths_.size();
2424
Assert(num_slice > 0);
2525
std::vector<int64_t> prefix_sums;
26+
prefix_sums.reserve(num_slice + 1);
2627
int offset = 0;
2728
prefix_sums.push_back(offset);
2829
std::vector<Timestamp> timestamp_barriers;
30+
timestamp_barriers.reserve(num_slice + 1);
2931
Timestamp last_max_v = 0;
3032
for (int slice_id = 0; slice_id < num_slice; ++slice_id) {
3133
auto length = lengths_[slice_id];

internal/core/src/segcore/storagev2translator/GroupChunkTranslator.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ GroupChunkTranslator::GroupChunkTranslator(
8989
.GetArrowFileSystem();
9090

9191
// Get row group metadata from files
92+
parquet_file_metadata_.reserve(insert_files_.size());
93+
row_group_meta_list_.reserve(insert_files_.size());
9294
for (const auto& file : insert_files_) {
9395
auto reader = std::make_shared<milvus_storage::FileRowGroupReader>(
9496
fs,
@@ -114,11 +116,16 @@ GroupChunkTranslator::GroupChunkTranslator(
114116
file_row_group_prefix_sum_.reserve(row_group_meta_list_.size() + 1);
115117
file_row_group_prefix_sum_.push_back(
116118
0); // Base case: 0 row groups before first file
119+
size_t total_row_groups = 0;
117120
for (const auto& file_metas : row_group_meta_list_) {
121+
total_row_groups += file_metas.size();
118122
file_row_group_prefix_sum_.push_back(file_row_group_prefix_sum_.back() +
119123
file_metas.size());
120124
}
121125

126+
meta_.num_rows_until_chunk_.reserve(total_row_groups + 1);
127+
meta_.chunk_memory_size_.reserve(total_row_groups);
128+
122129
meta_.num_rows_until_chunk_.push_back(0);
123130
for (const auto& row_group_meta : row_group_meta_list_) {
124131
for (int i = 0; i < row_group_meta.size(); ++i) {

internal/core/thirdparty/tantivy/tantivy-wrapper.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@ struct TantivyIndexWrapper {
355355
int64_t offset_begin) {
356356
assert(!finished_);
357357
std::vector<const char*> views;
358+
views.reserve(len);
358359
for (uintptr_t i = 0; i < len; i++) {
359360
views.push_back(array[i].c_str());
360361
}
@@ -435,6 +436,7 @@ struct TantivyIndexWrapper {
435436

436437
if constexpr (std::is_same_v<T, std::string>) {
437438
std::vector<const char*> views;
439+
views.reserve(len);
438440
for (uintptr_t i = 0; i < len; i++) {
439441
views.push_back(array[i].c_str());
440442
}
@@ -621,6 +623,7 @@ struct TantivyIndexWrapper {
621623

622624
if constexpr (std::is_same_v<T, std::string>) {
623625
std::vector<const char*> views;
626+
views.reserve(len);
624627
for (uintptr_t i = 0; i < len; i++) {
625628
views.push_back(array[i].c_str());
626629
}
@@ -709,6 +712,7 @@ struct TantivyIndexWrapper {
709712
} else {
710713
// smaller integer should be converted first
711714
std::vector<int64_t> buf(len);
715+
buf.reserve(len);
712716
for (uintptr_t i = 0; i < len; ++i) {
713717
buf[i] = static_cast<int64_t>(terms[i]);
714718
}
@@ -726,6 +730,7 @@ struct TantivyIndexWrapper {
726730
bitset);
727731
} else {
728732
std::vector<double> buf(len);
733+
buf.reserve(len);
729734
for (uintptr_t i = 0; i < len; ++i) {
730735
buf[i] = static_cast<double>(terms[i]);
731736
}
@@ -736,6 +741,7 @@ struct TantivyIndexWrapper {
736741

737742
if constexpr (std::is_same_v<T, std::string>) {
738743
std::vector<const char*> views;
744+
views.reserve(len);
739745
for (uintptr_t i = 0; i < len; i++) {
740746
views.push_back(terms[i].c_str());
741747
}

0 commit comments

Comments
 (0)