Skip to content

Commit bced2f3

Browse files
committed
add ut
1 parent fff4f71 commit bced2f3

10 files changed

+396
-74
lines changed

be/src/vec/data_types/serde/data_type_array_serde.cpp

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -326,15 +326,19 @@ Status DataTypeArraySerDe::read_column_from_arrow(IColumn& column, const arrow::
326326
auto arrow_offsets_array = concrete_array->offsets();
327327
auto* arrow_offsets = dynamic_cast<arrow::Int32Array*>(arrow_offsets_array.get());
328328
auto prev_size = offsets_data.back();
329-
int32_t arrow_nested_start_offset;
330-
int32_t arrow_nested_end_offset;
331-
const auto* offsets_raw_data = arrow_offsets->raw_values();
332-
memcpy(&arrow_nested_start_offset, offsets_raw_data + start, sizeof(int32_t));
333-
memcpy(&arrow_nested_end_offset, offsets_raw_data + end, sizeof(int32_t));
329+
const auto* base_offsets_ptr = reinterpret_cast<const uint8_t*>(arrow_offsets->raw_values());
330+
const size_t offset_element_size = sizeof(int32_t);
331+
int32_t arrow_nested_start_offset = 0;
332+
int32_t arrow_nested_end_offset = 0;
333+
const uint8_t* start_offset_ptr = base_offsets_ptr + start * offset_element_size;
334+
const uint8_t* end_offset_ptr = base_offsets_ptr + end * offset_element_size;
335+
memcpy(&arrow_nested_start_offset, start_offset_ptr, offset_element_size);
336+
memcpy(&arrow_nested_end_offset, end_offset_ptr, offset_element_size);
334337

335338
for (auto i = start + 1; i < end + 1; ++i) {
336-
int32_t current_offset;
337-
memcpy(&current_offset, offsets_raw_data + i, sizeof(int32_t));
339+
int32_t current_offset = 0;
340+
const uint8_t* current_offset_ptr = base_offsets_ptr + i * offset_element_size;
341+
memcpy(&current_offset, current_offset_ptr, offset_element_size);
338342
// convert to doris offset, start from offsets.back()
339343
offsets_data.emplace_back(prev_size + current_offset - arrow_nested_start_offset);
340344
}
@@ -395,17 +399,17 @@ Status DataTypeArraySerDe::_write_column_to_mysql(const IColumn& column,
395399
return Status::OK();
396400
}
397401

398-
Status DataTypeArraySerDe::write_column_to_mysql_binary(const IColumn& column,
399-
MysqlRowBinaryBuffer& row_buffer,
400-
int64_t row_idx, bool col_const,
401-
const FormatOptions& options) const {
402+
Status DataTypeArraySerDe::write_column_to_mysql(const IColumn& column,
403+
MysqlRowBuffer<true>& row_buffer, int64_t row_idx,
404+
bool col_const,
405+
const FormatOptions& options) const {
402406
return _write_column_to_mysql(column, row_buffer, row_idx, col_const, options);
403407
}
404408

405-
Status DataTypeArraySerDe::write_column_to_mysql_text(const IColumn& column,
406-
MysqlRowTextBuffer& row_buffer,
407-
int64_t row_idx, bool col_const,
408-
const FormatOptions& options) const {
409+
Status DataTypeArraySerDe::write_column_to_mysql(const IColumn& column,
410+
MysqlRowBuffer<false>& row_buffer, int64_t row_idx,
411+
bool col_const,
412+
const FormatOptions& options) const {
409413
return _write_column_to_mysql(column, row_buffer, row_idx, col_const, options);
410414
}
411415

be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -397,12 +397,13 @@ Status DataTypeDateTimeV2SerDe::read_column_from_arrow(IColumn& column,
397397
type->unit());
398398
}
399399
}
400+
const auto* base_ptr = reinterpret_cast<const uint8_t*>(concrete_array->raw_values());
401+
const size_t element_size = sizeof(int64_t);
400402
for (auto value_i = start; value_i < end; ++value_i) {
401-
int64_t utc_epoch_raw;
402-
const auto* raw_data_ptr = concrete_array->raw_values() + value_i;
403-
memcpy(&utc_epoch_raw, raw_data_ptr, sizeof(int64_t));
404-
405-
auto utc_epoch = static_cast<UInt64>(utc_epoch_raw);
403+
int64_t date_value = 0;
404+
const uint8_t* raw_byte_ptr = base_ptr + value_i * element_size;
405+
memcpy(&date_value, raw_byte_ptr, element_size);
406+
auto utc_epoch = static_cast<UInt64>(date_value);
406407

407408
DateV2Value<DateTimeV2ValueType> v;
408409
// convert second
@@ -450,17 +451,17 @@ Status DataTypeDateTimeV2SerDe::_write_column_to_mysql(const IColumn& column,
450451
return Status::OK();
451452
}
452453

453-
Status DataTypeDateTimeV2SerDe::write_column_to_mysql_binary(const IColumn& column,
454-
MysqlRowBinaryBuffer& row_buffer,
455-
int64_t row_idx, bool col_const,
456-
const FormatOptions& options) const {
454+
Status DataTypeDateTimeV2SerDe::write_column_to_mysql(const IColumn& column,
455+
MysqlRowBuffer<true>& row_buffer,
456+
int64_t row_idx, bool col_const,
457+
const FormatOptions& options) const {
457458
return _write_column_to_mysql(column, row_buffer, row_idx, col_const, options);
458459
}
459460

460-
Status DataTypeDateTimeV2SerDe::write_column_to_mysql_text(const IColumn& column,
461-
MysqlRowTextBuffer& row_buffer,
462-
int64_t row_idx, bool col_const,
463-
const FormatOptions& options) const {
461+
Status DataTypeDateTimeV2SerDe::write_column_to_mysql(const IColumn& column,
462+
MysqlRowBuffer<false>& row_buffer,
463+
int64_t row_idx, bool col_const,
464+
const FormatOptions& options) const {
464465
return _write_column_to_mysql(column, row_buffer, row_idx, col_const, options);
465466
}
466467

be/src/vec/data_types/serde/data_type_datev2_serde.cpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,12 @@ Status DataTypeDateV2SerDe::read_column_from_arrow(IColumn& column, const arrow:
114114
const cctz::time_zone& ctz) const {
115115
auto& col_data = static_cast<ColumnDateV2&>(column).get_data();
116116
const auto* concrete_array = dynamic_cast<const arrow::Date32Array*>(arrow_array);
117+
const auto* base_ptr = reinterpret_cast<const uint8_t*>(concrete_array->raw_values());
118+
const size_t element_size = sizeof(int32_t);
117119
for (auto value_i = start; value_i < end; ++value_i) {
118-
int32_t date_value;
119-
const auto* raw_data_ptr = concrete_array->raw_values() + value_i;
120-
memcpy(&date_value, raw_data_ptr, sizeof(int32_t));
120+
int32_t date_value = 0;
121+
const uint8_t* raw_byte_ptr = base_ptr + value_i * element_size;
122+
memcpy(&date_value, raw_byte_ptr, element_size);
121123

122124
DateV2Value<DateV2ValueType> v;
123125
v.get_date_from_daynr(date_value + date_threshold);
@@ -153,17 +155,17 @@ Status DataTypeDateV2SerDe::_write_column_to_mysql(const IColumn& column,
153155
return Status::OK();
154156
}
155157

156-
Status DataTypeDateV2SerDe::write_column_to_mysql_binary(const IColumn& column,
157-
MysqlRowBinaryBuffer& row_buffer,
158-
int64_t row_idx, bool col_const,
159-
const FormatOptions& options) const {
158+
Status DataTypeDateV2SerDe::write_column_to_mysql(const IColumn& column,
159+
MysqlRowBuffer<true>& row_buffer, int64_t row_idx,
160+
bool col_const,
161+
const FormatOptions& options) const {
160162
return _write_column_to_mysql(column, row_buffer, row_idx, col_const, options);
161163
}
162164

163-
Status DataTypeDateV2SerDe::write_column_to_mysql_text(const IColumn& column,
164-
MysqlRowTextBuffer& row_buffer,
165-
int64_t row_idx, bool col_const,
166-
const FormatOptions& options) const {
165+
Status DataTypeDateV2SerDe::write_column_to_mysql(const IColumn& column,
166+
MysqlRowBuffer<false>& row_buffer,
167+
int64_t row_idx, bool col_const,
168+
const FormatOptions& options) const {
167169
return _write_column_to_mysql(column, row_buffer, row_idx, col_const, options);
168170
}
169171

be/src/vec/data_types/serde/data_type_decimal_serde.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ Status DataTypeDecimalSerDe<T>::read_column_from_arrow(IColumn& column,
338338
const auto* concrete_array = dynamic_cast<const arrow::DecimalArray*>(arrow_array);
339339
for (auto value_i = start; value_i < end; ++value_i) {
340340
const auto* value = concrete_array->Value(value_i);
341-
FieldType decimal_value;
341+
FieldType decimal_value = FieldType{};
342342
memcpy(&decimal_value, value, sizeof(FieldType));
343343
column_data.emplace_back(decimal_value);
344344
}
@@ -380,18 +380,18 @@ Status DataTypeDecimalSerDe<T>::_write_column_to_mysql(const IColumn& column,
380380
}
381381

382382
template <PrimitiveType T>
383-
Status DataTypeDecimalSerDe<T>::write_column_to_mysql_binary(const IColumn& column,
384-
MysqlRowBinaryBuffer& row_buffer,
385-
int64_t row_idx, bool col_const,
386-
const FormatOptions& options) const {
383+
Status DataTypeDecimalSerDe<T>::write_column_to_mysql(const IColumn& column,
384+
MysqlRowBuffer<true>& row_buffer,
385+
int64_t row_idx, bool col_const,
386+
const FormatOptions& options) const {
387387
return _write_column_to_mysql(column, row_buffer, row_idx, col_const, options);
388388
}
389389

390390
template <PrimitiveType T>
391-
Status DataTypeDecimalSerDe<T>::write_column_to_mysql_text(const IColumn& column,
392-
MysqlRowTextBuffer& row_buffer,
393-
int64_t row_idx, bool col_const,
394-
const FormatOptions& options) const {
391+
Status DataTypeDecimalSerDe<T>::write_column_to_mysql(const IColumn& column,
392+
MysqlRowBuffer<false>& row_buffer,
393+
int64_t row_idx, bool col_const,
394+
const FormatOptions& options) const {
395395
return _write_column_to_mysql(column, row_buffer, row_idx, col_const, options);
396396
}
397397

be/src/vec/data_types/serde/data_type_number_serde.cpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -219,14 +219,14 @@ Status DataTypeNumberSerDe<T>::read_column_from_arrow(IColumn& column,
219219
const auto* concrete_array = dynamic_cast<const arrow::StringArray*>(arrow_array);
220220
std::shared_ptr<arrow::Buffer> buffer = concrete_array->value_data();
221221

222+
const auto* offsets_data = concrete_array->value_offsets()->data();
223+
const size_t offset_size = sizeof(int32_t);
222224
for (size_t offset_i = start; offset_i < end; ++offset_i) {
223225
if (!concrete_array->IsNull(offset_i)) {
224-
int32_t start_offset;
225-
int32_t end_offset;
226-
const auto* offsets_data = concrete_array->value_offsets()->data();
227-
memcpy(&start_offset, offsets_data + offset_i * sizeof(int32_t), sizeof(int32_t));
228-
memcpy(&end_offset, offsets_data + (offset_i + 1) * sizeof(int32_t),
229-
sizeof(int32_t));
226+
int32_t start_offset = 0;
227+
int32_t end_offset = 0;
228+
memcpy(&start_offset, offsets_data + offset_i * offset_size, offset_size);
229+
memcpy(&end_offset, offsets_data + (offset_i + 1) * offset_size, offset_size);
230230

231231
const auto* raw_data = buffer->data() + start_offset;
232232
const auto raw_data_len = end_offset - start_offset;
@@ -507,18 +507,18 @@ Status DataTypeNumberSerDe<T>::_write_column_to_mysql(const IColumn& column,
507507
}
508508

509509
template <PrimitiveType T>
510-
Status DataTypeNumberSerDe<T>::write_column_to_mysql_binary(const IColumn& column,
511-
MysqlRowBinaryBuffer& row_buffer,
512-
int64_t row_idx, bool col_const,
513-
const FormatOptions& options) const {
510+
Status DataTypeNumberSerDe<T>::write_column_to_mysql(const IColumn& column,
511+
MysqlRowBuffer<true>& row_buffer,
512+
int64_t row_idx, bool col_const,
513+
const FormatOptions& options) const {
514514
return _write_column_to_mysql(column, row_buffer, row_idx, col_const, options);
515515
}
516516

517517
template <PrimitiveType T>
518-
Status DataTypeNumberSerDe<T>::write_column_to_mysql_text(const IColumn& column,
519-
MysqlRowTextBuffer& row_buffer,
520-
int64_t row_idx, bool col_const,
521-
const FormatOptions& options) const {
518+
Status DataTypeNumberSerDe<T>::write_column_to_mysql(const IColumn& column,
519+
MysqlRowBuffer<false>& row_buffer,
520+
int64_t row_idx, bool col_const,
521+
const FormatOptions& options) const {
522522
return _write_column_to_mysql(column, row_buffer, row_idx, col_const, options);
523523
}
524524

be/src/vec/data_types/serde/data_type_string_serde.cpp

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -262,17 +262,15 @@ Status DataTypeStringSerDeBase<ColumnType>::read_column_from_arrow(
262262
arrow_array->type_id() == arrow::Type::BINARY) {
263263
const auto* concrete_array = dynamic_cast<const arrow::BinaryArray*>(arrow_array);
264264
std::shared_ptr<arrow::Buffer> buffer = concrete_array->value_data();
265-
const auto offsets_buffer = concrete_array->value_offsets();
266-
const uint8_t* offsets_data = offsets_buffer->data();
265+
const uint8_t* offsets_data = concrete_array->value_offsets()->data();
266+
const size_t offset_size = sizeof(int32_t);
267267

268268
for (auto offset_i = start; offset_i < end; ++offset_i) {
269269
if (!concrete_array->IsNull(offset_i)) {
270-
int32_t start_offset;
271-
int32_t end_offset;
272-
273-
memcpy(&start_offset, offsets_data + offset_i * sizeof(int32_t), sizeof(int32_t));
274-
memcpy(&end_offset, offsets_data + (offset_i + 1) * sizeof(int32_t),
275-
sizeof(int32_t));
270+
int32_t start_offset = 0;
271+
int32_t end_offset = 0;
272+
memcpy(&start_offset, offsets_data + offset_i * offset_size, offset_size);
273+
memcpy(&end_offset, offsets_data + (offset_i + 1) * offset_size, offset_size);
276274

277275
int32_t length = end_offset - start_offset;
278276
const auto* raw_data = buffer->data() + start_offset;
@@ -318,14 +316,14 @@ Status DataTypeStringSerDeBase<ColumnType>::read_column_from_arrow(
318316
}
319317

320318
template <typename ColumnType>
321-
Status DataTypeStringSerDeBase<ColumnType>::write_column_to_mysql_binary(
322-
const IColumn& column, MysqlRowBinaryBuffer& row_buffer, int64_t row_idx, bool col_const,
319+
Status DataTypeStringSerDeBase<ColumnType>::write_column_to_mysql(
320+
const IColumn& column, MysqlRowBuffer<true>& row_buffer, int64_t row_idx, bool col_const,
323321
const FormatOptions& options) const {
324322
return _write_column_to_mysql(column, row_buffer, row_idx, col_const, options);
325323
}
326324
template <typename ColumnType>
327-
Status DataTypeStringSerDeBase<ColumnType>::write_column_to_mysql_text(
328-
const IColumn& column, MysqlRowTextBuffer& row_buffer, int64_t row_idx, bool col_const,
325+
Status DataTypeStringSerDeBase<ColumnType>::write_column_to_mysql(
326+
const IColumn& column, MysqlRowBuffer<false>& row_buffer, int64_t row_idx, bool col_const,
329327
const FormatOptions& options) const {
330328
return _write_column_to_mysql(column, row_buffer, row_idx, col_const, options);
331329
}

be/test/vec/data_types/serde/data_type_serde_datetime_v2_test.cpp

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
#include <arrow/api.h>
19+
#include <cctz/time_zone.h>
1820
#include <gtest/gtest-message.h>
1921
#include <gtest/gtest-test-part.h>
2022
#include <gtest/gtest.h>
@@ -221,4 +223,83 @@ TEST_F(DataTypeDateTimeV2SerDeTest, serdes) {
221223
test_func(*serde_time_v2_0, column_time_v2_0);
222224
}
223225

226+
// Run with UBSan enabled to catch misalignment errors.
227+
TEST_F(DataTypeDateTimeV2SerDeTest, ArrowMemNotAlignedDate) {
228+
// 1.Prepare the data.
229+
std::vector<int32_t> dates = {
230+
0,
231+
365,
232+
1000,
233+
5000,
234+
10000
235+
};
236+
const int64_t num_elements = dates.size();
237+
const int64_t element_size = sizeof(int32_t);
238+
239+
// 2.Create an unaligned memory buffer.
240+
std::vector<uint8_t> data_storage(num_elements * element_size + 10);
241+
uint8_t* unaligned_data = data_storage.data() + 1;
242+
243+
// 3.Copy data to unaligned memory
244+
for (size_t i = 0; i < dates.size(); ++i) {
245+
memcpy(unaligned_data + i * element_size, &dates[i], element_size);
246+
}
247+
248+
// 4. Create Arrow array with unaligned memory
249+
auto unaligned_buffer = arrow::Buffer::Wrap(unaligned_data, num_elements * element_size);
250+
auto arr = std::make_shared<arrow::Date32Array>(
251+
num_elements,
252+
unaligned_buffer
253+
);
254+
const auto* raw_values_ptr = arr->raw_values();
255+
uintptr_t address = reinterpret_cast<uintptr_t>(raw_values_ptr);
256+
EXPECT_EQ(address % 4, 1);
257+
258+
// 5.Test read_column_from_arrow
259+
cctz::time_zone tz;
260+
auto st = serde_date_v2->read_column_from_arrow(*column_date_v2, arr.get(), 0, 1, tz);
261+
EXPECT_TRUE(st.ok());
262+
}
263+
264+
// Run with UBSan enabled to catch misalignment errors.
265+
TEST_F(DataTypeDateTimeV2SerDeTest, ArrowMemNotAlignedDateTime) {
266+
// 1.Prepare the data.
267+
std::vector<int64_t> timestamps = {
268+
0,
269+
86400000000,
270+
31536000000000,
271+
100000000000,
272+
500000000000
273+
};
274+
const int64_t num_elements = timestamps.size();
275+
const int64_t element_size = sizeof(int64_t);
276+
277+
// 2.Create an unaligned memory buffer.
278+
std::vector<uint8_t> data_storage(num_elements * element_size + 10);
279+
uint8_t* unaligned_data = data_storage.data() + 1;
280+
281+
// 3. Copy data to unaligned memory
282+
for (size_t i = 0; i < timestamps.size(); ++i) {
283+
memcpy(unaligned_data + i * element_size, &timestamps[i], element_size);
284+
}
285+
286+
// 4. Create Arrow array with unaligned memory
287+
auto unaligned_buffer = arrow::Buffer::Wrap(unaligned_data, num_elements * element_size);
288+
auto timestamp_type = arrow::timestamp(arrow::TimeUnit::MICRO);
289+
auto arr = std::make_shared<arrow::TimestampArray>(
290+
timestamp_type,
291+
num_elements,
292+
unaligned_buffer
293+
);
294+
295+
const auto* raw_values_ptr = arr->raw_values();
296+
uintptr_t address = reinterpret_cast<uintptr_t>(raw_values_ptr);
297+
EXPECT_EQ(address % 4, 1);
298+
299+
// 5.Test read_column_from_arrow
300+
cctz::time_zone tz;
301+
auto st = serde_datetime_v2_6->read_column_from_arrow(*column_datetime_v2_6, arr.get(), 0, 1, tz);
302+
EXPECT_TRUE(st.ok());
303+
}
304+
224305
} // namespace doris::vectorized

0 commit comments

Comments
 (0)