@@ -627,4 +627,257 @@ TEST_F(ColumnVarbinaryTest, GetPermutationAscDescIgnoreLimit) {
627627 }
628628}
629629
630+ TEST_F (ColumnVarbinaryTest, InsertManyStrings) {
631+ auto col = ColumnVarbinary::create ();
632+
633+ // Test 1: Insert empty array
634+ {
635+ std::vector<StringRef> empty_refs;
636+ col->insert_many_strings (empty_refs.data (), empty_refs.size ());
637+ EXPECT_EQ (col->size (), 0U );
638+ }
639+
640+ // Test 2: Insert single string
641+ {
642+ std::string s1 = " hello" ;
643+ StringRef ref1 (s1.data (), s1.size ());
644+ col->insert_many_strings (&ref1, 1 );
645+ EXPECT_EQ (col->size (), 1U );
646+ auto data = col->get_data_at (0 );
647+ EXPECT_EQ (data.size , 5U );
648+ EXPECT_EQ (memcmp (data.data , " hello" , 5 ), 0 );
649+ }
650+
651+ // Test 3: Insert multiple inline strings (size <= kInlineSize)
652+ {
653+ std::string s2 = " abc" ;
654+ std::string s3 = " def" ;
655+ std::string s4 = make_bytes (doris::StringView::kInlineSize , 0xAA );
656+ std::vector<StringRef> refs = {StringRef (s2.data (), s2.size ()),
657+ StringRef (s3.data (), s3.size ()),
658+ StringRef (s4.data (), s4.size ())};
659+ col->insert_many_strings (refs.data (), refs.size ());
660+ EXPECT_EQ (col->size (), 4U ); // 1 from test 2 + 3 new
661+
662+ auto data1 = col->get_data_at (1 );
663+ EXPECT_EQ (data1.size , 3U );
664+ EXPECT_EQ (memcmp (data1.data , " abc" , 3 ), 0 );
665+
666+ auto data2 = col->get_data_at (2 );
667+ EXPECT_EQ (data2.size , 3U );
668+ EXPECT_EQ (memcmp (data2.data , " def" , 3 ), 0 );
669+
670+ auto data3 = col->get_data_at (3 );
671+ EXPECT_EQ (data3.size , doris::StringView::kInlineSize );
672+ EXPECT_EQ (memcmp (data3.data , s4.data (), s4.size ()), 0 );
673+ }
674+
675+ // Test 4: Insert multiple large strings (size > kInlineSize)
676+ {
677+ std::string large1 = make_bytes (doris::StringView::kInlineSize + 10 , 0x11 );
678+ std::string large2 = make_bytes (doris::StringView::kInlineSize + 20 , 0x22 );
679+ std::string large3 = make_bytes (doris::StringView::kInlineSize + 30 , 0x33 );
680+
681+ std::vector<StringRef> large_refs = {StringRef (large1.data (), large1.size ()),
682+ StringRef (large2.data (), large2.size ()),
683+ StringRef (large3.data (), large3.size ())};
684+ size_t before_size = col->size ();
685+ col->insert_many_strings (large_refs.data (), large_refs.size ());
686+ EXPECT_EQ (col->size (), before_size + 3 );
687+
688+ auto data_large1 = col->get_data_at (before_size);
689+ EXPECT_EQ (data_large1.size , large1.size ());
690+ EXPECT_EQ (memcmp (data_large1.data , large1.data (), large1.size ()), 0 );
691+
692+ auto data_large2 = col->get_data_at (before_size + 1 );
693+ EXPECT_EQ (data_large2.size , large2.size ());
694+ EXPECT_EQ (memcmp (data_large2.data , large2.data (), large2.size ()), 0 );
695+
696+ auto data_large3 = col->get_data_at (before_size + 2 );
697+ EXPECT_EQ (data_large3.size , large3.size ());
698+ EXPECT_EQ (memcmp (data_large3.data , large3.data (), large3.size ()), 0 );
699+ }
700+
701+ // Test 5: Insert strings with null bytes
702+ {
703+ std::string null_str1 = std::string (" abc\0 def" , 7 );
704+ std::string null_str2 = std::string (" \0\0\0 " , 3 );
705+ std::vector<StringRef> null_refs = {StringRef (null_str1.data (), null_str1.size ()),
706+ StringRef (null_str2.data (), null_str2.size ())};
707+ size_t before_size = col->size ();
708+ col->insert_many_strings (null_refs.data (), null_refs.size ());
709+ EXPECT_EQ (col->size (), before_size + 2 );
710+
711+ auto data_null1 = col->get_data_at (before_size);
712+ EXPECT_EQ (data_null1.size , 7U );
713+ EXPECT_EQ (memcmp (data_null1.data , null_str1.data (), 7 ), 0 );
714+
715+ auto data_null2 = col->get_data_at (before_size + 1 );
716+ EXPECT_EQ (data_null2.size , 3U );
717+ EXPECT_EQ (memcmp (data_null2.data , null_str2.data (), 3 ), 0 );
718+ }
719+
720+ // Test 6: Insert mixed inline and non-inline strings
721+ {
722+ std::string small = " xy" ;
723+ std::string medium = make_bytes (doris::StringView::kInlineSize , 0xBB );
724+ std::string large = make_bytes (doris::StringView::kInlineSize + 50 , 0xCC );
725+ std::vector<StringRef> mixed_refs = {StringRef (small.data (), small.size ()),
726+ StringRef (medium.data (), medium.size ()),
727+ StringRef (large.data (), large.size ())};
728+ size_t before_size = col->size ();
729+ col->insert_many_strings (mixed_refs.data (), mixed_refs.size ());
730+ EXPECT_EQ (col->size (), before_size + 3 );
731+
732+ auto data_small = col->get_data_at (before_size);
733+ EXPECT_EQ (data_small.size , 2U );
734+ EXPECT_EQ (memcmp (data_small.data , " xy" , 2 ), 0 );
735+
736+ auto data_medium = col->get_data_at (before_size + 1 );
737+ EXPECT_EQ (data_medium.size , doris::StringView::kInlineSize );
738+
739+ auto data_large = col->get_data_at (before_size + 2 );
740+ EXPECT_EQ (data_large.size , large.size ());
741+ EXPECT_EQ (memcmp (data_large.data , large.data (), large.size ()), 0 );
742+ }
743+
744+ // Test 7: Insert UUID-like binary data (16 bytes)
745+ {
746+ std::string uuid1 = make_bytes (16 , 0x55 );
747+ std::string uuid2 = make_bytes (16 , 0x12 );
748+ std::vector<StringRef> uuid_refs = {StringRef (uuid1.data (), uuid1.size ()),
749+ StringRef (uuid2.data (), uuid2.size ())};
750+ size_t before_size = col->size ();
751+ col->insert_many_strings (uuid_refs.data (), uuid_refs.size ());
752+ EXPECT_EQ (col->size (), before_size + 2 );
753+
754+ auto data_uuid1 = col->get_data_at (before_size);
755+ EXPECT_EQ (data_uuid1.size , 16U );
756+ EXPECT_EQ (memcmp (data_uuid1.data , uuid1.data (), 16 ), 0 );
757+
758+ auto data_uuid2 = col->get_data_at (before_size + 1 );
759+ EXPECT_EQ (data_uuid2.size , 16U );
760+ EXPECT_EQ (memcmp (data_uuid2.data , uuid2.data (), 16 ), 0 );
761+ }
762+ }
763+
764+ TEST_F (ColumnVarbinaryTest, InsertManyStringsOverflow) {
765+ auto col = ColumnVarbinary::create ();
766+
767+ // Test 1: Insert with max_length larger than actual strings (no overflow)
768+ {
769+ std::string s1 = " hello" ;
770+ std::string s2 = " world" ;
771+ std::vector<StringRef> refs = {StringRef (s1.data (), s1.size ()),
772+ StringRef (s2.data (), s2.size ())};
773+ col->insert_many_strings_overflow (refs.data (), refs.size (), 100 );
774+ EXPECT_EQ (col->size (), 2U );
775+
776+ auto data1 = col->get_data_at (0 );
777+ EXPECT_EQ (data1.size , 5U );
778+ EXPECT_EQ (memcmp (data1.data , " hello" , 5 ), 0 );
779+
780+ auto data2 = col->get_data_at (1 );
781+ EXPECT_EQ (data2.size , 5U );
782+ EXPECT_EQ (memcmp (data2.data , " world" , 5 ), 0 );
783+ }
784+
785+ // Test 2: Insert with max_length equal to string length (exact fit)
786+ {
787+ std::string s3 = " test123" ;
788+ StringRef ref3 (s3.data (), s3.size ());
789+ col->insert_many_strings_overflow (&ref3, 1 , 7 );
790+ EXPECT_EQ (col->size (), 3U );
791+
792+ auto data3 = col->get_data_at (2 );
793+ EXPECT_EQ (data3.size , 7U );
794+ EXPECT_EQ (memcmp (data3.data , " test123" , 7 ), 0 );
795+ }
796+
797+ // Test 3: Insert large strings with max_length
798+ // Note: Current implementation doesn't actually truncate, it just calls insert_many_strings
799+ // This test verifies the current behavior
800+ {
801+ std::string large = make_bytes (doris::StringView::kInlineSize + 100 , 0xAA );
802+ StringRef ref_large (large.data (), large.size ());
803+ size_t before_size = col->size ();
804+ col->insert_many_strings_overflow (&ref_large, 1 , 50 );
805+ EXPECT_EQ (col->size (), before_size + 1 );
806+
807+ auto data_large = col->get_data_at (before_size);
808+ // Current implementation doesn't truncate, so full size is preserved
809+ EXPECT_EQ (data_large.size , large.size ());
810+ EXPECT_EQ (memcmp (data_large.data , large.data (), large.size ()), 0 );
811+ }
812+
813+ // Test 4: Insert multiple strings with overflow parameter
814+ {
815+ std::string s4 = make_bytes (20 , 0x11 );
816+ std::string s5 = make_bytes (30 , 0x22 );
817+ std::string s6 = make_bytes (40 , 0x33 );
818+ std::vector<StringRef> refs = {StringRef (s4.data (), s4.size ()),
819+ StringRef (s5.data (), s5.size ()),
820+ StringRef (s6.data (), s6.size ())};
821+ size_t before_size = col->size ();
822+ col->insert_many_strings_overflow (refs.data (), refs.size (), 100 );
823+ EXPECT_EQ (col->size (), before_size + 3 );
824+
825+ // Verify all strings are inserted correctly
826+ auto data4 = col->get_data_at (before_size);
827+ EXPECT_EQ (data4.size , 20U );
828+ EXPECT_EQ (memcmp (data4.data , s4.data (), 20 ), 0 );
829+
830+ auto data5 = col->get_data_at (before_size + 1 );
831+ EXPECT_EQ (data5.size , 30U );
832+ EXPECT_EQ (memcmp (data5.data , s5.data (), 30 ), 0 );
833+
834+ auto data6 = col->get_data_at (before_size + 2 );
835+ EXPECT_EQ (data6.size , 40U );
836+ EXPECT_EQ (memcmp (data6.data , s6.data (), 40 ), 0 );
837+ }
838+
839+ // Test 5: Insert binary data (like UUID) with overflow
840+ {
841+ std::string uuid = make_bytes (16 , 0x55 );
842+ StringRef uuid_ref (uuid.data (), uuid.size ());
843+ size_t before_size = col->size ();
844+ col->insert_many_strings_overflow (&uuid_ref, 1 , 32 );
845+ EXPECT_EQ (col->size (), before_size + 1 );
846+
847+ auto data_uuid = col->get_data_at (before_size);
848+ EXPECT_EQ (data_uuid.size , 16U );
849+ EXPECT_EQ (memcmp (data_uuid.data , uuid.data (), 16 ), 0 );
850+ }
851+
852+ // Test 6: Insert empty strings with max_length
853+ {
854+ std::string empty1;
855+ std::string empty2;
856+ std::vector<StringRef> empty_refs = {StringRef (empty1.data (), empty1.size ()),
857+ StringRef (empty2.data (), empty2.size ())};
858+ size_t before_size = col->size ();
859+ col->insert_many_strings_overflow (empty_refs.data (), empty_refs.size (), 10 );
860+ EXPECT_EQ (col->size (), before_size + 2 );
861+
862+ auto data_empty1 = col->get_data_at (before_size);
863+ EXPECT_EQ (data_empty1.size , 0U );
864+
865+ auto data_empty2 = col->get_data_at (before_size + 1 );
866+ EXPECT_EQ (data_empty2.size , 0U );
867+ }
868+
869+ // Test 7: Insert strings with null bytes and overflow parameter
870+ {
871+ std::string null_data = std::string (" abc\0\0\0 def" , 9 );
872+ StringRef null_ref (null_data.data (), null_data.size ());
873+ size_t before_size = col->size ();
874+ col->insert_many_strings_overflow (&null_ref, 1 , 20 );
875+ EXPECT_EQ (col->size (), before_size + 1 );
876+
877+ auto data_null = col->get_data_at (before_size);
878+ EXPECT_EQ (data_null.size , 9U );
879+ EXPECT_EQ (memcmp (data_null.data , null_data.data (), 9 ), 0 );
880+ }
881+ }
882+
630883} // namespace doris::vectorized
0 commit comments