1+ import click
2+ from typing import Dict , List , Optional
3+ import weaviate .classes .config as wvc
4+ from weaviate .client import WeaviateClient
5+
6+
class BatchManager:
    """Create Weaviate collections and bulk-load records into them.

    Progress and results are reported to the terminal with ``click.echo``.
    """

    def __init__(self, client: WeaviateClient) -> None:
        """Store the client that every collection and batch call goes through.

        Args:
            client (WeaviateClient): Client used for all subsequent calls.
        """
        self.client = client

    def create_collection(
        self,
        collection: str,
        vectorizer: str = "contextionary",
        shards: int = 1,
        replication_factor: int = 1,
        force_auto_schema: bool = True,
    ) -> None:
        """
        Create a collection dynamically for batch insertion.

        If a collection with the same name already exists, a message is
        printed and nothing else happens (the vectorizer name is not
        validated in that case).

        Args:
            collection (str): Name of the collection to create.
            vectorizer (str): Vectorizer key; one of contextionary,
                transformers, openai, ollama, cohere, jinaai.
            shards (int): Desired number of shards for the collection.
            replication_factor (int): Replication factor for the collection.
            force_auto_schema (bool): When True, ``properties=None`` is passed
                to the create call; otherwise an empty property list is passed.

        Raises:
            ValueError: If ``vectorizer`` is not one of the supported keys.
            Exception: If the create call fails; the underlying error is
                chained as ``__cause__``.
        """
        if self.client.collections.exists(collection):
            click.echo(f"Collection '{collection}' already exists. Skipping creation.")
            return

        # Map vectorizer names to factories rather than ready-made configs so
        # that only the requested configuration is ever constructed.
        vectorizers = wvc.Configure.Vectorizer
        vectorizer_factories = {
            "contextionary": vectorizers.text2vec_contextionary,
            "transformers": vectorizers.text2vec_transformers,
            "openai": vectorizers.text2vec_openai,
            "ollama": lambda: vectorizers.text2vec_ollama(
                model="snowflake-arctic-embed:33m"
            ),
            "cohere": vectorizers.text2vec_cohere,
            "jinaai": vectorizers.text2vec_jinaai,
        }

        # Validate vectorizer
        if vectorizer not in vectorizer_factories:
            raise ValueError(
                f"Invalid vectorizer '{vectorizer}'. Choose from: {list(vectorizer_factories)}"
            )

        vectorizer_config = vectorizer_factories[vectorizer]()

        try:
            # Create collection with configuration
            self.client.collections.create(
                name=collection,
                vector_index_config=wvc.Configure.VectorIndex.hnsw(),
                replication_config=wvc.Configure.replication(
                    factor=replication_factor,
                    async_enabled=False,
                    deletion_strategy=wvc.ReplicationDeletionStrategy.DELETE_ON_CONFLICT,
                ),
                sharding_config=wvc.Configure.sharding(desired_count=shards),
                vectorizer_config=vectorizer_config,
                properties=None if force_auto_schema else [],
            )
        except Exception as e:
            # Chain explicitly so the original traceback stays attached.
            raise Exception(f"Error creating collection '{collection}': {e}") from e

        click.echo(
            f"Collection '{collection}' created successfully with vectorizer '{vectorizer}'."
        )

    def batch_insert(
        self,
        collection: str,
        data: List[Dict],
    ) -> None:
        """
        Insert data into a collection in batch.

        Each record is sent as the object's properties with any ``id`` key
        left out. The caller's dictionaries are not modified. Objects that
        fail to import are printed, not raised.

        Args:
            collection (str): Name of the collection.
            data (List[Dict]): Data to be inserted.

        Raises:
            Exception: If the collection does not exist, or if the batch
                itself raises (the underlying error is chained as
                ``__cause__``).
        """
        if not self.client.collections.exists(collection):
            raise Exception(f"Collection '{collection}' does not exist. Cannot insert data.")

        try:
            # Perform batch insertion using Weaviate's dynamic batch
            with self.client.batch.dynamic() as batch:
                for record in data:
                    # 'id' is reserved and rejected by the client
                    # (WeaviateInsertInvalidPropertyError), so send a copy
                    # without it instead of popping it from the caller's dict.
                    properties = {
                        key: value for key, value in record.items() if key != "id"
                    }

                    # Add the object to the batch
                    batch.add_object(
                        collection=collection,
                        properties=properties,
                    )
                    # Include the record in this message when debugging imports.
                    click.echo("Processed record")
        except Exception as e:
            raise Exception(f"Batch insertion failed: {e}") from e

        # Check for failed objects once the batch context has closed
        failed_objects = self.client.batch.failed_objects
        if failed_objects:
            click.echo(f"Number of failed objects: {len(failed_objects)}")
            for i, failed_obj in enumerate(failed_objects, 1):
                click.echo(f"Failed object {i}: {failed_obj}")
        else:
            click.echo(f"All objects successfully inserted into '{collection}'.")
0 commit comments