@@ -277,6 +277,42 @@ def pick_individual_type(jsonschema_type: dict):
277277 if "object" in jsonschema_type ["type" ]:
278278 return JSONB ()
279279 if "array" in jsonschema_type ["type" ]:
280+ # This currently uses a non-conformant definition for the Singer SCHEMA,
281+ # using the `additionalProperties` attribute to convey additional type
282+ # information, agnostic of the target database.
283+ """
284+ Schema override rule in `meltano.yml`:
285+
286+ type: "array"
287+ items:
288+ type: "number"
289+ additionalProperties:
290+ storage:
291+ type: "vector"
292+ dim: 4
293+
294+ Produced schema annotation in `catalog.json`:
295+
296+ {"type": "array",
297+ "items": {"type": "number"},
298+ "additionalProperties": {"storage": {"type": "vector", "dim": 4}}}
299+ """
300+ if (
301+ "additionalProperties" in jsonschema_type
302+ and "storage" in jsonschema_type ["additionalProperties" ]
303+ ):
304+ storage_properties = jsonschema_type ["additionalProperties" ]["storage" ]
305+ if (
306+ "type" in storage_properties
307+ and storage_properties ["type" ] == "vector"
308+ ):
309+ # On PostgreSQL/pgvector, use the corresponding type definition
310+ # from its SQLAlchemy dialect.
311+ from pgvector .sqlalchemy import (
312+ Vector , # type: ignore[import-untyped]
313+ )
314+
315+ return Vector (storage_properties ["dim" ])
280316 return ARRAY (JSONB ())
281317 if jsonschema_type .get ("format" ) == "date-time" :
282318 return TIMESTAMP ()
@@ -310,6 +346,13 @@ def pick_best_sql_type(sql_type_array: list):
310346 NOTYPE ,
311347 ]
312348
349+ try :
350+ from pgvector .sqlalchemy import Vector
351+
352+ precedence_order .append (Vector )
353+ except ImportError :
354+ pass
355+
313356 for sql_type in precedence_order :
314357 for obj in sql_type_array :
315358 if isinstance (obj , sql_type ):
0 commit comments