Skip to content

Commit f96234d

Browse files
committed
added memoization for TableColumnMetadata -> AnyDbColumnTypeInformation
1 parent 0ee7024 commit f96234d

File tree

5 files changed

+159
-142
lines changed

5 files changed

+159
-142
lines changed

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbColumnTypeInformation.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ public typealias AnyDbColumnTypeInformation = DbColumnTypeInformation<*, *, *>
88

99
/**
1010
* Represents all type information that can be retrieved from an SQL column.
11-
* This can be extended for your specific [DbType2] if you need extra information.
11+
* This can be extended for your specific [DbType] if you need extra information.
12+
*
13+
* This class needs to be stateless, so it can be memoized in [DbType.getOrGenerateTypeInformation].
1214
*
1315
* @property targetSchema the target schema of the column after running the optional
1416
* [valuePreprocessor] and [columnPostprocessor].

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbType.kt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,19 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
134134
Types.TIMESTAMP_WITH_TIMEZONE to typeOf<OffsetDateTime>(),
135135
)
136136

137+
// Memoization cache for [getOrGenerateTypeInformation].
// A concurrent map is used because DbType implementations can be shared singletons (such as `object DuckDb`),
// so lookups may happen from several threads at once. With a ConcurrentMap receiver, `getOrPut` resolves to
// Kotlin's concurrent-safe overload; under contention the generator may run more than once, which is
// harmless because the generated type information is required to be stateless.
private val typeInformationCache =
    java.util.concurrent.ConcurrentHashMap<TableColumnMetadata, AnyDbColumnTypeInformation>()
138+
139+
/**
140+
* Returns the memoized [DbColumnTypeInformation] for [tableColumnMetadata], generating and caching it via [generateTypeInformation] on first use.
141+
*/
142+
public fun getOrGenerateTypeInformation(tableColumnMetadata: TableColumnMetadata): AnyDbColumnTypeInformation =
143+
typeInformationCache.getOrPut(tableColumnMetadata) { generateTypeInformation(tableColumnMetadata) }
144+
137145
/**
138146
* Returns a [DbColumnTypeInformation] produced from [tableColumnMetadata].
147+
*
148+
* This function can be overridden to return your own [DbColumnTypeInformation] or a subtype of it.
149+
* Note that the returned [DbColumnTypeInformation] needs to be stateless, so that the result of this function can be memoized (see [getOrGenerateTypeInformation]).
139150
*/
140151
public open fun generateTypeInformation(tableColumnMetadata: TableColumnMetadata): AnyDbColumnTypeInformation {
141152
val kType = when {

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DuckDb.kt

Lines changed: 143 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ public object DuckDb : DbType("duckdb") {
100100
override fun generateTypeInformation(tableColumnMetadata: TableColumnMetadata): AnyDbColumnTypeInformation =
101101
parseDuckDbType(tableColumnMetadata.sqlTypeName, tableColumnMetadata.isNullable)
102102

103+
// Memoization cache for [parseDuckDbType], keyed by (sqlTypeName, isNullable).
// DuckDb is a singleton `object`, so this cache is shared process-wide; a concurrent map keeps lookups
// from multiple threads safe. `getOrPut` on a ConcurrentMap is based on get/putIfAbsent, so the recursive
// parseDuckDbType calls made while computing MAP/LIST/ARRAY entries remain allowed.
private val duckDbTypeCache =
    java.util.concurrent.ConcurrentHashMap<Pair<String, Boolean>, AnyDbColumnTypeInformation>()
104+
103105
/**
104106
* How a column type from JDBC, [sqlTypeName], is read in Java/Kotlin.
105107
* The returned type must exactly follow [ResultSet.getObject] of your specific database's JDBC driver.
@@ -109,165 +111,167 @@ public object DuckDb : DbType("duckdb") {
109111
*
110112
*/
111113
internal fun parseDuckDbType(sqlTypeName: String, isNullable: Boolean): AnyDbColumnTypeInformation =
112-
when (DuckDBResultSetMetaData.TypeNameToType(sqlTypeName)) {
113-
BOOLEAN -> dbColumnTypeInformation<Boolean>(
114-
ColumnSchema.Value(typeOf<Boolean>().withNullability(isNullable)),
115-
)
116-
117-
TINYINT -> dbColumnTypeInformation<Byte>(
118-
ColumnSchema.Value(typeOf<Byte>().withNullability(isNullable)),
119-
)
120-
121-
SMALLINT -> dbColumnTypeInformation<Short>(
122-
ColumnSchema.Value(typeOf<Short>().withNullability(isNullable)),
123-
)
124-
125-
INTEGER -> dbColumnTypeInformation<Int>(
126-
ColumnSchema.Value(typeOf<Int>().withNullability(isNullable)),
127-
)
128-
129-
BIGINT -> dbColumnTypeInformation<Long>(
130-
ColumnSchema.Value(typeOf<Long>().withNullability(isNullable)),
131-
)
132-
133-
HUGEINT -> dbColumnTypeInformation<BigInteger>(
134-
ColumnSchema.Value(typeOf<BigInteger>().withNullability(isNullable)),
135-
)
136-
137-
UHUGEINT -> dbColumnTypeInformation<BigInteger>(
138-
ColumnSchema.Value(typeOf<BigInteger>().withNullability(isNullable)),
139-
)
140-
141-
UTINYINT -> dbColumnTypeInformation<Short>(
142-
ColumnSchema.Value(typeOf<Short>().withNullability(isNullable)),
143-
)
144-
145-
USMALLINT -> dbColumnTypeInformation<Int>(
146-
ColumnSchema.Value(typeOf<Int>().withNullability(isNullable)),
147-
)
148-
149-
UINTEGER -> dbColumnTypeInformation<Long>(
150-
ColumnSchema.Value(typeOf<Long>().withNullability(isNullable)),
151-
)
152-
153-
UBIGINT -> dbColumnTypeInformation<BigInteger>(
154-
ColumnSchema.Value(typeOf<BigInteger>().withNullability(isNullable)),
155-
)
156-
157-
FLOAT -> dbColumnTypeInformation<Float>(
158-
ColumnSchema.Value(typeOf<Float>().withNullability(isNullable)),
159-
)
160-
161-
DOUBLE -> dbColumnTypeInformation<Double>(
162-
ColumnSchema.Value(typeOf<Double>().withNullability(isNullable)),
163-
)
164-
165-
DECIMAL -> dbColumnTypeInformation<BigDecimal>(
166-
ColumnSchema.Value(typeOf<BigDecimal>().withNullability(isNullable)),
167-
)
168-
169-
// DataFrame can do this conversion
170-
TIME -> dbColumnTypeInformationWithPreprocessing<JavaLocalTime, LocalTime>(
171-
ColumnSchema.Value(typeOf<LocalTime>().withNullability(isNullable)),
172-
) { it, _ -> it?.toKotlinLocalTime() }
173-
174-
// todo?
175-
TIME_WITH_TIME_ZONE -> dbColumnTypeInformation<JavaOffsetTime>(
176-
ColumnSchema.Value(typeOf<JavaOffsetTime>().withNullability(isNullable)),
177-
)
178-
179-
DATE -> dbColumnTypeInformationWithPreprocessing<JavaLocalDate, LocalDate>(
180-
ColumnSchema.Value(typeOf<LocalDate>().withNullability(isNullable)),
181-
) { it, _ ->
182-
it?.toKotlinLocalDate()
183-
}
114+
duckDbTypeCache.getOrPut(Pair(sqlTypeName, isNullable)) {
115+
when (DuckDBResultSetMetaData.TypeNameToType(sqlTypeName)) {
116+
BOOLEAN -> dbColumnTypeInformation<Boolean>(
117+
ColumnSchema.Value(typeOf<Boolean>().withNullability(isNullable)),
118+
)
184119

185-
TIMESTAMP, TIMESTAMP_MS, TIMESTAMP_NS, TIMESTAMP_S ->
186-
dbColumnTypeInformationWithPreprocessing<SqlTimestamp, Instant>(
187-
ColumnSchema.Value(typeOf<Instant>().withNullability(isNullable)),
188-
) { it, _ ->
189-
it?.toInstant()?.toKotlinInstant()
190-
}
120+
TINYINT -> dbColumnTypeInformation<Byte>(
121+
ColumnSchema.Value(typeOf<Byte>().withNullability(isNullable)),
122+
)
123+
124+
SMALLINT -> dbColumnTypeInformation<Short>(
125+
ColumnSchema.Value(typeOf<Short>().withNullability(isNullable)),
126+
)
127+
128+
INTEGER -> dbColumnTypeInformation<Int>(
129+
ColumnSchema.Value(typeOf<Int>().withNullability(isNullable)),
130+
)
131+
132+
BIGINT -> dbColumnTypeInformation<Long>(
133+
ColumnSchema.Value(typeOf<Long>().withNullability(isNullable)),
134+
)
191135

192-
// todo?
193-
TIMESTAMP_WITH_TIME_ZONE -> dbColumnTypeInformation<JavaOffsetDateTime>(
194-
ColumnSchema.Value(typeOf<JavaOffsetDateTime>().withNullability(isNullable)),
195-
)
136+
HUGEINT -> dbColumnTypeInformation<BigInteger>(
137+
ColumnSchema.Value(typeOf<BigInteger>().withNullability(isNullable)),
138+
)
196139

197-
// TODO!
198-
JSON -> dbColumnTypeInformation<JsonNode>(
199-
ColumnSchema.Value(typeOf<JsonNode>().withNullability(isNullable)),
200-
)
140+
UHUGEINT -> dbColumnTypeInformation<BigInteger>(
141+
ColumnSchema.Value(typeOf<BigInteger>().withNullability(isNullable)),
142+
)
201143

202-
BLOB -> dbColumnTypeInformation<Blob>(
203-
ColumnSchema.Value(typeOf<Blob>().withNullability(isNullable)),
204-
)
144+
UTINYINT -> dbColumnTypeInformation<Short>(
145+
ColumnSchema.Value(typeOf<Short>().withNullability(isNullable)),
146+
)
205147

206-
UUID -> dbColumnTypeInformationWithPreprocessing<JavaUUID, Uuid>(
207-
ColumnSchema.Value(typeOf<Uuid>().withNullability(isNullable)),
208-
) { it, _ -> it?.toKotlinUuid() }
148+
USMALLINT -> dbColumnTypeInformation<Int>(
149+
ColumnSchema.Value(typeOf<Int>().withNullability(isNullable)),
150+
)
209151

210-
MAP -> {
211-
val (key, value) = parseMapTypes(sqlTypeName)
152+
UINTEGER -> dbColumnTypeInformation<Long>(
153+
ColumnSchema.Value(typeOf<Long>().withNullability(isNullable)),
154+
)
212155

213-
val parsedKeyType = parseDuckDbType(key, false)
214-
val parsedValueType =
215-
parseDuckDbType(value, true).cast<Any?, Any?, Any?>()
156+
UBIGINT -> dbColumnTypeInformation<BigInteger>(
157+
ColumnSchema.Value(typeOf<BigInteger>().withNullability(isNullable)),
158+
)
216159

217-
val targetMapType = Map::class.createType(
218-
listOf(
219-
KTypeProjection.invariant(parsedKeyType.targetSchema.type),
220-
KTypeProjection.invariant(parsedValueType.targetSchema.type),
221-
),
160+
FLOAT -> dbColumnTypeInformation<Float>(
161+
ColumnSchema.Value(typeOf<Float>().withNullability(isNullable)),
222162
)
223163

224-
dbColumnTypeInformationWithPreprocessing<Map<String, Any?>, Map<String, Any?>>(
225-
ColumnSchema.Value(targetMapType),
226-
) { map, _ ->
227-
// only need to preprocess the values, as the keys are just Strings
228-
map?.mapValues { (_, value) ->
229-
parsedValueType.preprocess(value)
230-
}
164+
DOUBLE -> dbColumnTypeInformation<Double>(
165+
ColumnSchema.Value(typeOf<Double>().withNullability(isNullable)),
166+
)
167+
168+
DECIMAL -> dbColumnTypeInformation<BigDecimal>(
169+
ColumnSchema.Value(typeOf<BigDecimal>().withNullability(isNullable)),
170+
)
171+
172+
// DataFrame can do this conversion
173+
TIME -> dbColumnTypeInformationWithPreprocessing<JavaLocalTime, LocalTime>(
174+
ColumnSchema.Value(typeOf<LocalTime>().withNullability(isNullable)),
175+
) { it, _ -> it?.toKotlinLocalTime() }
176+
177+
// todo?
178+
TIME_WITH_TIME_ZONE -> dbColumnTypeInformation<JavaOffsetTime>(
179+
ColumnSchema.Value(typeOf<JavaOffsetTime>().withNullability(isNullable)),
180+
)
181+
182+
DATE -> dbColumnTypeInformationWithPreprocessing<JavaLocalDate, LocalDate>(
183+
ColumnSchema.Value(typeOf<LocalDate>().withNullability(isNullable)),
184+
) { it, _ ->
185+
it?.toKotlinLocalDate()
231186
}
232-
}
233187

234-
LIST, ARRAY -> {
235-
// TODO requires #1266 and #1273 for specific types
236-
val listType = parseListType(sqlTypeName)
237-
val parsedListType =
238-
parseDuckDbType(listType, true).cast<Any?, Any?, Any?>()
188+
TIMESTAMP, TIMESTAMP_MS, TIMESTAMP_NS, TIMESTAMP_S ->
189+
dbColumnTypeInformationWithPreprocessing<SqlTimestamp, Instant>(
190+
ColumnSchema.Value(typeOf<Instant>().withNullability(isNullable)),
191+
) { it, _ ->
192+
it?.toInstant()?.toKotlinInstant()
193+
}
194+
195+
// todo?
196+
TIMESTAMP_WITH_TIME_ZONE -> dbColumnTypeInformation<JavaOffsetDateTime>(
197+
ColumnSchema.Value(typeOf<JavaOffsetDateTime>().withNullability(isNullable)),
198+
)
199+
200+
// TODO!
201+
JSON -> dbColumnTypeInformation<JsonNode>(
202+
ColumnSchema.Value(typeOf<JsonNode>().withNullability(isNullable)),
203+
)
239204

240-
val targetListType = List::class.createType(
241-
listOf(KTypeProjection.invariant(parsedListType.targetSchema.type)),
205+
BLOB -> dbColumnTypeInformation<Blob>(
206+
ColumnSchema.Value(typeOf<Blob>().withNullability(isNullable)),
242207
)
243208

244-
// todo maybe List<DataRow> should become FrameColumn
245-
dbColumnTypeInformationWithPreprocessing<SqlArray, List<Any?>>(
246-
ColumnSchema.Value(targetListType),
247-
) { array, _ ->
248-
array
249-
?.toList()
250-
?.map(parsedListType::preprocess) // recursively preprocess
209+
UUID -> dbColumnTypeInformationWithPreprocessing<JavaUUID, Uuid>(
210+
ColumnSchema.Value(typeOf<Uuid>().withNullability(isNullable)),
211+
) { it, _ -> it?.toKotlinUuid() }
212+
213+
MAP -> {
214+
val (key, value) = parseMapTypes(sqlTypeName)
215+
216+
val parsedKeyType = parseDuckDbType(key, false)
217+
val parsedValueType =
218+
parseDuckDbType(value, true).cast<Any?, Any?, Any?>()
219+
220+
val targetMapType = Map::class.createType(
221+
listOf(
222+
KTypeProjection.invariant(parsedKeyType.targetSchema.type),
223+
KTypeProjection.invariant(parsedValueType.targetSchema.type),
224+
),
225+
)
226+
227+
dbColumnTypeInformationWithPreprocessing<Map<String, Any?>, Map<String, Any?>>(
228+
ColumnSchema.Value(targetMapType),
229+
) { map, _ ->
230+
// only need to preprocess the values, as the keys are just Strings
231+
map?.mapValues { (_, value) ->
232+
parsedValueType.preprocess(value)
233+
}
234+
}
235+
}
236+
237+
LIST, ARRAY -> {
238+
// TODO requires #1266 and #1273 for specific types
239+
val listType = parseListType(sqlTypeName)
240+
val parsedListType =
241+
parseDuckDbType(listType, true).cast<Any?, Any?, Any?>()
242+
243+
val targetListType = List::class.createType(
244+
listOf(KTypeProjection.invariant(parsedListType.targetSchema.type)),
245+
)
246+
247+
// todo maybe List<DataRow> should become FrameColumn
248+
dbColumnTypeInformationWithPreprocessing<SqlArray, List<Any?>>(
249+
ColumnSchema.Value(targetListType),
250+
) { array, _ ->
251+
array
252+
?.toList()
253+
?.map(parsedListType::preprocess) // recursively preprocess
254+
}
251255
}
252-
}
253256

254-
// TODO requires #1266 for specific types
255-
STRUCT -> dbColumnTypeInformation<Struct>(
256-
ColumnSchema.Value(typeOf<Struct>().withNullability(isNullable)),
257-
)
257+
// TODO requires #1266 for specific types
258+
STRUCT -> dbColumnTypeInformation<Struct>(
259+
ColumnSchema.Value(typeOf<Struct>().withNullability(isNullable)),
260+
)
258261

259-
// Cannot handle this in Kotlin
260-
UNION -> dbColumnTypeInformation<Any>(
261-
ColumnSchema.Value(typeOf<Any>().withNullability(isNullable)),
262-
)
262+
// Cannot handle this in Kotlin
263+
UNION -> dbColumnTypeInformation<Any>(
264+
ColumnSchema.Value(typeOf<Any>().withNullability(isNullable)),
265+
)
263266

264-
VARCHAR -> dbColumnTypeInformation<String>(
265-
ColumnSchema.Value(typeOf<String>().withNullability(isNullable)),
266-
)
267+
VARCHAR -> dbColumnTypeInformation<String>(
268+
ColumnSchema.Value(typeOf<String>().withNullability(isNullable)),
269+
)
267270

268-
UNKNOWN, BIT, INTERVAL, ENUM -> dbColumnTypeInformation<String>(
269-
ColumnSchema.Value(typeOf<String>().withNullability(isNullable)),
270-
)
271+
UNKNOWN, BIT, INTERVAL, ENUM -> dbColumnTypeInformation<String>(
272+
ColumnSchema.Value(typeOf<String>().withNullability(isNullable)),
273+
)
274+
}
271275
}
272276

273277
private fun SqlArray.toList(): List<Any?> =

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readDataFrameSchema.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ internal fun buildSchemaByTableColumns(
451451
dbType: DbType,
452452
): DataFrameSchema {
453453
val schemaColumns = tableColumns.associate {
454-
it.name to dbType.generateTypeInformation(it).targetSchema
454+
it.name to dbType.getOrGenerateTypeInformation(it).targetSchema
455455
}
456456
return DataFrameSchemaImpl(columns = schemaColumns)
457457
}

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -912,7 +912,7 @@ private fun buildColumnTypeInformation(
912912
dbType: DbType,
913913
): List<AnyDbColumnTypeInformation> =
914914
tableColumns.indices.map { index ->
915-
dbType.generateTypeInformation(tableColumns[index])
915+
dbType.getOrGenerateTypeInformation(tableColumns[index])
916916
}
917917

918918
/**

0 commit comments

Comments
 (0)