@@ -4,6 +4,7 @@ import io.github.oshai.kotlinlogging.KotlinLogging
4
4
import org.jetbrains.kotlinx.dataframe.AnyFrame
5
5
import org.jetbrains.kotlinx.dataframe.DataColumn
6
6
import org.jetbrains.kotlinx.dataframe.DataFrame
7
+ import org.jetbrains.kotlinx.dataframe.api.Infer
7
8
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
8
9
import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl
9
10
import org.jetbrains.kotlinx.dataframe.io.db.DbType
@@ -105,15 +106,17 @@ public data class DatabaseConfiguration(val url: String, val user: String = "",
105
106
* @param [dbConfig] the configuration for the database, including URL, user, and password.
106
107
* @param [tableName] the name of the table to read data from.
107
108
* @param [limit] the maximum number of rows to retrieve from the table.
109
+ * @param [inferNullability] if `true` (the default), column nullability is inferred from the actual values ([Infer.Nulls]); if `false`, no inference is performed ([Infer.None]).
108
110
* @return the DataFrame containing the data from the SQL table.
109
111
*/
110
112
public fun DataFrame.Companion.readSqlTable(
    dbConfig: DatabaseConfiguration,
    tableName: String,
    limit: Int = DEFAULT_LIMIT,
    inferNullability: Boolean = true,
): AnyFrame =
    // The connection is opened only for this call; `use` closes it once the table has been read.
    DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection ->
        // Delegate to the Connection-based overload.
        readSqlTable(
            connection = connection,
            tableName = tableName,
            limit = limit,
            inferNullability = inferNullability,
        )
    }
119
122
@@ -123,14 +126,16 @@ public fun DataFrame.Companion.readSqlTable(
123
126
* @param [connection] the database connection to read tables from.
124
127
* @param [tableName] the name of the table to read data from.
125
128
* @param [limit] the maximum number of rows to retrieve from the table.
129
+ * @param [inferNullability] if `true` (the default), column nullability is inferred from the actual values ([Infer.Nulls]); if `false`, no inference is performed ([Infer.None]).
126
130
* @return the DataFrame containing the data from the SQL table.
127
131
*
128
132
* @see DriverManager.getConnection
129
133
*/
130
134
public fun DataFrame.Companion.readSqlTable (
131
135
connection : Connection ,
132
136
tableName : String ,
133
- limit : Int = DEFAULT_LIMIT
137
+ limit : Int = DEFAULT_LIMIT ,
138
+ inferNullability : Boolean = true,
134
139
): AnyFrame {
135
140
var preparedQuery = " SELECT * FROM $tableName "
136
141
if (limit > 0 ) preparedQuery + = " LIMIT $limit "
@@ -145,7 +150,7 @@ public fun DataFrame.Companion.readSqlTable(
145
150
preparedQuery
146
151
).use { rs ->
147
152
val tableColumns = getTableColumnsMetadata(rs)
148
- return fetchAndConvertDataFromResultSet(tableColumns, rs, dbType, limit)
153
+ return fetchAndConvertDataFromResultSet(tableColumns, rs, dbType, limit, inferNullability )
149
154
}
150
155
}
151
156
}
@@ -159,15 +164,17 @@ public fun DataFrame.Companion.readSqlTable(
159
164
* @param [dbConfig] the database configuration to connect to the database, including URL, user, and password.
160
165
* @param [sqlQuery] the SQL query to execute.
161
166
* @param [limit] the maximum number of rows to retrieve from the result of the SQL query execution.
167
+ * @param [inferNullability] if `true` (the default), column nullability is inferred from the actual values ([Infer.Nulls]); if `false`, no inference is performed ([Infer.None]).
162
168
* @return the DataFrame containing the result of the SQL query.
163
169
*/
164
170
public fun DataFrame.Companion.readSqlQuery(
    dbConfig: DatabaseConfiguration,
    sqlQuery: String,
    limit: Int = DEFAULT_LIMIT,
    inferNullability: Boolean = true,
): AnyFrame =
    // The connection is opened only for this call; `use` closes it once the query result has been read.
    DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection ->
        // Delegate to the Connection-based overload.
        readSqlQuery(
            connection = connection,
            sqlQuery = sqlQuery,
            limit = limit,
            inferNullability = inferNullability,
        )
    }
173
180
@@ -180,16 +187,21 @@ public fun DataFrame.Companion.readSqlQuery(
180
187
* @param [connection] the database connection to execute the SQL query.
181
188
* @param [sqlQuery] the SQL query to execute.
182
189
* @param [limit] the maximum number of rows to retrieve from the result of the SQL query execution.
190
+ * @param [inferNullability] if `true` (the default), column nullability is inferred from the actual values ([Infer.Nulls]); if `false`, no inference is performed ([Infer.None]).
183
191
* @return the DataFrame containing the result of the SQL query.
184
192
*
185
193
* @see DriverManager.getConnection
186
194
*/
187
195
public fun DataFrame.Companion.readSqlQuery (
188
196
connection : Connection ,
189
197
sqlQuery : String ,
190
- limit : Int = DEFAULT_LIMIT
198
+ limit : Int = DEFAULT_LIMIT ,
199
+ inferNullability : Boolean = true,
191
200
): AnyFrame {
192
- require(isValid(sqlQuery)) { " SQL query should start from SELECT and contain one query for reading data without any manipulation. " }
201
+ require(isValid(sqlQuery)) {
202
+ " SQL query should start from SELECT and contain one query for reading data without any manipulation. " +
203
+ " Also it should not contain any separators like `;`."
204
+ }
193
205
194
206
val url = connection.metaData.url
195
207
val dbType = extractDBTypeFromUrl(url)
@@ -202,12 +214,12 @@ public fun DataFrame.Companion.readSqlQuery(
202
214
connection.createStatement().use { st ->
203
215
st.executeQuery(internalSqlQuery).use { rs ->
204
216
val tableColumns = getTableColumnsMetadata(rs)
205
- return fetchAndConvertDataFromResultSet(tableColumns, rs, dbType, DEFAULT_LIMIT )
217
+ return fetchAndConvertDataFromResultSet(tableColumns, rs, dbType, limit, inferNullability )
206
218
}
207
219
}
208
220
}
209
221
210
- /* * SQL- query is accepted only if it starts from SELECT */
222
+ /** SQL query is accepted only if it starts with SELECT */
211
223
private fun isValid (sqlQuery : String ): Boolean {
212
224
val normalizedSqlQuery = sqlQuery.trim().uppercase()
213
225
@@ -221,15 +233,17 @@ private fun isValid(sqlQuery: String): Boolean {
221
233
* @param [resultSet] the [ResultSet] containing the data to read.
222
234
* @param [dbType] the type of database that the [ResultSet] belongs to.
223
235
* @param [limit] the maximum number of rows to read from the [ResultSet].
236
+ * @param [inferNullability] if `true` (the default), column nullability is inferred from the actual values ([Infer.Nulls]); if `false`, no inference is performed ([Infer.None]).
224
237
* @return the DataFrame generated from the [ResultSet] data.
225
238
*/
226
239
public fun DataFrame.Companion.readResultSet(
    resultSet: ResultSet,
    dbType: DbType,
    limit: Int = DEFAULT_LIMIT,
    inferNullability: Boolean = true,
): AnyFrame =
    fetchAndConvertDataFromResultSet(
        // Column metadata is read from the result set before any rows are fetched.
        tableColumns = getTableColumnsMetadata(resultSet),
        rs = resultSet,
        dbType = dbType,
        limit = limit,
        inferNullability = inferNullability,
    )
234
248
235
249
/* *
@@ -238,33 +252,38 @@ public fun DataFrame.Companion.readResultSet(
238
252
* @param [resultSet] the [ResultSet] containing the data to read.
239
253
* @param [connection] the connection to the database (it's required to extract the database type).
240
254
* @param [limit] the maximum number of rows to read from the [ResultSet].
255
+ * @param [inferNullability] if `true` (the default), column nullability is inferred from the actual values ([Infer.Nulls]); if `false`, no inference is performed ([Infer.None]).
241
256
* @return the DataFrame generated from the [ResultSet] data.
242
257
*/
243
258
public fun DataFrame.Companion.readResultSet(
    resultSet: ResultSet,
    connection: Connection,
    limit: Int = DEFAULT_LIMIT,
    inferNullability: Boolean = true,
): AnyFrame =
    readResultSet(
        resultSet = resultSet,
        // The database type is derived from the JDBC URL reported by the connection metadata.
        dbType = extractDBTypeFromUrl(connection.metaData.url),
        limit = limit,
        inferNullability = inferNullability,
    )
253
269
254
270
/* *
255
271
* Reads all tables from the given database using the provided database configuration and limit.
256
272
*
257
273
* @param [dbConfig] the database configuration to connect to the database, including URL, user, and password.
258
274
* @param [limit] the maximum number of rows to read from each table.
275
+ * @param [catalogue] a name of the catalog from which tables will be retrieved. A null value retrieves tables from all catalogs.
276
+ * @param [inferNullability] if `true` (the default), column nullability is inferred from the actual values ([Infer.Nulls]); if `false`, no inference is performed ([Infer.None]).
259
277
* @return a list of [AnyFrame] objects representing the non-system tables from the database.
260
278
*/
261
279
public fun DataFrame.Companion.readAllSqlTables(
    dbConfig: DatabaseConfiguration,
    catalogue: String? = null,
    limit: Int = DEFAULT_LIMIT,
    inferNullability: Boolean = true,
): List<AnyFrame> =
    // The connection is opened only for this call; `use` closes it once all tables have been read.
    DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection ->
        // Delegate to the Connection-based overload.
        readAllSqlTables(
            connection = connection,
            catalogue = catalogue,
            limit = limit,
            inferNullability = inferNullability,
        )
    }
270
289
@@ -273,14 +292,17 @@ public fun DataFrame.Companion.readAllSqlTables(
273
292
*
274
293
* @param [connection] the database connection to read tables from.
275
294
* @param [limit] the maximum number of rows to read from each table.
295
+ * @param [catalogue] a name of the catalog from which tables will be retrieved. A null value retrieves tables from all catalogs.
296
+ * @param [inferNullability] if `true` (the default), column nullability is inferred from the actual values ([Infer.Nulls]); if `false`, no inference is performed ([Infer.None]).
276
297
* @return a list of [AnyFrame] objects representing the non-system tables from the database.
277
298
*
278
299
* @see DriverManager.getConnection
279
300
*/
280
301
public fun DataFrame.Companion.readAllSqlTables (
281
302
connection : Connection ,
282
303
catalogue : String? = null,
283
- limit : Int = DEFAULT_LIMIT
304
+ limit : Int = DEFAULT_LIMIT ,
305
+ inferNullability : Boolean = true,
284
306
): List <AnyFrame > {
285
307
val metaData = connection.metaData
286
308
val url = connection.metaData.url
@@ -304,7 +326,7 @@ public fun DataFrame.Companion.readAllSqlTables(
304
326
// could be Dialect/Database specific
305
327
logger.debug { " Reading table: $tableName " }
306
328
307
- val dataFrame = readSqlTable(connection, tableName, limit)
329
+ val dataFrame = readSqlTable(connection, tableName, limit, inferNullability )
308
330
dataFrames + = dataFrame
309
331
logger.debug { " Finished reading table: $tableName " }
310
332
}
@@ -450,7 +472,7 @@ public fun DataFrame.Companion.getSchemaForAllSqlTables(connection: Connection):
450
472
val dbType = extractDBTypeFromUrl(url)
451
473
452
474
val tableTypes = arrayOf(" TABLE" )
453
- // exclude system and other tables without data
475
+ // exclude system tables and other tables without data
454
476
val tables = metaData.getTables(null , null , null , tableTypes)
455
477
456
478
val dataFrameSchemas = mutableListOf<DataFrameSchema >()
@@ -561,13 +583,15 @@ private fun manageColumnNameDuplication(columnNameCounter: MutableMap<String, In
561
583
* @param [rs] the ResultSet object containing the data to be fetched and converted.
562
584
* @param [dbType] the type of the database.
563
585
* @param [limit] the maximum number of rows to fetch and convert.
586
+ * @param [inferNullability] if `true`, column nullability is inferred from the actual values ([Infer.Nulls]); if `false`, no inference is performed ([Infer.None]).
564
587
* @return the [AnyFrame] built from the fetched and converted data.
565
588
*/
566
589
private fun fetchAndConvertDataFromResultSet (
567
590
tableColumns : MutableList <TableColumnMetadata >,
568
591
rs : ResultSet ,
569
592
dbType : DbType ,
570
- limit : Int
593
+ limit : Int ,
594
+ inferNullability : Boolean ,
571
595
): AnyFrame {
572
596
val data = List (tableColumns.size) { mutableListOf<Any ?>() }
573
597
@@ -596,6 +620,7 @@ private fun fetchAndConvertDataFromResultSet(
596
620
DataColumn .createValueColumn(
597
621
name = tableColumns[index].name,
598
622
values = values,
623
+ infer = convertNullabilityInference(inferNullability),
599
624
type = kotlinTypesForSqlColumns[index]!!
600
625
)
601
626
}.toDataFrame()
@@ -605,6 +630,8 @@ private fun fetchAndConvertDataFromResultSet(
605
630
return dataFrame
606
631
}
607
632
633
/**
 * Translates the boolean [inferNullability] flag into the [Infer] mode passed to column creation.
 *
 * @param [inferNullability] `true` selects [Infer.Nulls], `false` selects [Infer.None].
 * @return the corresponding [Infer] value.
 */
private fun convertNullabilityInference(inferNullability: Boolean): Infer =
    when (inferNullability) {
        true -> Infer.Nulls
        false -> Infer.None
    }
634
+
608
635
private fun extractNewRowFromResultSetAndAddToData (
609
636
tableColumns : MutableList <TableColumnMetadata >,
610
637
data : List <MutableList <Any ?>>,
0 commit comments