From 20133df91cc12418da93bd896ae5f852ee467fcb Mon Sep 17 00:00:00 2001 From: King-Ozymandias Date: Mon, 2 Sep 2024 18:56:10 +0200 Subject: [PATCH 1/9] Basic support for SQLite(Sqlite3Cursor) => DataFrame --- .../BaselineOfDataFrame.class.st | 18 +++++++++++++----- src/BaselineOfDataFrame/package.st | 2 +- src/DataFrame-IO-Sqlite/DataFrame.extension.st | 9 +++++++++ .../DataFrameSqliteReader.class.st | 15 +++++++++++++++ src/DataFrame-IO-Sqlite/package.st | 1 + 5 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 src/DataFrame-IO-Sqlite/DataFrame.extension.st create mode 100644 src/DataFrame-IO-Sqlite/DataFrameSqliteReader.class.st create mode 100644 src/DataFrame-IO-Sqlite/package.st diff --git a/src/BaselineOfDataFrame/BaselineOfDataFrame.class.st b/src/BaselineOfDataFrame/BaselineOfDataFrame.class.st index 3e671122..cc331e7b 100644 --- a/src/BaselineOfDataFrame/BaselineOfDataFrame.class.st +++ b/src/BaselineOfDataFrame/BaselineOfDataFrame.class.st @@ -1,10 +1,11 @@ Class { - #name : #BaselineOfDataFrame, - #superclass : #BaselineOf, - #category : #BaselineOfDataFrame + #name : 'BaselineOfDataFrame', + #superclass : 'BaselineOf', + #category : 'BaselineOfDataFrame', + #package : 'BaselineOfDataFrame' } -{ #category : #baselines } +{ #category : 'baselines' } BaselineOfDataFrame >> baseline: spec [ spec for: #common do: [ @@ -18,13 +19,20 @@ BaselineOfDataFrame >> baseline: spec [ spec baseline: 'AINormalization' with: [ spec repository: 'github://pharo-ai/normalization/src' ]. + spec + baseline: 'SQLite3' + with: [ spec repository: 'github://pharo-rdbms/Pharo-SQLite3/src' ]. 
"Packages" spec package: 'DataFrame' with: [ spec requires: #('AINormalization') ]; package: 'DataFrame-Tests' with: [ spec requires: #('DataFrame') ]; package: 'DataFrame-IO' with: [ spec requires: #('DataFrame' 'NeoCSV' 'NeoJSON') ]; + package: 'DataFrame-IO-Sqlite' with: [ spec requires: #('DataFrame' 'SQLite3') ]; package: 'DataFrame-IO-Tests' with: [ spec requires: #('DataFrame-IO') ] ]. + + spec group: 'default' with: #('DataFrame-IO-Tests'). + spec group: 'sqlite' with: #('DataFrame-IO-Sqlite' 'DataFrame-IO-Tests'). spec for: #'pharo7.x' @@ -37,5 +45,5 @@ BaselineOfDataFrame >> baseline: spec [ do: [ spec package: 'DataFrame-Pharo6'; - package: 'DataFrame-Pharo67' ] + package: 'DataFrame-Pharo67' ] ] diff --git a/src/BaselineOfDataFrame/package.st b/src/BaselineOfDataFrame/package.st index 5ebc01ee..985805ac 100644 --- a/src/BaselineOfDataFrame/package.st +++ b/src/BaselineOfDataFrame/package.st @@ -1 +1 @@ -Package { #name : #BaselineOfDataFrame } +Package { #name : 'BaselineOfDataFrame' } diff --git a/src/DataFrame-IO-Sqlite/DataFrame.extension.st b/src/DataFrame-IO-Sqlite/DataFrame.extension.st new file mode 100644 index 00000000..0dd65a4a --- /dev/null +++ b/src/DataFrame-IO-Sqlite/DataFrame.extension.st @@ -0,0 +1,9 @@ +Extension { #name : 'DataFrame' } + +{ #category : '*DataFrame-IO-Sqlite' } +DataFrame class >> readFromSqliteCursor: aSqliteCursor [ + + | reader | + reader := DataFrameSqliteReader new. 
+ ^ self readFrom: aSqliteCursor using: reader +] diff --git a/src/DataFrame-IO-Sqlite/DataFrameSqliteReader.class.st b/src/DataFrame-IO-Sqlite/DataFrameSqliteReader.class.st new file mode 100644 index 00000000..b508cbd6 --- /dev/null +++ b/src/DataFrame-IO-Sqlite/DataFrameSqliteReader.class.st @@ -0,0 +1,15 @@ +Class { + #name : 'DataFrameSqliteReader', + #superclass : 'DataFrameReader', + #category : 'DataFrame-IO-Sqlite', + #package : 'DataFrame-IO-Sqlite' +} + +{ #category : 'reading' } +DataFrameSqliteReader >> readFrom: aSqliteCursor [ + "Read all rows from cursor, stuff them into a new dataframe with columns of same name" + + | cols | + cols := aSqliteCursor columnNames. "need to grab columns before exhausting the cursor" + ^ DataFrame withRows: aSqliteCursor rows columnNames: cols +] diff --git a/src/DataFrame-IO-Sqlite/package.st b/src/DataFrame-IO-Sqlite/package.st new file mode 100644 index 00000000..9673a3c0 --- /dev/null +++ b/src/DataFrame-IO-Sqlite/package.st @@ -0,0 +1 @@ +Package { #name : 'DataFrame-IO-Sqlite' } From f28451b48f051f099d5a1c24b697c6156fb53e11 Mon Sep 17 00:00:00 2001 From: King-Ozymandias <168571791+King-Ozymandias@users.noreply.github.com> Date: Mon, 2 Sep 2024 19:15:31 +0200 Subject: [PATCH 2/9] Update DataFrame.extension.st ugly--; conciseness++; --- src/DataFrame-IO-Sqlite/DataFrame.extension.st | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/DataFrame-IO-Sqlite/DataFrame.extension.st b/src/DataFrame-IO-Sqlite/DataFrame.extension.st index 0dd65a4a..8341898b 100644 --- a/src/DataFrame-IO-Sqlite/DataFrame.extension.st +++ b/src/DataFrame-IO-Sqlite/DataFrame.extension.st @@ -2,8 +2,6 @@ Extension { #name : 'DataFrame' } { #category : '*DataFrame-IO-Sqlite' } DataFrame class >> readFromSqliteCursor: aSqliteCursor [ - - | reader | - reader := DataFrameSqliteReader new. 
- ^ self readFrom: aSqliteCursor using: reader + "Convenience shortcut for SQLite3Cursor => DataFrame" + ^ self readFrom: aSqliteCursor using: DataFrameSqliteReader new ] From 6889bd8ad1460e77953e3a784c0b63bb7343ac93 Mon Sep 17 00:00:00 2001 From: King-Ozymandias Date: Mon, 2 Sep 2024 19:40:26 +0200 Subject: [PATCH 3/9] Readme update --- README.md | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8c3ca625..d35fcb8e 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ DataFrame is a tabular data structure for data analysis in [Pharo](https://pharo To install the latest stable version of DataFrame (`pre-v3`), go to the Playground (`Ctrl+OW`) in your Pharo image and execute the following Metacello script (select it and press Do-it button or `Ctrl+D`): ```st -EpMonitor disableDuring: [ +EpMonitor disableDuring: [ Metacello new baseline: 'DataFrame'; repository: 'github://PolyMathOrg/DataFrame:pre-v3/src'; @@ -21,13 +21,23 @@ EpMonitor disableDuring: [ Use this script if you want the latest version of DataFrame: ```st -EpMonitor disableDuring: [ +EpMonitor disableDuring: [ Metacello new baseline: 'DataFrame'; repository: 'github://PolyMathOrg/DataFrame/src'; load ]. ``` +If you'd be interested in (basic, read-only for now) SQLite support, use `load: 'sqlite'` at the end: + +```st +EpMonitor disableDuring: [ + Metacello new + baseline: 'DataFrame'; + repository: 'github://PolyMathOrg/DataFrame/src'; + load: 'sqlite' ]. +``` + _Note:_ `EpMonitor` serves to deactive [Epicea](https://github.com/pharo-open-documentation/pharo-wiki/blob/3cfb4ebc19821d607bec35c34ee928b4e06822ee/General/TweakingBigImages.md#disable-epicea), a Pharo code recovering mechanism, during the installation of DataFrame. ## How to depend on it? @@ -52,7 +62,7 @@ A data frame is like a database inside a variable. 
It is an object which can be In this section I show a very simple example of creating and manipulating a little data frame. For more advanced examples, please check the [DataFrame Booklet](#dataframe-booklet). -### Creating a data frame +### Creating a data frame ```Smalltalk weather := DataFrame withRows: #( @@ -120,6 +130,12 @@ weather transposed. | **2** | true | true | false | true | true | | **3** | snow | rain | - | rain | snow | +### Load data from SQLite query: +```st +"If you have a connection ready in conn" +df := DataFrame readFromSqliteCursor: (conn execute: 'SELECT * FROM table'). +``` + ## Documentation and Literature 1. [Data Analysis Made Simple with Pharo DataFrame](https://github.com/SquareBracketAssociates/Booklet-DataFrame) - a booklet that serves as the main source of documentation for the DataFrame project. It describes the complete API of DataFrame and DataSeries data structures, and provides examples for each method. From 9f31046de3ddf22a75042395af6c2845dc794b61 Mon Sep 17 00:00:00 2001 From: King-Ozymandias Date: Tue, 3 Sep 2024 13:24:50 +0200 Subject: [PATCH 4/9] SQLite write support --- README.md | 15 +++- .../DataFrame.extension.st | 18 ++++ .../DataFrameSqliteWriter.class.st | 85 +++++++++++++++++++ 3 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st diff --git a/README.md b/README.md index d35fcb8e..54443d0b 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ EpMonitor disableDuring: [ load ]. ``` -If you'd be interested in (basic, read-only for now) SQLite support, use `load: 'sqlite'` at the end: +If you'd be interested in SQLite support, use `load: 'sqlite'` at the end: ```st EpMonitor disableDuring: [ @@ -130,11 +130,20 @@ weather transposed. 
| **2** | true | true | false | true | true | | **3** | snow | rain | - | rain | snow | -### Load data from SQLite query: +### SQLite examples +*The following examples expect a valid, connected SQLite connection in a variable `conn`* +#### Load data from SQLite query: ```st -"If you have a connection ready in conn" df := DataFrame readFromSqliteCursor: (conn execute: 'SELECT * FROM table'). ``` +#### Write data to SQLite table (DataFrame column names <=> table column names): +```st +df writeToSqlite: conn tableName: 'table'. +``` +#### Write to differently named columns (provide names for ALL DataFrame columns!) +```st +df writeToSqlite: conn tableName: 'table' columnNames: #('col1' 'col2' 'col3'). +``` ## Documentation and Literature diff --git a/src/DataFrame-IO-Sqlite/DataFrame.extension.st b/src/DataFrame-IO-Sqlite/DataFrame.extension.st index 8341898b..62388b01 100644 --- a/src/DataFrame-IO-Sqlite/DataFrame.extension.st +++ b/src/DataFrame-IO-Sqlite/DataFrame.extension.st @@ -5,3 +5,21 @@ DataFrame class >> readFromSqliteCursor: aSqliteCursor [ "Convenience shortcut for SQLite3Cursor => DataFrame" ^ self readFrom: aSqliteCursor using: DataFrameSqliteReader new ] + +{ #category : '*DataFrame-IO-Sqlite' } +DataFrame >> writeToSqlite: aSqlite3Connection tableName: aString [ + + | writer | + writer := DataFrameSqliteWriter writeToTable: aString. + self writeTo: aSqlite3Connection using: writer +] + +{ #category : '*DataFrame-IO-Sqlite' } +DataFrame >> writeToSqlite: aSqlite3Connection tableName: aString columnNames: aCollection [ + + | writer | + writer := DataFrameSqliteWriter + writeToTable: aString + columnNames: aCollection. 
+ self writeTo: aSqlite3Connection using: writer +] diff --git a/src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st b/src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st new file mode 100644 index 00000000..ff7c82d6 --- /dev/null +++ b/src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st @@ -0,0 +1,85 @@ +Class { + #name : 'DataFrameSqliteWriter', + #superclass : 'DataFrameWriter', + #instVars : [ + 'tableName', + 'columnNames' + ], + #category : 'DataFrame-IO-Sqlite', + #package : 'DataFrame-IO-Sqlite' +} + +{ #category : 'instance creation' } +DataFrameSqliteWriter class >> writeToTable: aString [ + + ^ self new + tableName: aString; + yourself +] + +{ #category : 'instance creation' } +DataFrameSqliteWriter class >> writeToTable: aString columnNames: aCollection [ + + ^ self new + tableName: aString; + columnNames: aCollection; + yourself +] + +{ #category : 'accessing' } +DataFrameSqliteWriter >> columnNames [ + + ^ columnNames +] + +{ #category : 'accessing' } +DataFrameSqliteWriter >> columnNames: anObject [ + + columnNames := anObject +] + +{ #category : 'helpers' } +DataFrameSqliteWriter >> getColumnNamesFor: aDataFrame [ + + columnNames ifNil: [ ^ aDataFrame columnNames ]. + columnNames size ~= aDataFrame columns size ifTrue: [ + self error: + 'Column count mismatch (Writer columns <=> DataFrame columns)' ]. + ^ columnNames +] + +{ #category : 'helpers' } +DataFrameSqliteWriter >> insertQueryForColumns: aSequence [ + "" + ^ String streamContents: [ :strm | + strm + nextPutAll: 'INSERT INTO '; + nextPutAll: tableName; + nextPut: $(; + nextPutAll: (',' join: aSequence); + nextPutAll: ')VALUES('. + aSequence do: [ :ignore | strm nextPut: $? ] separatedBy: [ strm nextPut: $, ]. 
+ strm nextPut: $) ] +] + +{ #category : 'accessing' } +DataFrameSqliteWriter >> tableName [ + + ^ tableName +] + +{ #category : 'accessing' } +DataFrameSqliteWriter >> tableName: anObject [ + + tableName := anObject +] + +{ #category : 'writing' } +DataFrameSqliteWriter >> write: aDataFrame to: aSqliteConnection [ + + | stmt | + stmt := aSqliteConnection prepare: + (self insertQueryForColumns: + (self getColumnNamesFor: aDataFrame)). + aDataFrame do: [ :row | stmt execute: row asArray ] +] From c075a5726d98d3d79467795d11e46978b5b233b0 Mon Sep 17 00:00:00 2001 From: King-Ozymandias Date: Tue, 3 Sep 2024 14:50:07 +0200 Subject: [PATCH 5/9] Remove method that - according to github issues - belongs to entirely different library, and PR had merge issues. --- src/DataFrame/DataFrame.class.st | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/DataFrame/DataFrame.class.st b/src/DataFrame/DataFrame.class.st index 15f7176c..649d77d1 100644 --- a/src/DataFrame/DataFrame.class.st +++ b/src/DataFrame/DataFrame.class.st @@ -995,22 +995,6 @@ DataFrame >> crossTabulate: colName1 with: colName2 [ ^ col1 crossTabulateWith: col2 ] -{ #category : 'copying' } -DataFrame >> dataPreProcessingEncodeWith: anEncoder [ - "This method is here to speed up pharo-ai/data-preprocessing algos without coupling both projects." - - | copy cache | - copy := self copy. - cache := IdentityDictionary new. - self columns doWithIndex: [ :dataSerie :columnIndex | - | category | - category := cache at: columnIndex ifAbsentPut: [ ((anEncoder categories at: columnIndex) collectWithIndex: [ :elem :index | elem -> index ]) asDictionary ]. - dataSerie doWithIndex: [ :element :rowIndex | - copy at: rowIndex at: columnIndex put: (category at: element ifAbsent: [ AIMissingCategory signalFor: element ]) ] ]. 
- - ^ copy -] - { #category : 'data-types' } DataFrame >> dataTypeOfColumn: aColumnName [ "Given a column name of the DataFrame, it returns the data type of that column" From 178bab5e214c922b7c41b30925e2642f6799e772 Mon Sep 17 00:00:00 2001 From: King-Ozymandias Date: Tue, 3 Sep 2024 15:45:00 +0200 Subject: [PATCH 6/9] Support for selecting / renaming columns when writing to SQLite --- .../DataFrame.extension.st | 10 ++++ .../DataFrameAbstractSqliteWriter.class.st | 35 ++++++++++++ ...ataFrameSqliteColumnMappingWriter.class.st | 56 +++++++++++++++++++ .../DataFrameSqliteWriter.class.st | 29 +--------- 4 files changed, 102 insertions(+), 28 deletions(-) create mode 100644 src/DataFrame-IO-Sqlite/DataFrameAbstractSqliteWriter.class.st create mode 100644 src/DataFrame-IO-Sqlite/DataFrameSqliteColumnMappingWriter.class.st diff --git a/src/DataFrame-IO-Sqlite/DataFrame.extension.st b/src/DataFrame-IO-Sqlite/DataFrame.extension.st index 62388b01..dd910be6 100644 --- a/src/DataFrame-IO-Sqlite/DataFrame.extension.st +++ b/src/DataFrame-IO-Sqlite/DataFrame.extension.st @@ -14,6 +14,16 @@ DataFrame >> writeToSqlite: aSqlite3Connection tableName: aString [ self writeTo: aSqlite3Connection using: writer ] +{ #category : '*DataFrame-IO-Sqlite' } +DataFrame >> writeToSqlite: aSqlite3Connection tableName: aString columnMappings: aCollection [ + + | writer | + writer := DataFrameSqliteColumnMappingWriter + writeToTable: aString + columnMappings: aCollection. 
+ self writeTo: aSqlite3Connection using: writer +] + { #category : '*DataFrame-IO-Sqlite' } DataFrame >> writeToSqlite: aSqlite3Connection tableName: aString columnNames: aCollection [ diff --git a/src/DataFrame-IO-Sqlite/DataFrameAbstractSqliteWriter.class.st b/src/DataFrame-IO-Sqlite/DataFrameAbstractSqliteWriter.class.st new file mode 100644 index 00000000..bb8a94c7 --- /dev/null +++ b/src/DataFrame-IO-Sqlite/DataFrameAbstractSqliteWriter.class.st @@ -0,0 +1,35 @@ +Class { + #name : 'DataFrameAbstractSqliteWriter', + #superclass : 'DataFrameWriter', + #instVars : [ + 'tableName' + ], + #category : 'DataFrame-IO-Sqlite', + #package : 'DataFrame-IO-Sqlite' +} + +{ #category : 'helpers' } +DataFrameAbstractSqliteWriter >> insertQueryForColumns: aSequence [ + "" + ^ String streamContents: [ :strm | + strm + nextPutAll: 'INSERT INTO '; + nextPutAll: tableName; + nextPut: $(; + nextPutAll: (',' join: aSequence); + nextPutAll: ')VALUES('. + aSequence do: [ :ignore | strm nextPut: $? ] separatedBy: [ strm nextPut: $, ]. 
+ strm nextPut: $) ] +] + +{ #category : 'accessing' } +DataFrameAbstractSqliteWriter >> tableName [ + + ^ tableName +] + +{ #category : 'accessing' } +DataFrameAbstractSqliteWriter >> tableName: anObject [ + + tableName := anObject +] diff --git a/src/DataFrame-IO-Sqlite/DataFrameSqliteColumnMappingWriter.class.st b/src/DataFrame-IO-Sqlite/DataFrameSqliteColumnMappingWriter.class.st new file mode 100644 index 00000000..8fb9e175 --- /dev/null +++ b/src/DataFrame-IO-Sqlite/DataFrameSqliteColumnMappingWriter.class.st @@ -0,0 +1,56 @@ +Class { + #name : 'DataFrameSqliteColumnMappingWriter', + #superclass : 'DataFrameAbstractSqliteWriter', + #instVars : [ + 'columnMappings' + ], + #category : 'DataFrame-IO-Sqlite', + #package : 'DataFrame-IO-Sqlite' +} + +{ #category : 'writing' } +DataFrameSqliteColumnMappingWriter class >> writeToTable: aString columnMappings: aCollection [ + + ^ self new + tableName: aString; + columnMappings: aCollection; + yourself +] + +{ #category : 'accessing' } +DataFrameSqliteColumnMappingWriter >> columnMappings [ + + ^ columnMappings +] + +{ #category : 'accessing' } +DataFrameSqliteColumnMappingWriter >> columnMappings: anObject [ + + columnMappings := anObject +] + +{ #category : 'writing' } +DataFrameSqliteColumnMappingWriter >> write: aDataFrame to: aSqliteConnection [ + + | fieldIndices args dfCols tblCols stmt nCols | + nCols := columnMappings size. + dfCols := aDataFrame columnNames. + fieldIndices := columnMappings collect: [ :m | + dfCols indexOf: (m isAssociation + ifTrue: [ m key ] + ifFalse: [ m ]) ]. + tblCols := columnMappings collect: [ :m | m value ]. + args := Array new: fieldIndices size. + + stmt := aSqliteConnection prepare: + (self insertQueryForColumns: tblCols). + + aDataFrame do: [ :r | + | row | + row := r asArray. + 1 to: nCols do: [ :i | + | rowVal | + rowVal := row at: (fieldIndices at: i). + args at: i put: rowVal ]. 
+ stmt execute: args ] +] diff --git a/src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st b/src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st index ff7c82d6..4e5c7ccb 100644 --- a/src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st +++ b/src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st @@ -1,8 +1,7 @@ Class { #name : 'DataFrameSqliteWriter', - #superclass : 'DataFrameWriter', + #superclass : 'DataFrameAbstractSqliteWriter', #instVars : [ - 'tableName', 'columnNames' ], #category : 'DataFrame-IO-Sqlite', @@ -48,32 +47,6 @@ DataFrameSqliteWriter >> getColumnNamesFor: aDataFrame [ ^ columnNames ] -{ #category : 'helpers' } -DataFrameSqliteWriter >> insertQueryForColumns: aSequence [ - "" - ^ String streamContents: [ :strm | - strm - nextPutAll: 'INSERT INTO '; - nextPutAll: tableName; - nextPut: $(; - nextPutAll: (',' join: aSequence); - nextPutAll: ')VALUES('. - aSequence do: [ :ignore | strm nextPut: $? ] separatedBy: [ strm nextPut: $, ]. - strm nextPut: $) ] -] - -{ #category : 'accessing' } -DataFrameSqliteWriter >> tableName [ - - ^ tableName -] - -{ #category : 'accessing' } -DataFrameSqliteWriter >> tableName: anObject [ - - tableName := anObject -] - { #category : 'writing' } DataFrameSqliteWriter >> write: aDataFrame to: aSqliteConnection [ From 8d90242aba4088d5f3871ea40302b36c50dfc2e5 Mon Sep 17 00:00:00 2001 From: King-Ozymandias Date: Tue, 3 Sep 2024 15:58:35 +0200 Subject: [PATCH 7/9] Update readme for column mapping --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 54443d0b..a4dda6f0 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,19 @@ df writeToSqlite: conn tableName: 'table'. ```st df writeToSqlite: conn tableName: 'table' columnNames: #('col1' 'col2' 'col3'). 
``` +#### Mapping (selecting / renaming dataframe columns): +Let's assume: +- CREATE TABLE tbl (a,b,c) +- DataFrame with columns (a,x,c,d) +- We want to write: + - a to a + - x to b + - c to c + - ignore d +- NB: column d is simply left out of the mapping; the order of the entries is irrelevant +```st +df writeToSqlite: conn tableName: 'tbl' columnMappings: { #c. #x -> #b. #a }. +``` ## Documentation and Literature From d1f88c12d8f55dfc81880db5bdf5bbb06a95731f Mon Sep 17 00:00:00 2001 From: King-Ozymandias Date: Tue, 3 Sep 2024 18:29:54 +0200 Subject: [PATCH 8/9] Nicer & faster SQLite #write:to: --- ...ataFrameSqliteColumnMappingWriter.class.st | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/DataFrame-IO-Sqlite/DataFrameSqliteColumnMappingWriter.class.st b/src/DataFrame-IO-Sqlite/DataFrameSqliteColumnMappingWriter.class.st index 8fb9e175..b7912bc5 100644 --- a/src/DataFrame-IO-Sqlite/DataFrameSqliteColumnMappingWriter.class.st +++ b/src/DataFrame-IO-Sqlite/DataFrameSqliteColumnMappingWriter.class.st @@ -29,28 +29,29 @@ DataFrameSqliteColumnMappingWriter >> columnMappings: anObject [ columnMappings := anObject ] +{ #category : 'helpers' } +DataFrameSqliteColumnMappingWriter >> fieldIndicesFor: aDataFrame [ + "gather indices of columns in dataframe (to avoid lookup by field name later, in loop)" + + ^ columnMappings collect: [ :m | + | sourceName | + sourceName := m isAssociation + ifTrue: [ m key ] + ifFalse: [ m ]. + aDataFrame columnNames indexOf: sourceName ] +] + { #category : 'writing' } DataFrameSqliteColumnMappingWriter >> write: aDataFrame to: aSqliteConnection [ - | fieldIndices args dfCols tblCols stmt nCols | - nCols := columnMappings size. - dfCols := aDataFrame columnNames. - fieldIndices := columnMappings collect: [ :m | - dfCols indexOf: (m isAssociation - ifTrue: [ m key ] - ifFalse: [ m ]) ]. - tblCols := columnMappings collect: [ :m | m value ]. + | fieldIndices args stmt | + fieldIndices := self fieldIndicesFor: aDataFrame. 
args := Array new: fieldIndices size. + stmt := aSqliteConnection prepare: (self insertQueryForColumns: + (columnMappings collect: [ :m | m value ])). - stmt := aSqliteConnection prepare: - (self insertQueryForColumns: tblCols). - - aDataFrame do: [ :r | - | row | - row := r asArray. - 1 to: nCols do: [ :i | - | rowVal | - rowVal := row at: (fieldIndices at: i). - args at: i put: rowVal ]. + 1 to: aDataFrame dimensions x do: [ :rowIndex | + fieldIndices withIndexDo: [ :srcCol :dstCol | + args at: dstCol put: (aDataFrame contents at: rowIndex at: srcCol) ]. stmt execute: args ] ] From bdbde8f51ff351a3977d92eb9c59b0ff589334c5 Mon Sep 17 00:00:00 2001 From: King-Ozymandias Date: Wed, 4 Sep 2024 20:50:30 +0200 Subject: [PATCH 9/9] Unification of writing. Also, the "mapping" version doesn't copy rows out of the dataframe. --- .../DataFrame.extension.st | 4 +- .../DataFrameAbstractSqliteWriter.class.st | 35 -------- ...ataFrameSqliteColumnMappingWriter.class.st | 57 ------------- .../DataFrameSqliteWriter.class.st | 85 ++++++++++++++----- 4 files changed, 68 insertions(+), 113 deletions(-) delete mode 100644 src/DataFrame-IO-Sqlite/DataFrameAbstractSqliteWriter.class.st delete mode 100644 src/DataFrame-IO-Sqlite/DataFrameSqliteColumnMappingWriter.class.st diff --git a/src/DataFrame-IO-Sqlite/DataFrame.extension.st b/src/DataFrame-IO-Sqlite/DataFrame.extension.st index dd910be6..de8dacef 100644 --- a/src/DataFrame-IO-Sqlite/DataFrame.extension.st +++ b/src/DataFrame-IO-Sqlite/DataFrame.extension.st @@ -18,7 +18,7 @@ DataFrame >> writeToSqlite: aSqlite3Connection tableName: aString [ DataFrame >> writeToSqlite: aSqlite3Connection tableName: aString columnMappings: aCollection [ | writer | - writer := DataFrameSqliteColumnMappingWriter + writer := DataFrameSqliteWriter writeToTable: aString columnMappings: aCollection. 
self writeTo: aSqlite3Connection using: writer @@ -30,6 +30,6 @@ DataFrame >> writeToSqlite: aSqlite3Connection tableName: aString columnNames: a | writer | writer := DataFrameSqliteWriter writeToTable: aString - columnNames: aCollection. + columnMappings: (self columnNames asArray with: aCollection asArray collect: [ :dfColumn :tableColumn | dfColumn -> tableColumn ]). "positional rename: the i-th DataFrame column is written to the i-th given table column; with:collect: signals an error when the two sizes differ" self writeTo: aSqlite3Connection using: writer ] diff --git a/src/DataFrame-IO-Sqlite/DataFrameAbstractSqliteWriter.class.st b/src/DataFrame-IO-Sqlite/DataFrameAbstractSqliteWriter.class.st deleted file mode 100644 index bb8a94c7..00000000 --- a/src/DataFrame-IO-Sqlite/DataFrameAbstractSqliteWriter.class.st +++ /dev/null @@ -1,35 +0,0 @@ -Class { - #name : 'DataFrameAbstractSqliteWriter', - #superclass : 'DataFrameWriter', - #instVars : [ - 'tableName' - ], - #category : 'DataFrame-IO-Sqlite', - #package : 'DataFrame-IO-Sqlite' -} - -{ #category : 'helpers' } -DataFrameAbstractSqliteWriter >> insertQueryForColumns: aSequence [ - "" - ^ String streamContents: [ :strm | - strm - nextPutAll: 'INSERT INTO '; - nextPutAll: tableName; - nextPut: $(; - nextPutAll: (',' join: aSequence); - nextPutAll: ')VALUES('. - aSequence do: [ :ignore | strm nextPut: $? ] separatedBy: [ strm nextPut: $, ]. 
- strm nextPut: $) ] -] - -{ #category : 'accessing' } -DataFrameAbstractSqliteWriter >> tableName [ - - ^ tableName -] - -{ #category : 'accessing' } -DataFrameAbstractSqliteWriter >> tableName: anObject [ - - tableName := anObject -] diff --git a/src/DataFrame-IO-Sqlite/DataFrameSqliteColumnMappingWriter.class.st b/src/DataFrame-IO-Sqlite/DataFrameSqliteColumnMappingWriter.class.st deleted file mode 100644 index b7912bc5..00000000 --- a/src/DataFrame-IO-Sqlite/DataFrameSqliteColumnMappingWriter.class.st +++ /dev/null @@ -1,57 +0,0 @@ -Class { - #name : 'DataFrameSqliteColumnMappingWriter', - #superclass : 'DataFrameAbstractSqliteWriter', - #instVars : [ - 'columnMappings' - ], - #category : 'DataFrame-IO-Sqlite', - #package : 'DataFrame-IO-Sqlite' -} - -{ #category : 'writing' } -DataFrameSqliteColumnMappingWriter class >> writeToTable: aString columnMappings: aCollection [ - - ^ self new - tableName: aString; - columnMappings: aCollection; - yourself -] - -{ #category : 'accessing' } -DataFrameSqliteColumnMappingWriter >> columnMappings [ - - ^ columnMappings -] - -{ #category : 'accessing' } -DataFrameSqliteColumnMappingWriter >> columnMappings: anObject [ - - columnMappings := anObject -] - -{ #category : 'helpers' } -DataFrameSqliteColumnMappingWriter >> fieldIndicesFor: aDataFrame [ - "gather indices of columns in dataframe (to avoid lookup by field name later, in loop)" - - ^ columnMappings collect: [ :m | - | sourceName | - sourceName := m isAssociation - ifTrue: [ m key ] - ifFalse: [ m ]. - aDataFrame columnNames indexOf: sourceName ] -] - -{ #category : 'writing' } -DataFrameSqliteColumnMappingWriter >> write: aDataFrame to: aSqliteConnection [ - - | fieldIndices args stmt | - fieldIndices := self fieldIndicesFor: aDataFrame. - args := Array new: fieldIndices size. - stmt := aSqliteConnection prepare: (self insertQueryForColumns: - (columnMappings collect: [ :m | m value ])). 
- - 1 to: aDataFrame dimensions x do: [ :rowIndex | - fieldIndices withIndexDo: [ :srcCol :dstCol | - args at: dstCol put: (aDataFrame contents at: rowIndex at: srcCol) ]. - stmt execute: args ] -] diff --git a/src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st b/src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st index 4e5c7ccb..ca167f92 100644 --- a/src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st +++ b/src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st @@ -1,14 +1,15 @@ Class { #name : 'DataFrameSqliteWriter', - #superclass : 'DataFrameAbstractSqliteWriter', + #superclass : 'DataFrameWriter', #instVars : [ - 'columnNames' + 'tableName', + 'columnMappings' ], #category : 'DataFrame-IO-Sqlite', #package : 'DataFrame-IO-Sqlite' } -{ #category : 'instance creation' } +{ #category : 'writing' } DataFrameSqliteWriter class >> writeToTable: aString [ ^ self new @@ -16,43 +17,89 @@ DataFrameSqliteWriter class >> writeToTable: aString [ yourself ] -{ #category : 'instance creation' } -DataFrameSqliteWriter class >> writeToTable: aString columnNames: aCollection [ +{ #category : 'writing' } +DataFrameSqliteWriter class >> writeToTable: aString columnMappings: aCollection [ ^ self new tableName: aString; - columnNames: aCollection; + columnMappings: aCollection; yourself ] { #category : 'accessing' } -DataFrameSqliteWriter >> columnNames [ +DataFrameSqliteWriter >> columnMappings [ - ^ columnNames + ^ columnMappings ] { #category : 'accessing' } -DataFrameSqliteWriter >> columnNames: anObject [ +DataFrameSqliteWriter >> columnMappings: anObject [ + + columnMappings := anObject +] + +{ #category : 'helpers' } +DataFrameSqliteWriter >> fieldIndicesFor: aDataFrame [ + "gather indices of columns in dataframe (to avoid lookup by field name later, in loop)" + + ^ (self getColumnMappings: aDataFrame) collect: [ :m | + | sourceName | + sourceName := m isAssociation + ifTrue: [ m key ] + ifFalse: [ m ]. 
+ aDataFrame columnNames indexOf: sourceName ifAbsent: [ self error: 'Unknown DataFrame column: ' , sourceName printString ] ] +] + +{ #category : 'helpers' } +DataFrameSqliteWriter >> getColumnMappings: aDataFrame [ + + ^ columnMappings ifNil: [ aDataFrame columnNames ] +] + +{ #category : 'helpers' } +DataFrameSqliteWriter >> getColumnNames: aDataFrame [ - columnNames := anObject + ^ (self getColumnMappings: aDataFrame) collect: [ :m | m value ] ] { #category : 'helpers' } -DataFrameSqliteWriter >> getColumnNamesFor: aDataFrame [ +DataFrameSqliteWriter >> insertQueryForColumns: aSequence [ + "Answer the text of an INSERT statement for tableName that lists the names in aSequence and has one ? placeholder per name. Table and column names are written verbatim (no quoting or escaping is applied here)." + ^ String streamContents: [ :strm | + strm + nextPutAll: 'INSERT INTO '; + nextPutAll: tableName; + nextPut: $(; + nextPutAll: (',' join: aSequence); + nextPutAll: ')VALUES('. + aSequence do: [ :ignore | strm nextPut: $? ] separatedBy: [ strm nextPut: $, ]. + strm nextPut: $) ] +] + +{ #category : 'accessing' } +DataFrameSqliteWriter >> tableName [ - columnNames ifNil: [ ^ aDataFrame columnNames ]. - columnNames size ~= aDataFrame columns size ifTrue: [ - self error: - 'Column count mismatch (Writer columns <=> DataFrame columns)' ]. - ^ columnNames + ^ tableName +] + +{ #category : 'accessing' } +DataFrameSqliteWriter >> tableName: anObject [ + + tableName := anObject ] { #category : 'writing' } DataFrameSqliteWriter >> write: aDataFrame to: aSqliteConnection [ - | stmt | + | fieldIndices args stmt | + fieldIndices := self fieldIndicesFor: aDataFrame. + args := Array new: fieldIndices size. "single argument buffer, refilled and re-executed for every row" stmt := aSqliteConnection prepare: (self insertQueryForColumns: - (self getColumnNamesFor: aDataFrame)). - aDataFrame do: [ :row | stmt execute: row asArray ] + (self getColumnNames: aDataFrame)). + + 1 to: aDataFrame dimensions x do: [ :rowIndex | "the x of dimensions is used as the row count here" + fieldIndices withIndexDo: [ :srcCol :dstCol | + args at: dstCol put: (aDataFrame contents at: rowIndex at: srcCol) ]. + stmt execute: args ] ]