Basic support for SQLite(Sqlite3Cursor) => DataFrame #289

Open. Wants to merge 9 commits into base: master.
44 changes: 41 additions & 3 deletions README.md
@@ -11,7 +11,7 @@ DataFrame is a tabular data structure for data analysis in [Pharo](https://pharo
To install the latest stable version of DataFrame (`pre-v3`), go to the Playground (`Ctrl+OW`) in your Pharo image and execute the following Metacello script (select it and press Do-it button or `Ctrl+D`):

```st
EpMonitor disableDuring: [
Metacello new
baseline: 'DataFrame';
repository: 'github://PolyMathOrg/DataFrame:pre-v3/src';
@@ -21,13 +21,23 @@ EpMonitor disableDuring: [
Use this script if you want the latest version of DataFrame:

```st
EpMonitor disableDuring: [
Metacello new
baseline: 'DataFrame';
repository: 'github://PolyMathOrg/DataFrame/src';
load ].
```

If you are also interested in SQLite support, use `load: 'sqlite'` at the end:

```st
EpMonitor disableDuring: [
Metacello new
baseline: 'DataFrame';
repository: 'github://PolyMathOrg/DataFrame/src';
load: 'sqlite' ].
```

_Note:_ `EpMonitor` deactivates [Epicea](https://github.com/pharo-open-documentation/pharo-wiki/blob/3cfb4ebc19821d607bec35c34ee928b4e06822ee/General/TweakingBigImages.md#disable-epicea), a Pharo code-recovery mechanism, during the installation of DataFrame.

## How to depend on it?
Expand All @@ -52,7 +62,7 @@ A data frame is like a database inside a variable. It is an object which can be

In this section I show a very simple example of creating and manipulating a little data frame. For more advanced examples, please check the [DataFrame Booklet](#dataframe-booklet).

### Creating a data frame

```Smalltalk
weather := DataFrame withRows: #(
@@ -120,6 +130,34 @@ weather transposed.
| **2** | true | true | false | true | true |
| **3** | snow | rain | - | rain | snow |

### SQLite examples
*The following examples expect a valid, connected SQLite connection in a variable `conn`.*
#### Load data from a SQLite query:
```st
df := DataFrame readFromSqliteCursor: (conn execute: 'SELECT * FROM table').
```
#### Write data to a SQLite table (DataFrame column names must match the table column names):
```st
df writeToSqlite: conn tableName: 'table'.
```
#### Write to differently named columns (provide names for ALL DataFrame columns!):
```st
df writeToSqlite: conn tableName: 'table' columnNames: #('col1' 'col2' 'col3').
```
#### Mapping (selecting / renaming DataFrame columns):
Let's assume:
- a table created with `CREATE TABLE tbl (a, b, c)`
- a DataFrame with columns `(a, x, c, d)`
- we want to write:
  - `a` to `a`
  - `x` to `b`
  - `c` to `c`
  - and ignore `d`

Note that column `d` is simply not mentioned, and the order of the mappings does not matter:
```st
df writeToSqlite: conn tableName: 'tbl' columnMappings: { #c. #x -> #b. #a }.
```
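Putting the pieces together, here is an end-to-end sketch. The `SQLite3Connection openOn:` call and the `':memory:'` database name are assumptions about the Pharo-SQLite3 API, not part of this PR; everything else uses only the methods shown above:

```st
"Sketch only: assumes Pharo-SQLite3 provides SQLite3Connection class >> openOn:"
conn := SQLite3Connection openOn: ':memory:'.
conn execute: 'CREATE TABLE weather (temperature, rain)'.

df := DataFrame
	withRows: #( #(2.4 true) #(0.5 true) #(-1.2 false) )
	columnNames: #( temperature rain ).

df writeToSqlite: conn tableName: 'weather'.
"Read the rows back through a cursor into a fresh DataFrame"
DataFrame readFromSqliteCursor: (conn execute: 'SELECT * FROM weather').
```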

## Documentation and Literature

1. [Data Analysis Made Simple with Pharo DataFrame](https://github.com/SquareBracketAssociates/Booklet-DataFrame) - a booklet that serves as the main source of documentation for the DataFrame project. It describes the complete API of DataFrame and DataSeries data structures, and provides examples for each method.
18 changes: 13 additions & 5 deletions src/BaselineOfDataFrame/BaselineOfDataFrame.class.st
@@ -1,10 +1,11 @@
Class {
#name : #BaselineOfDataFrame,
#superclass : #BaselineOf,
#category : #BaselineOfDataFrame
#name : 'BaselineOfDataFrame',
#superclass : 'BaselineOf',
#category : 'BaselineOfDataFrame',
#package : 'BaselineOfDataFrame'
}

{ #category : #baselines }
{ #category : 'baselines' }
BaselineOfDataFrame >> baseline: spec [
<baseline>
spec for: #common do: [
@@ -18,13 +19,20 @@ BaselineOfDataFrame >> baseline: spec [
spec
baseline: 'AINormalization'
with: [ spec repository: 'github://pharo-ai/normalization/src' ].
spec
baseline: 'SQLite3'
with: [ spec repository: 'github://pharo-rdbms/Pharo-SQLite3/src' ].

"Packages"
spec
package: 'DataFrame' with: [ spec requires: #('AINormalization') ];
package: 'DataFrame-Tests' with: [ spec requires: #('DataFrame') ];
package: 'DataFrame-IO' with: [ spec requires: #('DataFrame' 'NeoCSV' 'NeoJSON') ];
package: 'DataFrame-IO-Sqlite' with: [ spec requires: #('DataFrame' 'SQLite3') ];
package: 'DataFrame-IO-Tests' with: [ spec requires: #('DataFrame-IO') ] ].

spec group: 'default' with: #('DataFrame-IO-Tests').
spec group: 'sqlite' with: #('DataFrame-IO-Sqlite' 'DataFrame-IO-Tests').

spec
for: #'pharo7.x'
@@ -37,5 +45,5 @@ BaselineOfDataFrame >> baseline: spec [
do: [
spec
package: 'DataFrame-Pharo6';
package: 'DataFrame-Pharo67' ]
]
2 changes: 1 addition & 1 deletion src/BaselineOfDataFrame/package.st
@@ -1 +1 @@
Package { #name : #BaselineOfDataFrame }
Package { #name : 'BaselineOfDataFrame' }
35 changes: 35 additions & 0 deletions src/DataFrame-IO-Sqlite/DataFrame.extension.st
@@ -0,0 +1,35 @@
Extension { #name : 'DataFrame' }

{ #category : '*DataFrame-IO-Sqlite' }
DataFrame class >> readFromSqliteCursor: aSqliteCursor [
"Convenience shortcut for SQLite3Cursor => DataFrame"
^ self readFrom: aSqliteCursor using: DataFrameSqliteReader new
]

{ #category : '*DataFrame-IO-Sqlite' }
DataFrame >> writeToSqlite: aSqlite3Connection tableName: aString [

| writer |
writer := DataFrameSqliteWriter writeToTable: aString.
self writeTo: aSqlite3Connection using: writer
]

{ #category : '*DataFrame-IO-Sqlite' }
DataFrame >> writeToSqlite: aSqlite3Connection tableName: aString columnMappings: aCollection [

| writer |
writer := DataFrameSqliteWriter
writeToTable: aString
columnMappings: aCollection.
self writeTo: aSqlite3Connection using: writer
]

{ #category : '*DataFrame-IO-Sqlite' }
DataFrame >> writeToSqlite: aSqlite3Connection tableName: aString columnNames: aCollection [
"aCollection supplies a target table column name for every DataFrame column, in order"

| writer |
writer := DataFrameSqliteWriter
writeToTable: aString
columnMappings: (self columnNames with: aCollection collect: [ :source :target | source -> target ]).
self writeTo: aSqlite3Connection using: writer
]
15 changes: 15 additions & 0 deletions src/DataFrame-IO-Sqlite/DataFrameSqliteReader.class.st
@@ -0,0 +1,15 @@
Class {
#name : 'DataFrameSqliteReader',
#superclass : 'DataFrameReader',
#category : 'DataFrame-IO-Sqlite',
#package : 'DataFrame-IO-Sqlite'
}

{ #category : 'reading' }
DataFrameSqliteReader >> readFrom: aSqliteCursor [
"Read all rows from the cursor and answer a new DataFrame with matching column names"

| cols |
cols := aSqliteCursor columnNames. "need to grab columns before exhausting the cursor"
^ DataFrame withRows: aSqliteCursor rows columnNames: cols
]
105 changes: 105 additions & 0 deletions src/DataFrame-IO-Sqlite/DataFrameSqliteWriter.class.st
@@ -0,0 +1,105 @@
Class {
#name : 'DataFrameSqliteWriter',
#superclass : 'DataFrameWriter',
#instVars : [
'tableName',
'columnMappings'
],
#category : 'DataFrame-IO-Sqlite',
#package : 'DataFrame-IO-Sqlite'
}

{ #category : 'writing' }
DataFrameSqliteWriter class >> writeToTable: aString [

^ self new
tableName: aString;
yourself
]

{ #category : 'writing' }
DataFrameSqliteWriter class >> writeToTable: aString columnMappings: aCollection [

^ self new
tableName: aString;
columnMappings: aCollection;
yourself
]

{ #category : 'accessing' }
DataFrameSqliteWriter >> columnMappings [

^ columnMappings
]

{ #category : 'accessing' }
DataFrameSqliteWriter >> columnMappings: anObject [

columnMappings := anObject
]

{ #category : 'helpers' }
DataFrameSqliteWriter >> fieldIndicesFor: aDataFrame [
"gather indices of columns in dataframe (to avoid lookup by field name later, in loop)"

^ (self getColumnMappings: aDataFrame) collect: [ :m |
| sourceName |
sourceName := m isAssociation
ifTrue: [ m key ]
ifFalse: [ m ].
aDataFrame columnNames indexOf: sourceName ]
]

{ #category : 'helpers' }
DataFrameSqliteWriter >> getColumnMappings: aDataFrame [

^ columnMappings ifNil: [ aDataFrame columnNames ]
]

{ #category : 'helpers' }
DataFrameSqliteWriter >> getColumnNames: aDataFrame [

^ (self getColumnMappings: aDataFrame) collect: [ :m | m value ]
]

{ #category : 'helpers' }
DataFrameSqliteWriter >> insertQueryForColumns: aSequence [
"Build a parameterized INSERT statement, e.g. 'INSERT INTO t(a,b) VALUES (?,?)'"
^ String streamContents: [ :strm |
strm
nextPutAll: 'INSERT INTO ';
nextPutAll: tableName;
nextPut: $(;
nextPutAll: (',' join: aSequence);
nextPutAll: ') VALUES ('.
aSequence do: [ :ignore | strm nextPut: $? ] separatedBy: [ strm nextPut: $, ].
strm nextPut: $) ]
]

{ #category : 'accessing' }
DataFrameSqliteWriter >> tableName [

^ tableName
]

{ #category : 'accessing' }
DataFrameSqliteWriter >> tableName: anObject [

tableName := anObject
]

{ #category : 'writing' }
DataFrameSqliteWriter >> write: aDataFrame to: aSqliteConnection [

| fieldIndices args stmt |
fieldIndices := self fieldIndicesFor: aDataFrame.
args := Array new: fieldIndices size.
stmt := aSqliteConnection prepare:
(self insertQueryForColumns:
(self getColumnNames: aDataFrame)).

1 to: aDataFrame dimensions x do: [ :rowIndex |
fieldIndices withIndexDo: [ :srcCol :dstCol |
args at: dstCol put: (aDataFrame contents at: rowIndex at: srcCol) ].
stmt execute: args ]
]
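For reviewers, a minimal sketch of exercising `DataFrameSqliteWriter` directly, without the `DataFrame >> writeToSqlite:...` convenience methods. It assumes `conn` holds an open `SQLite3Connection` whose database has a table `items (id, name)`; the table and column names are illustrative only:

```st
"Sketch only: conn is assumed to be an open SQLite3Connection with a table items (id, name)"
df := DataFrame
	withRows: #( #(1 'first') #(2 'second') )
	columnNames: #( id label ).
writer := DataFrameSqliteWriter
	writeToTable: 'items'
	columnMappings: { #id. #label -> #name }. "write id as-is, rename label to name"
df writeTo: conn using: writer.
```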
1 change: 1 addition & 0 deletions src/DataFrame-IO-Sqlite/package.st
@@ -0,0 +1 @@
Package { #name : 'DataFrame-IO-Sqlite' }
16 changes: 0 additions & 16 deletions src/DataFrame/DataFrame.class.st
@@ -995,22 +995,6 @@ DataFrame >> crossTabulate: colName1 with: colName2 [
^ col1 crossTabulateWith: col2
]

{ #category : 'copying' }
DataFrame >> dataPreProcessingEncodeWith: anEncoder [
"This method is here to speed up pharo-ai/data-preprocessing algos without coupling both projects."

| copy cache |
copy := self copy.
cache := IdentityDictionary new.
self columns doWithIndex: [ :dataSerie :columnIndex |
| category |
category := cache at: columnIndex ifAbsentPut: [ ((anEncoder categories at: columnIndex) collectWithIndex: [ :elem :index | elem -> index ]) asDictionary ].
dataSerie doWithIndex: [ :element :rowIndex |
copy at: rowIndex at: columnIndex put: (category at: element ifAbsent: [ AIMissingCategory signalFor: element ]) ] ].

^ copy
]

{ #category : 'data-types' }
DataFrame >> dataTypeOfColumn: aColumnName [
"Given a column name of the DataFrame, it returns the data type of that column"