From 792e66ef47a6102e877765481b1fbdd93d3bb818 Mon Sep 17 00:00:00 2001 From: Simon Chen <1020359403@qq.com> Date: Thu, 4 Jul 2024 07:14:57 +0000 Subject: [PATCH 1/3] Support pandas StringArray and ArrowStringArray --- clickhouse_driver/numpy/helpers.py | 7 +++++-- tests/numpy/columns/test_string.py | 27 +++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/clickhouse_driver/numpy/helpers.py b/clickhouse_driver/numpy/helpers.py index e1f813db..b80f8940 100644 --- a/clickhouse_driver/numpy/helpers.py +++ b/clickhouse_driver/numpy/helpers.py @@ -1,13 +1,16 @@ import numpy as np import pandas as pd +from pandas.core.arrays import ExtensionArray def column_chunks(columns, n): for column in columns: - if not isinstance(column, (np.ndarray, pd.DatetimeIndex)): + if not isinstance( + column, (np.ndarray, pd.DatetimeIndex, ExtensionArray) + ): raise TypeError( 'Unsupported column type: {}. ' - 'ndarray/DatetimeIndex is expected.' + 'ndarray/DatetimeIndex/ExtensionArray is expected.' .format(type(column)) ) diff --git a/tests/numpy/columns/test_string.py b/tests/numpy/columns/test_string.py index df241515..a5558e71 100644 --- a/tests/numpy/columns/test_string.py +++ b/tests/numpy/columns/test_string.py @@ -3,6 +3,11 @@ except ImportError: np = None +try: + import pandas as pd +except ImportError: + pd = None + from tests.numpy.testcase import NumpyBaseTestCase @@ -40,6 +45,28 @@ def test_nullable(self): ) self.assertEqual(inserted[0].dtype, object) + def test_insert_pandas_string(self): + with self.create_table('a String'): + df = pd.DataFrame({'a': ['a', 'b', 'c']}, dtype='string') + self.client.insert_dataframe( + 'INSERT INTO test VALUES', dataframe=df + ) + + query = 'SELECT * FROM test' + inserted = self.emit_cli(query) + self.assertEqual(inserted, 'a\nb\nc\n') + + def test_insert_pandas_pyarrow_string(self): + with self.create_table('a String'): + df = pd.DataFrame({'a': ['a', 'b', 'c']}, dtype='string[pyarrow]') + self.client.insert_dataframe( + 'INSERT INTO test VALUES', dataframe=df + ) + + query = 'SELECT * FROM test' + inserted = self.emit_cli(query) + self.assertEqual(inserted, 'a\nb\nc\n') + class ByteStringTestCase(NumpyBaseTestCase): client_kwargs = {'settings': {'strings_as_bytes': True, 'use_numpy': True}} From f173c63c2741c95584b9544a4927f45365e750b9 Mon Sep 17 00:00:00 2001 From: Simon Chen <1020359403@qq.com> Date: Thu, 4 Jul 2024 17:01:12 +0800 Subject: [PATCH 2/3] Update testsrequire.py --- testsrequire.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsrequire.py b/testsrequire.py index 06c98cc4..6cfee6a3 100644 --- a/testsrequire.py +++ b/testsrequire.py @@ -17,7 +17,7 @@ tests_require.append('lz4') if USE_NUMPY: - tests_require.extend(['numpy', 'pandas']) + tests_require.extend(['numpy', 'pandas', 'pyarrow>=7.0.0']) try: from pip import main as pipmain From 40e9a6f8aaa08dea55e435e54e99283ca8cd9393 Mon Sep 17 00:00:00 2001 From: Simon Chen <1020359403@qq.com> Date: Fri, 5 Jul 2024 16:04:49 +0800 Subject: [PATCH 3/3] remove pyarrow from test --- tests/numpy/columns/test_string.py | 11 ----------- testsrequire.py | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/tests/numpy/columns/test_string.py b/tests/numpy/columns/test_string.py index a5558e71..57c2adf0 100644 --- a/tests/numpy/columns/test_string.py +++ b/tests/numpy/columns/test_string.py @@ -56,17 +56,6 @@ def test_insert_pandas_string(self): inserted = self.emit_cli(query) self.assertEqual(inserted, 'a\nb\nc\n') - def test_insert_pandas_pyarrow_string(self): - with self.create_table('a String'): - df = pd.DataFrame({'a': ['a', 'b', 'c']}, dtype='string[pyarrow]') - self.client.insert_dataframe( - 'INSERT INTO test VALUES', dataframe=df - ) - - query = 'SELECT * FROM test' - inserted = self.emit_cli(query) - self.assertEqual(inserted, 'a\nb\nc\n') - class ByteStringTestCase(NumpyBaseTestCase): client_kwargs = {'settings': {'strings_as_bytes': True, 'use_numpy': True}} diff --git a/testsrequire.py b/testsrequire.py index 6cfee6a3..06c98cc4 100644 --- a/testsrequire.py +++ b/testsrequire.py @@ -17,7 +17,7 @@ tests_require.append('lz4') if USE_NUMPY: - tests_require.extend(['numpy', 'pandas', 'pyarrow>=7.0.0']) + tests_require.extend(['numpy', 'pandas']) try: from pip import main as pipmain