Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ibis duckdb no rowid #5527

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
12 changes: 10 additions & 2 deletions holoviews/core/data/ibis.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,13 @@ def nonzero(cls, dataset):
@cached
def range(cls, dataset, dimension):
dimension = dataset.get_dimension(dimension, strict=True)
if cls.dtype(dataset, dimension).kind in 'SUO':
if cls.dtype(dataset, dimension).kind == 'O':
# Can this be done more efficiently?
column = dataset.data[dimension.name].execute()
first = column.iloc[0]
last = column.iloc[-1]
return first, last
if cls.dtype(dataset, dimension).kind in 'SU':
return None, None
if dimension.nodata is not None:
return Interface.range(dataset, dimension)
Expand Down Expand Up @@ -172,7 +178,9 @@ def dtype(cls, dataset, dimension):
dimension = dataset.get_dimension(dimension)
return dataset.data.head(0).execute().dtypes[dimension.name]

dimension_type = dtype
@classmethod
def dimension_type(cls, dataset, dim):
return cls.dtype(dataset, dim).type

@classmethod
def sort(cls, dataset, by=[], reverse=False):
Expand Down
1 change: 0 additions & 1 deletion holoviews/core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,7 +890,6 @@ def isfinite(val):
return finite & (~pd.isna(val))
return finite


def isdatetime(value):
"""
Whether the array or scalar is recognized datetime type.
Expand Down
104 changes: 104 additions & 0 deletions holoviews/tests/core/data/test_ibisinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@
import numpy as np
import pandas as pd

from bokeh.models import axes as bokeh_axes
from holoviews import render
from holoviews.core.data import Dataset
from holoviews.core.spaces import HoloMap
from holoviews.core.data.ibis import IbisInterface
from holoviews.element.chart import Curve

from .base import HeterogeneousColumnTests, ScalarColumnTests, InterfaceTests

Expand Down Expand Up @@ -100,6 +103,20 @@ def init_column_data(self):
hm_db.table("hm"), kdims=[("x", "X")], vdims=[("y", "Y")]
)

reference_df = pd.DataFrame(
{
"actual": [100, 150, 125, 140, 145, 135, 123],
"forecast": [90, 160, 125, 150, 141, 141, 120],
"numerical": [1.1, 1.9, 3.2, 3.8, 4.3, 5.0, 5.5],
"date": pd.date_range("2022-01-03", "2022-01-09"),
"string": ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"],
},
)
reference_db = create_temp_db(reference_df, "reference_df")
self.reference_table = Dataset(
reference_db.table("reference_df"), kdims=["numerical", "date", "string"], vdims=["actual", "forecast"]
)

def test_dataset_array_init_hm(self):
raise SkipTest("Not supported")

Expand Down Expand Up @@ -303,3 +320,90 @@ def test_dataset_iloc_ellipsis_list_cols(self):

def test_dataset_boolean_index(self):
raise SkipTest("Not supported")

def test_range(self):
assert IbisInterface.range(self.reference_table, "date") == (np.datetime64('2022-01-03'), np.datetime64('2022-01-09'))
assert IbisInterface.range(self.reference_table, "string") == ('Mon', 'Sun')
assert IbisInterface.range(self.reference_table, "numerical") == (np.float64(1.1), np.float64(5.5))

def test_dimension_type(self):
assert IbisInterface.dimension_type(self.reference_table, "date") is np.datetime64
assert IbisInterface.dimension_type(self.reference_table, "string") is np.object_
assert IbisInterface.dimension_type(self.reference_table, "numerical") is np.float64

def test_datetime_xaxis(self):
"""Test to make sure a DateTimeAxis can be identified for the bokeh backend"""
plot_ibis = Curve(self.reference_table, kdims="date", vdims="actual")
# When
plot_bokeh = render(plot_ibis, "bokeh")
xaxis, yaxis = plot_bokeh.axis
# Then
assert isinstance(xaxis, bokeh_axes.DatetimeAxis)
assert isinstance(yaxis, bokeh_axes.LinearAxis)

def test_categorical_xaxis(self):
"""Test to make sure a Categorical axis can be identified for the bokeh backend"""
plot_ibis = Curve(self.reference_table, kdims="string", vdims="actual")
# When
plot_bokeh = render(plot_ibis, "bokeh")
xaxis, yaxis = plot_bokeh.axis
# Then
assert isinstance(xaxis, bokeh_axes.CategoricalAxis)
assert isinstance(yaxis, bokeh_axes.LinearAxis)

def test_numerical_xaxis(self):
"""Test to make sure a LinearAxis axis can be identified for the bokeh backend"""
plot_ibis = Curve(self.reference_table, kdims="numerical", vdims="actual")
# When
plot_bokeh = render(plot_ibis, "bokeh")
xaxis, yaxis = plot_bokeh.axis
# Then
assert isinstance(xaxis, bokeh_axes.LinearAxis)
assert isinstance(yaxis, bokeh_axes.LinearAxis)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First of all, really appreciate your work investigating these issues. That said these tests can't live here. The plotting and data parts of HoloViews are separated for a reason and if test coverage of the data parts is sufficiently extensive then there's no reason to ever test the plotting code with different data backends.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if test coverage of the data parts is sufficiently extensive

To be clear, that is of course clearly not the case since you're discovering these issues.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Besides the non-comprehensive test, the missing docstrings and type annotations from the base Interface also makes it harder than it has to be, to know what a method on the interface actually should do and return.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How would someone be able to setup appropriate tests for a backend @philippjfr? If I look into the tests/core/data folder I see nothing that systematically tests the same things across backends?

And knowing what to test for is also quite difficult due to missing docstrings, type annotations, very complex functions and similar.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are various baseclasses in tests/core/data which implement systematic tests which are then subclassed for each interface.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The best thing to do is probably to continue extending tests/core/data/base HeterogeneousColumnTests with more tests.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Specifically testing for datetime columns seems to be pretty lacking.


import pytest
import ibis
import duckdb
from pathlib import Path

def create_pandas_connection(df: pd.DataFrame, *args, **kwargs):
return ibis.pandas.connect({"df": df})

def create_duckdb_connection(df: pd.DataFrame, *args, **kwargs):
tmpdir = kwargs["tmpdir"]
filename = str(Path(tmpdir)/"db.db")
duckdb_con = duckdb.connect(filename)
duckdb_con.execute("CREATE TABLE df AS SELECT * FROM df")

return ibis.duckdb.connect(filename)

def create_sqlite_connection(df: pd.DataFrame):
return create_temp_db(df, "df")

@pytest.fixture
def reference_df():
return pd.DataFrame(
{
"actual": [100, 150, 125, 140, 145, 135, 123],
"forecast": [90, 160, 125, 150, 141, 141, 120],
"numerical": [1.1, 1.9, 3.2, 3.8, 4.3, 5.0, 5.5],
"date": pd.date_range("2022-01-03", "2022-01-09"),
"string": ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"],
},
)

@pytest.fixture(params=[create_pandas_connection, create_duckdb_connection, create_sqlite_connection])
def connection(request, reference_df, tmpdir):
return request.param(reference_df, tmpdir=tmpdir)

@pytest.fixture
def data(connection):
return connection.table("df")

@pytest.fixture
def dataset(data):
return Dataset(data, kdims=["numerical", "date", "string"], vdims=["actual", "forecast"])

def test_index_ibis_table(data):
table = IbisInterface._index_ibis_table(data)
table.execute()