From 121f6f85285691f39c6e6c9cbe735919db05c3aa Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 12 Sep 2024 22:41:31 +0200 Subject: [PATCH 01/27] WIP --- narwhals/_arrow/utils.py | 8 ++++--- narwhals/_pandas_like/utils.py | 33 ++++++++++++++++++--------- narwhals/_polars/utils.py | 14 ++++++++---- narwhals/dtypes.py | 36 +++++++++++++++++++++++++++++- tests/expr_and_series/cast_test.py | 30 +++++++++++++++++++++++++ 5 files changed, 103 insertions(+), 18 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index b8294839c..55b5c360e 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -46,7 +46,7 @@ def translate_dtype(dtype: Any) -> dtypes.DType: if pa.types.is_date32(dtype): return dtypes.Date() if pa.types.is_timestamp(dtype): - return dtypes.Datetime() + return dtypes.Datetime(time_unit=dtype.unit, time_zone=dtype.tz) if pa.types.is_duration(dtype): return dtypes.Duration() if pa.types.is_dictionary(dtype): @@ -88,8 +88,10 @@ def narwhals_to_native_dtype(dtype: dtypes.DType | type[dtypes.DType]) -> Any: # with Polars for now return pa.dictionary(pa.uint32(), pa.string()) if isinstance_or_issubclass(dtype, dtypes.Datetime): - # Use Polars' default - return pa.timestamp("us") + time_unit = getattr(dtype, "time_unit", "us") + time_zone = getattr(dtype, "time_zone", None) + return pa.timestamp(time_unit, tz=time_zone) + if isinstance_or_issubclass(dtype, dtypes.Duration): # Use Polars' default return pa.duration("us") diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 9e1d79ce9..0a3981734 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -1,8 +1,10 @@ from __future__ import annotations +import re from typing import TYPE_CHECKING from typing import Any from typing import Iterable +from typing import Literal from typing import TypeVar from narwhals.dependencies import get_cudf @@ -221,6 +223,12 @@ def translate_dtype(column: Any) -> DType: from narwhals 
import dtypes dtype = column.dtype + + pd_datetime_rgx = ( + r"^datetime64\[(?P<time_unit>ms|us|ns)(?:, (?P<time_zone>[a-zA-Z\/]+))?\]$" + ) + pa_datetime_rgx = r"^timestamp\[(?P<time_unit>ms|us|ns)(?:, tz=(?P<time_zone>[a-zA-Z\/]+))?\]\[pyarrow\]$" + if str(dtype) in ("int64", "Int64", "Int64[pyarrow]", "int64[pyarrow]"): return dtypes.Int64() if str(dtype) in ("int32", "Int32", "Int32[pyarrow]", "int32[pyarrow]"): @@ -264,16 +272,15 @@ def translate_dtype(column: Any) -> DType: return dtypes.Boolean() if str(dtype) in ("category",) or str(dtype).startswith("dictionary<"): return dtypes.Categorical() - if str(dtype).startswith("datetime64"): - # TODO(Unassigned): different time units and time zones - return dtypes.Datetime() + if (match_ := re.match(pd_datetime_rgx, str(dtype))) or ( + match_ := re.match(pa_datetime_rgx, str(dtype)) + ): + time_unit: Literal["us", "ns", "ms"] = match_.group("time_unit") # type: ignore[assignment] + time_zone: str | None = match_.group("time_zone") + return dtypes.Datetime(time_unit, time_zone) if str(dtype).startswith("timedelta64") or str(dtype).startswith("duration"): # TODO(Unassigned): different time units return dtypes.Duration() - if str(dtype).startswith("timestamp["): - # pyarrow-backed datetime - # TODO(Unassigned): different time units and time zones - return dtypes.Datetime() if str(dtype) == "date32[day][pyarrow]": return dtypes.Date() if str(dtype) == "object": @@ -425,10 +432,16 @@ def narwhals_to_native_dtype( # noqa: PLR0915 # convert to it? 
return "category" if isinstance_or_issubclass(dtype, dtypes.Datetime): - # TODO(Unassigned): different time units and time zones + time_unit = getattr(dtype, "time_unit", "us") + time_zone = getattr(dtype, "time_zone", None) + if dtype_backend == "pyarrow-nullable": - return "timestamp[ns][pyarrow]" - return "datetime64[ns]" + tz_part = f", tz={time_zone}" if time_zone else "" + return f"timestamp[{time_unit}{tz_part}][pyarrow]" + else: + tz_part = f", {time_zone}" if time_zone else "" + return f"datetime64[{time_unit}{tz_part}]" + if isinstance_or_issubclass(dtype, dtypes.Duration): # TODO(Unassigned): different time units and time zones if dtype_backend == "pyarrow-nullable": diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index 51f0b1898..dc8696fa6 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -1,9 +1,11 @@ from __future__ import annotations from typing import Any +from typing import Literal from narwhals import dtypes from narwhals.dependencies import get_polars +from narwhals.utils import isinstance_or_issubclass def extract_native(obj: Any) -> Any: @@ -59,8 +61,10 @@ def translate_dtype(dtype: Any) -> dtypes.DType: return dtypes.Categorical() if dtype == pl.Enum: return dtypes.Enum() - if dtype == pl.Datetime: - return dtypes.Datetime() + if isinstance_or_issubclass(dtype, pl.Datetime): + time_unit: Literal["us", "ns", "ms"] = getattr(dtype, "time_unit", "us") + time_zone = getattr(dtype, "time_zone", None) + return dtypes.Datetime(time_unit=time_unit, time_zone=time_zone) if dtype == pl.Duration: return dtypes.Duration() if dtype == pl.Date: @@ -103,8 +107,10 @@ def narwhals_to_native_dtype(dtype: dtypes.DType | type[dtypes.DType]) -> Any: if dtype == dtypes.Enum: msg = "Converting to Enum is not (yet) supported" raise NotImplementedError(msg) - if dtype == dtypes.Datetime: - return pl.Datetime() + if isinstance_or_issubclass(dtype, dtypes.Datetime): + time_unit = getattr(dtype, "time_unit", "us") + time_zone = 
getattr(dtype, "time_zone", None) + return pl.Datetime(time_unit, time_zone) if dtype == dtypes.Duration: return pl.Duration() if dtype == dtypes.Date: diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 4d8da4293..157967938 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -1,6 +1,8 @@ from __future__ import annotations +from datetime import timezone from typing import TYPE_CHECKING +from typing import Literal if TYPE_CHECKING: from typing_extensions import Self @@ -71,7 +73,39 @@ class Object(DType): ... class Unknown(DType): ... -class Datetime(TemporalType): ... +class Datetime(TemporalType): + """ + Data type representing a calendar date and time of day. + + Arguments: + time_unit: Unit of time. Defaults to `'us'` (microseconds). + time_zone: Time zone string, as defined in zoneinfo (to see valid strings run + `import zoneinfo; zoneinfo.available_timezones()` for a full list). + When used to match dtypes, can set this to "*" to check for Datetime + columns that have any (non-null) timezone. + + Notes: + Adapted from Polars implementation at: + https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L398-L457 + """ + + def __init__( + self: Self, + time_unit: Literal["us", "ns", "ms"] = "us", + time_zone: str | timezone | None = None, + ) -> None: + if time_unit not in {"ms", "us", "ns"}: + msg = ( + "invalid `time_unit`" + f"\n\nExpected one of {{'ns','us','ms'}}, got {time_unit!r}." + ) + raise ValueError(msg) + + if isinstance(time_zone, timezone): + time_zone = str(time_zone) + + self.time_unit = time_unit + self.time_zone = time_zone class Duration(TemporalType): ... 
diff --git a/tests/expr_and_series/cast_test.py b/tests/expr_and_series/cast_test.py index 0b496d7ae..f16f46ff9 100644 --- a/tests/expr_and_series/cast_test.py +++ b/tests/expr_and_series/cast_test.py @@ -1,3 +1,8 @@ +from __future__ import annotations + +from datetime import datetime +from datetime import timedelta +from datetime import timezone from typing import Any import pandas as pd @@ -6,6 +11,7 @@ import narwhals.stable.v1 as nw from narwhals.utils import parse_version +from tests.utils import compare_dicts data = { "a": [1], @@ -179,3 +185,27 @@ class Banana: with pytest.raises(AssertionError, match=r"Unknown dtype"): df.select(nw.col("a").cast(Banana)) + + +def test_cast_datetime_tz_aware(constructor: Any, request: Any) -> None: + if "dask" in str(constructor): + request.applymarker(pytest.mark.xfail) + + data = { + "date": [ + datetime(2024, 1, 1, tzinfo=timezone.utc) + timedelta(days=i) + for i in range(3) + ] + } + expected = { + "date": ["2024-01-01 01:00:00", "2024-01-02 01:00:00", "2024-01-03 01:00:00"] + } + + df = nw.from_native(constructor(data)) + result = df.select( + nw.col("date") + .cast(nw.Datetime("ms", time_zone="Europe/Rome")) + .cast(nw.String()) + .str.slice(offset=0, length=19) + ) + compare_dicts(result, expected) From 4896df23a21a8e7f82c40a975f9d77e1cdeec54c Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 12 Sep 2024 23:09:31 +0200 Subject: [PATCH 02/27] order matters? 
--- narwhals/_polars/utils.py | 17 +++++++++-------- tests/series_only/cast_test.py | 6 +++--- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index dc8696fa6..084651ac9 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -61,14 +61,14 @@ def translate_dtype(dtype: Any) -> dtypes.DType: return dtypes.Categorical() if dtype == pl.Enum: return dtypes.Enum() - if isinstance_or_issubclass(dtype, pl.Datetime): - time_unit: Literal["us", "ns", "ms"] = getattr(dtype, "time_unit", "us") - time_zone = getattr(dtype, "time_zone", None) - return dtypes.Datetime(time_unit=time_unit, time_zone=time_zone) if dtype == pl.Duration: return dtypes.Duration() if dtype == pl.Date: return dtypes.Date() + if isinstance_or_issubclass(dtype, pl.Datetime): + time_unit: Literal["us", "ns", "ms"] = getattr(dtype, "time_unit", "us") + time_zone = getattr(dtype, "time_zone", None) + return dtypes.Datetime(time_unit=time_unit, time_zone=time_zone) return dtypes.Unknown() @@ -107,12 +107,13 @@ def narwhals_to_native_dtype(dtype: dtypes.DType | type[dtypes.DType]) -> Any: if dtype == dtypes.Enum: msg = "Converting to Enum is not (yet) supported" raise NotImplementedError(msg) - if isinstance_or_issubclass(dtype, dtypes.Datetime): - time_unit = getattr(dtype, "time_unit", "us") - time_zone = getattr(dtype, "time_zone", None) - return pl.Datetime(time_unit, time_zone) if dtype == dtypes.Duration: return pl.Duration() if dtype == dtypes.Date: return pl.Date() + if isinstance_or_issubclass(dtype, dtypes.Datetime): + time_unit = getattr(dtype, "time_unit", "us") + time_zone = getattr(dtype, "time_zone", None) + return pl.Datetime(time_unit, time_zone) + return pl.Unknown() # pragma: no cover diff --git a/tests/series_only/cast_test.py b/tests/series_only/cast_test.py index 37ae55a01..db0242fb5 100644 --- a/tests/series_only/cast_test.py +++ b/tests/series_only/cast_test.py @@ -75,13 +75,13 @@ def 
test_cast_date_datetime_pandas() -> None: df = df.select(nw.col("a").cast(nw.Datetime)) result = nw.to_native(df) expected = pd.DataFrame({"a": [datetime(2020, 1, 1), datetime(2020, 1, 2)]}).astype( - {"a": "timestamp[ns][pyarrow]"} + {"a": "timestamp[us][pyarrow]"} ) pd.testing.assert_frame_equal(result, expected) - # pandas: pyarrow datetime to date + # # pandas: pyarrow datetime to date dfpd = pd.DataFrame({"a": [datetime(2020, 1, 1), datetime(2020, 1, 2)]}).astype( - {"a": "timestamp[ns][pyarrow]"} + {"a": "timestamp[us][pyarrow]"} ) df = nw.from_native(dfpd) df = df.select(nw.col("a").cast(nw.Date)) From cd2ed4071446a0e42d634f0a8600b4af9a7ce62f Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Fri, 13 Sep 2024 09:38:28 +0200 Subject: [PATCH 03/27] datetime test and polars fix --- narwhals/_polars/utils.py | 5 ++--- tests/dtypes_test.py | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 tests/dtypes_test.py diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index 084651ac9..9b4ed3b8f 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -5,7 +5,6 @@ from narwhals import dtypes from narwhals.dependencies import get_polars -from narwhals.utils import isinstance_or_issubclass def extract_native(obj: Any) -> Any: @@ -65,7 +64,7 @@ def translate_dtype(dtype: Any) -> dtypes.DType: return dtypes.Duration() if dtype == pl.Date: return dtypes.Date() - if isinstance_or_issubclass(dtype, pl.Datetime): + if dtype == pl.Datetime or isinstance(dtype, pl.Datetime): time_unit: Literal["us", "ns", "ms"] = getattr(dtype, "time_unit", "us") time_zone = getattr(dtype, "time_zone", None) return dtypes.Datetime(time_unit=time_unit, time_zone=time_zone) @@ -111,7 +110,7 @@ def narwhals_to_native_dtype(dtype: dtypes.DType | type[dtypes.DType]) -> Any: return pl.Duration() if dtype == dtypes.Date: return pl.Date() - if isinstance_or_issubclass(dtype, dtypes.Datetime): + if dtype == dtypes.Datetime or 
isinstance(dtype, dtypes.Datetime): time_unit = getattr(dtype, "time_unit", "us") time_zone = getattr(dtype, "time_zone", None) return pl.Datetime(time_unit, time_zone) diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py new file mode 100644 index 000000000..e93c50404 --- /dev/null +++ b/tests/dtypes_test.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from datetime import timezone +from types import NoneType +from typing import Literal + +import pytest + +import narwhals.stable.v1 as nw + + +@pytest.mark.parametrize("time_unit", ["us", "ns", "ms"]) +@pytest.mark.parametrize("time_zone", ["Europe/Rome", timezone.utc, None]) +def test_datetime_valid( + time_unit: Literal["us", "ns", "ms"], time_zone: str | timezone | None +) -> None: + dtype = nw.Datetime(time_unit=time_unit, time_zone=time_zone) + + assert dtype.time_unit == time_unit + assert isinstance(dtype.time_zone, (str, NoneType)) + + +@pytest.mark.parametrize("time_unit", ["abc", "s"]) +def test_datetime_invalid(time_unit: str) -> None: + with pytest.raises(ValueError, match="invalid `time_unit`"): + nw.Datetime(time_unit=time_unit) # type: ignore[arg-type] From eb1468e6764d4631432e33736c6219ed67a2bbdc Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Fri, 13 Sep 2024 09:47:22 +0200 Subject: [PATCH 04/27] rm NoneType --- tests/dtypes_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py index e93c50404..7470037fc 100644 --- a/tests/dtypes_test.py +++ b/tests/dtypes_test.py @@ -1,7 +1,6 @@ from __future__ import annotations from datetime import timezone -from types import NoneType from typing import Literal import pytest @@ -17,7 +16,7 @@ def test_datetime_valid( dtype = nw.Datetime(time_unit=time_unit, time_zone=time_zone) assert dtype.time_unit == time_unit - assert isinstance(dtype.time_zone, (str, NoneType)) + assert isinstance(dtype.time_zone, str) or dtype.time_zone is None @pytest.mark.parametrize("time_unit", ["abc", "s"]) 
From e71f9c357d17d3ce7917e0274e6c58fcac2b87b1 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Fri, 13 Sep 2024 10:22:42 +0200 Subject: [PATCH 05/27] pandas pre 1.5 --- narwhals/_pandas_like/series.py | 4 +++- narwhals/_pandas_like/utils.py | 10 +++++++++- narwhals/functions.py | 9 +++++++-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 8288be263..5297c6379 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -189,7 +189,9 @@ def cast( dtype: Any, ) -> Self: ser = self._native_series - dtype = narwhals_to_native_dtype(dtype, ser.dtype, self._implementation) + dtype = narwhals_to_native_dtype( + dtype, ser.dtype, self._implementation, self._backend_version + ) return self._from_native_series(ser.astype(dtype)) def item(self: Self, index: int | None = None) -> Any: diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 0a3981734..9b5197286 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -328,7 +328,10 @@ def get_dtype_backend(dtype: Any, implementation: Implementation) -> str: def narwhals_to_native_dtype( # noqa: PLR0915 - dtype: DType | type[DType], starting_dtype: Any, implementation: Implementation + dtype: DType | type[DType], + starting_dtype: Any, + implementation: Implementation, + backend_version: tuple[int, ...], ) -> Any: from narwhals import dtypes @@ -435,6 +438,11 @@ def narwhals_to_native_dtype( # noqa: PLR0915 time_unit = getattr(dtype, "time_unit", "us") time_zone = getattr(dtype, "time_zone", None) + # Pandas does not support "ms" or "us" time units before version 1.5.0 + # Let's overwrite with "ns" + if implementation is Implementation.PANDAS and backend_version < (1, 5, 0): + time_unit = "ns" + if dtype_backend == "pyarrow-nullable": tz_part = f", tz={time_zone}" if time_zone else "" return f"timestamp[{time_unit}{tz_part}][pyarrow]" diff --git 
a/narwhals/functions.py b/narwhals/functions.py index 430705e66..58181040b 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -13,6 +13,7 @@ from narwhals.dataframe import LazyFrame from narwhals.translate import from_native from narwhals.utils import Implementation +from narwhals.utils import parse_version from narwhals.utils import validate_laziness # Missing type parameters for generic type "DataFrame" @@ -215,7 +216,10 @@ def new_series( narwhals_to_native_dtype as pandas_like_narwhals_to_native_dtype, ) - dtype = pandas_like_narwhals_to_native_dtype(dtype, None, implementation) + backend_version = parse_version(native_namespace.__version__) + dtype = pandas_like_narwhals_to_native_dtype( + dtype, None, implementation, backend_version + ) native_series = native_namespace.Series(values, name=name, dtype=dtype) elif implementation is Implementation.PYARROW: @@ -332,9 +336,10 @@ def from_dict( narwhals_to_native_dtype as pandas_like_narwhals_to_native_dtype, ) + backend_version = parse_version(native_namespace.__version__) schema = { name: pandas_like_narwhals_to_native_dtype( - schema[name], native_type, implementation + schema[name], native_type, implementation, backend_version ) for name, native_type in native_frame.dtypes.items() } From 32385d0ac020b7164d9c0e25c3c3c9221896f38c Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Fri, 13 Sep 2024 10:26:34 +0200 Subject: [PATCH 06/27] no cover backend version branch --- narwhals/_pandas_like/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 9b5197286..e43a06b95 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -440,7 +440,11 @@ def narwhals_to_native_dtype( # noqa: PLR0915 # Pandas does not support "ms" or "us" time units before version 1.5.0 # Let's overwrite with "ns" - if implementation is Implementation.PANDAS and backend_version < (1, 5, 0): + if implementation is 
Implementation.PANDAS and backend_version < ( + 1, + 5, + 0, + ): # pragma: no cover time_unit = "ns" if dtype_backend == "pyarrow-nullable": From 3abeaf8632050abf2c1c7594fc0ac0eb70087e42 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Fri, 13 Sep 2024 11:53:44 +0200 Subject: [PATCH 07/27] add pytz to dev requirements for testing --- requirements-dev.txt | 1 + tests/series_only/cast_test.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 23ff1757e..dae985370 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,6 +8,7 @@ pyarrow pytest pytest-cov pytest-env +pytz hypothesis scikit-learn dask[dataframe]; python_version >= '3.9' diff --git a/tests/series_only/cast_test.py b/tests/series_only/cast_test.py index db0242fb5..672cbebc2 100644 --- a/tests/series_only/cast_test.py +++ b/tests/series_only/cast_test.py @@ -79,7 +79,7 @@ def test_cast_date_datetime_pandas() -> None: ) pd.testing.assert_frame_equal(result, expected) - # # pandas: pyarrow datetime to date + # pandas: pyarrow datetime to date dfpd = pd.DataFrame({"a": [datetime(2020, 1, 1), datetime(2020, 1, 2)]}).astype( {"a": "timestamp[us][pyarrow]"} ) From 4415e3c0bec46ba5fd5e054eadef1f78c5c1aeef Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Fri, 13 Sep 2024 13:41:35 +0200 Subject: [PATCH 08/27] xfail pyarrow table on windows --- tests/expr_and_series/cast_test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/expr_and_series/cast_test.py b/tests/expr_and_series/cast_test.py index ae8fcb67f..dafe876ab 100644 --- a/tests/expr_and_series/cast_test.py +++ b/tests/expr_and_series/cast_test.py @@ -12,6 +12,7 @@ from narwhals.utils import parse_version from tests.utils import Constructor from tests.utils import compare_dicts +from tests.utils import is_windows data = { "a": [1], @@ -192,7 +193,9 @@ class Banana: def test_cast_datetime_tz_aware( constructor: Constructor, request: pytest.FixtureRequest ) -> 
None: - if "dask" in str(constructor): + if "dask" in str(constructor) or ( + "pyarrow_table" in str(constructor) and is_windows() + ): request.applymarker(pytest.mark.xfail) data = { From 5309d4fab66c1cd49de7673ea51799e524cf5dfe Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 14 Sep 2024 10:52:48 +0200 Subject: [PATCH 09/27] Duration(time_unit) --- narwhals/_arrow/utils.py | 7 +++--- narwhals/_pandas_like/utils.py | 43 ++++++++++++++++++++-------------- narwhals/_polars/utils.py | 22 +++++++++-------- narwhals/dtypes.py | 25 +++++++++++++++++++- tests/dtypes_test.py | 13 ++++++++++ 5 files changed, 77 insertions(+), 33 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 55b5c360e..2c512c2f6 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -48,7 +48,7 @@ def translate_dtype(dtype: Any) -> dtypes.DType: if pa.types.is_timestamp(dtype): return dtypes.Datetime(time_unit=dtype.unit, time_zone=dtype.tz) if pa.types.is_duration(dtype): - return dtypes.Duration() + return dtypes.Duration(time_unit=dtype.unit) if pa.types.is_dictionary(dtype): return dtypes.Categorical() return dtypes.Unknown() # pragma: no cover @@ -91,10 +91,9 @@ def narwhals_to_native_dtype(dtype: dtypes.DType | type[dtypes.DType]) -> Any: time_unit = getattr(dtype, "time_unit", "us") time_zone = getattr(dtype, "time_zone", None) return pa.timestamp(time_unit, tz=time_zone) - if isinstance_or_issubclass(dtype, dtypes.Duration): - # Use Polars' default - return pa.duration("us") + time_unit = getattr(dtype, "time_unit", "us") + return pa.duration(time_unit) if isinstance_or_issubclass(dtype, dtypes.Date): return pa.date32() msg = f"Unknown dtype: {dtype}" # pragma: no cover diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index e43a06b95..3088ef0a4 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -229,6 +229,9 @@ def translate_dtype(column: Any) -> DType: ) pa_datetime_rgx = 
r"^timestamp\[(?P<time_unit>ms|us|ns)(?:, tz=(?P<time_zone>[a-zA-Z\/]+))?\]\[pyarrow\]$" + pd_duration_rgx = r"^timedelta64\[(?P<time_unit>ms|us|ns)\]$" + pa_duration_rgx = r"^duration\[(?P<time_unit>ms|us|ns)\]\[pyarrow\]$" + if str(dtype) in ("int64", "Int64", "Int64[pyarrow]", "int64[pyarrow]"): return dtypes.Int64() if str(dtype) in ("int32", "Int32", "Int32[pyarrow]", "int32[pyarrow]"): @@ -275,12 +278,14 @@ def translate_dtype(column: Any) -> DType: if (match_ := re.match(pd_datetime_rgx, str(dtype))) or ( match_ := re.match(pa_datetime_rgx, str(dtype)) ): - time_unit: Literal["us", "ns", "ms"] = match_.group("time_unit") # type: ignore[assignment] - time_zone: str | None = match_.group("time_zone") - return dtypes.Datetime(time_unit, time_zone) - if str(dtype).startswith("timedelta64") or str(dtype).startswith("duration"): - # TODO(Unassigned): different time units - return dtypes.Duration() + dt_time_unit: Literal["us", "ns", "ms"] = match_.group("time_unit") # type: ignore[assignment] + dt_time_zone: str | None = match_.group("time_zone") + return dtypes.Datetime(dt_time_unit, dt_time_zone) + if (match_ := re.match(pd_duration_rgx, str(dtype))) or ( + match_ := re.match(pa_duration_rgx, str(dtype)) + ): + du_time_unit: Literal["us", "ns", "ms"] = match_.group("time_unit") # type: ignore[assignment] + return dtypes.Duration(du_time_unit) if str(dtype) == "date32[day][pyarrow]": return dtypes.Date() if str(dtype) == "object": @@ -435,8 +440,8 @@ def narwhals_to_native_dtype( # noqa: PLR0915 # convert to it? 
return "category" if isinstance_or_issubclass(dtype, dtypes.Datetime): - time_unit = getattr(dtype, "time_unit", "us") - time_zone = getattr(dtype, "time_zone", None) + dt_time_unit = getattr(dtype, "time_unit", "us") + dt_time_zone = getattr(dtype, "time_zone", None) # Pandas does not support "ms" or "us" time units before version 1.5.0 # Let's overwrite with "ns" @@ -445,20 +450,22 @@ def narwhals_to_native_dtype( # noqa: PLR0915 5, 0, ): # pragma: no cover - time_unit = "ns" + dt_time_unit = "ns" if dtype_backend == "pyarrow-nullable": - tz_part = f", tz={time_zone}" if time_zone else "" - return f"timestamp[{time_unit}{tz_part}][pyarrow]" + tz_part = f", tz={dt_time_zone}" if dt_time_zone else "" + return f"timestamp[{dt_time_unit}{tz_part}][pyarrow]" else: - tz_part = f", {time_zone}" if time_zone else "" - return f"datetime64[{time_unit}{tz_part}]" - + tz_part = f", {dt_time_zone}" if dt_time_zone else "" + return f"datetime64[{dt_time_unit}{tz_part}]" if isinstance_or_issubclass(dtype, dtypes.Duration): - # TODO(Unassigned): different time units and time zones - if dtype_backend == "pyarrow-nullable": - return "duration[ns][pyarrow]" - return "timedelta64[ns]" + du_time_unit = getattr(dtype, "time_unit", "us") + return ( + f"duration[{du_time_unit}][pyarrow]" + if dtype_backend == "pyarrow-nullable" + else f"timedelta64[{du_time_unit}]" + ) + if isinstance_or_issubclass(dtype, dtypes.Date): if dtype_backend == "pyarrow-nullable": return "date32[pyarrow]" diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index 9b4ed3b8f..93e719090 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -60,14 +60,15 @@ def translate_dtype(dtype: Any) -> dtypes.DType: return dtypes.Categorical() if dtype == pl.Enum: return dtypes.Enum() - if dtype == pl.Duration: - return dtypes.Duration() if dtype == pl.Date: return dtypes.Date() if dtype == pl.Datetime or isinstance(dtype, pl.Datetime): - time_unit: Literal["us", "ns", "ms"] = 
getattr(dtype, "time_unit", "us") - time_zone = getattr(dtype, "time_zone", None) - return dtypes.Datetime(time_unit=time_unit, time_zone=time_zone) + dt_time_unit: Literal["us", "ns", "ms"] = getattr(dtype, "time_unit", "us") + dt_time_zone = getattr(dtype, "time_zone", None) + return dtypes.Datetime(time_unit=dt_time_unit, time_zone=dt_time_zone) + if dtype == pl.Duration or isinstance(dtype, pl.Duration): + du_time_unit: Literal["us", "ns", "ms"] = getattr(dtype, "time_unit", "us") + return dtypes.Duration(time_unit=du_time_unit) return dtypes.Unknown() @@ -106,13 +107,14 @@ def narwhals_to_native_dtype(dtype: dtypes.DType | type[dtypes.DType]) -> Any: if dtype == dtypes.Enum: msg = "Converting to Enum is not (yet) supported" raise NotImplementedError(msg) - if dtype == dtypes.Duration: - return pl.Duration() if dtype == dtypes.Date: return pl.Date() if dtype == dtypes.Datetime or isinstance(dtype, dtypes.Datetime): - time_unit = getattr(dtype, "time_unit", "us") - time_zone = getattr(dtype, "time_zone", None) - return pl.Datetime(time_unit, time_zone) + dt_time_unit = getattr(dtype, "time_unit", "us") + dt_time_zone = getattr(dtype, "time_zone", None) + return pl.Datetime(dt_time_unit, dt_time_zone) + if dtype == dtypes.Duration or isinstance(dtype, dtypes.Duration): + du_time_unit: Literal["us", "ns", "ms"] = getattr(dtype, "time_unit", "us") + return pl.Duration(time_unit=du_time_unit) return pl.Unknown() # pragma: no cover diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 157967938..8c9851ebb 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -108,7 +108,30 @@ def __init__( self.time_zone = time_zone -class Duration(TemporalType): ... +class Duration(TemporalType): + """ + Data type representing a time duration. + + Arguments: + time_unit: Unit of time. Defaults to `'us'` (microseconds). 
+ + Notes: + Adapted from Polars implementation at: + https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L460-L502 + """ + + def __init__( + self: Self, + time_unit: Literal["us", "ns", "ms"] = "us", + ) -> None: + if time_unit not in ("ms", "us", "ns"): + msg = ( + "invalid `time_unit`" + f"\n\nExpected one of {{'ns','us','ms'}}, got {time_unit!r}." + ) + raise ValueError(msg) + + self.time_unit = time_unit class Categorical(DType): ... diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py index 7470037fc..ac312ac58 100644 --- a/tests/dtypes_test.py +++ b/tests/dtypes_test.py @@ -23,3 +23,16 @@ def test_datetime_valid( def test_datetime_invalid(time_unit: str) -> None: with pytest.raises(ValueError, match="invalid `time_unit`"): nw.Datetime(time_unit=time_unit) # type: ignore[arg-type] + + +@pytest.mark.parametrize("time_unit", ["us", "ns", "ms"]) +def test_duration_valid(time_unit: Literal["us", "ns", "ms"]) -> None: + dtype = nw.Duration(time_unit=time_unit) + + assert dtype.time_unit == time_unit + + +@pytest.mark.parametrize("time_unit", ["abc", "s"]) +def test_duration_invalid(time_unit: str) -> None: + with pytest.raises(ValueError, match="invalid `time_unit`"): + nw.Duration(time_unit=time_unit) # type: ignore[arg-type] From 20e36a154d50f5bdaf3f677146df8b27117a68b8 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 14 Sep 2024 20:00:51 +0200 Subject: [PATCH 10/27] add Datetime and Duration methods, as in polars --- narwhals/dtypes.py | 32 ++++++++++++++++++++++++++++++++ tests/dtypes_test.py | 15 ++++++++++++--- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 8c9851ebb..aacb296c1 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -107,6 +107,22 @@ def __init__( self.time_unit = time_unit self.time_zone = time_zone + def __eq__(self: Self, other: type[DType] | DType) -> bool: # type: ignore[override] + # allow comparing object instances to class + if 
type(other) is type and issubclass(other, Datetime): + return True + elif isinstance(other, Datetime): + return self.time_unit == other.time_unit and self.time_zone == other.time_zone + else: + return False + + def __hash__(self: Self) -> int: # pragma: no cover + return hash((self.__class__, self.time_unit, self.time_zone)) + + def __repr__(self: Self) -> str: # pragma: no cover + class_name = self.__class__.__name__ + return f"{class_name}(time_unit={self.time_unit!r}, time_zone={self.time_zone!r})" + class Duration(TemporalType): """ @@ -133,6 +149,22 @@ def __init__( self.time_unit = time_unit + def __eq__(self: Self, other: type[DType] | DType) -> bool: # type: ignore[override] + # allow comparing object instances to class + if type(other) is type and issubclass(other, Duration): + return True + elif isinstance(other, Duration): + return self.time_unit == other.time_unit + else: + return False + + def __hash__(self: Self) -> int: # pragma: no cover + return hash((self.__class__, self.time_unit)) + + def __repr__(self: Self) -> str: # pragma: no cover + class_name = self.__class__.__name__ + return f"{class_name}(time_unit={self.time_unit!r})" + class Categorical(DType): ... 
diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py index ac312ac58..874fb55d3 100644 --- a/tests/dtypes_test.py +++ b/tests/dtypes_test.py @@ -15,8 +15,13 @@ def test_datetime_valid( ) -> None: dtype = nw.Datetime(time_unit=time_unit, time_zone=time_zone) - assert dtype.time_unit == time_unit - assert isinstance(dtype.time_zone, str) or dtype.time_zone is None + assert dtype == nw.Datetime(time_unit=time_unit, time_zone=time_zone) + assert dtype == nw.Datetime + + if time_zone: + assert dtype != nw.Datetime(time_unit=time_unit) + if time_unit != "ms": + assert dtype != nw.Datetime(time_unit="ms") @pytest.mark.parametrize("time_unit", ["abc", "s"]) @@ -29,7 +34,11 @@ def test_datetime_invalid(time_unit: str) -> None: def test_duration_valid(time_unit: Literal["us", "ns", "ms"]) -> None: dtype = nw.Duration(time_unit=time_unit) - assert dtype.time_unit == time_unit + assert dtype == nw.Duration(time_unit=time_unit) + assert dtype == nw.Duration + + if time_unit != "ms": + assert dtype != nw.Duration(time_unit="ms") @pytest.mark.parametrize("time_unit", ["abc", "s"]) From ec1cb5e89ab549cc5aaa8273f62cafb6d5299d70 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 14 Sep 2024 22:16:25 +0200 Subject: [PATCH 11/27] downstream? 
--- narwhals/dtypes.py | 24 ++++++++---------------- tests/dtypes_test.py | 2 -- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index aacb296c1..297f5433e 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -107,14 +107,12 @@ def __init__( self.time_unit = time_unit self.time_zone = time_zone - def __eq__(self: Self, other: type[DType] | DType) -> bool: # type: ignore[override] - # allow comparing object instances to class - if type(other) is type and issubclass(other, Datetime): - return True - elif isinstance(other, Datetime): - return self.time_unit == other.time_unit and self.time_zone == other.time_zone - else: - return False + def __eq__(self: Self, other: object) -> bool: + return ( + isinstance(other, Datetime) + and self.time_unit == other.time_unit + and self.time_zone == other.time_zone + ) def __hash__(self: Self) -> int: # pragma: no cover return hash((self.__class__, self.time_unit, self.time_zone)) @@ -149,14 +147,8 @@ def __init__( self.time_unit = time_unit - def __eq__(self: Self, other: type[DType] | DType) -> bool: # type: ignore[override] - # allow comparing object instances to class - if type(other) is type and issubclass(other, Duration): - return True - elif isinstance(other, Duration): - return self.time_unit == other.time_unit - else: - return False + def __eq__(self: Self, other: object) -> bool: + return isinstance(other, Duration) and self.time_unit == other.time_unit def __hash__(self: Self) -> int: # pragma: no cover return hash((self.__class__, self.time_unit)) diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py index 874fb55d3..991e6a427 100644 --- a/tests/dtypes_test.py +++ b/tests/dtypes_test.py @@ -16,7 +16,6 @@ def test_datetime_valid( dtype = nw.Datetime(time_unit=time_unit, time_zone=time_zone) assert dtype == nw.Datetime(time_unit=time_unit, time_zone=time_zone) - assert dtype == nw.Datetime if time_zone: assert dtype != nw.Datetime(time_unit=time_unit) @@ 
-35,7 +34,6 @@ def test_duration_valid(time_unit: Literal["us", "ns", "ms"]) -> None: dtype = nw.Duration(time_unit=time_unit) assert dtype == nw.Duration(time_unit=time_unit) - assert dtype == nw.Duration if time_unit != "ms": assert dtype != nw.Duration(time_unit="ms") From 2147ec683dec1b0e422f3b0fc1ac936eaa409464 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 14 Sep 2024 22:50:19 +0200 Subject: [PATCH 12/27] revert --- narwhals/dtypes.py | 20 ++++++++++++++------ tests/dtypes_test.py | 2 ++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 297f5433e..53c930e74 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -108,11 +108,13 @@ def __init__( self.time_zone = time_zone def __eq__(self: Self, other: object) -> bool: - return ( - isinstance(other, Datetime) - and self.time_unit == other.time_unit - and self.time_zone == other.time_zone - ) + # allow comparing object instances to class + if type(other) is type and issubclass(other, Datetime): + return True + elif isinstance(other, Datetime): + return self.time_unit == other.time_unit and self.time_zone == other.time_zone + else: + return False def __hash__(self: Self) -> int: # pragma: no cover return hash((self.__class__, self.time_unit, self.time_zone)) @@ -148,7 +150,13 @@ def __init__( self.time_unit = time_unit def __eq__(self: Self, other: object) -> bool: - return isinstance(other, Duration) and self.time_unit == other.time_unit + # allow comparing object instances to class + if type(other) is type and issubclass(other, Duration): + return True + elif isinstance(other, Duration): + return self.time_unit == other.time_unit + else: + return False def __hash__(self: Self) -> int: # pragma: no cover return hash((self.__class__, self.time_unit)) diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py index 991e6a427..874fb55d3 100644 --- a/tests/dtypes_test.py +++ b/tests/dtypes_test.py @@ -16,6 +16,7 @@ def test_datetime_valid( dtype = 
nw.Datetime(time_unit=time_unit, time_zone=time_zone) assert dtype == nw.Datetime(time_unit=time_unit, time_zone=time_zone) + assert dtype == nw.Datetime if time_zone: assert dtype != nw.Datetime(time_unit=time_unit) @@ -34,6 +35,7 @@ def test_duration_valid(time_unit: Literal["us", "ns", "ms"]) -> None: dtype = nw.Duration(time_unit=time_unit) assert dtype == nw.Duration(time_unit=time_unit) + assert dtype == nw.Duration if time_unit != "ms": assert dtype != nw.Duration(time_unit="ms") From 0f69ec1707d5690b16a3110a58f1fcd0867e2574 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 14 Sep 2024 23:01:23 +0200 Subject: [PATCH 13/27] hash class only --- narwhals/dtypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 53c930e74..24da6b152 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -117,7 +117,7 @@ def __eq__(self: Self, other: object) -> bool: return False def __hash__(self: Self) -> int: # pragma: no cover - return hash((self.__class__, self.time_unit, self.time_zone)) + return hash(self.__class__) def __repr__(self: Self) -> str: # pragma: no cover class_name = self.__class__.__name__ @@ -159,7 +159,7 @@ def __eq__(self: Self, other: object) -> bool: return False def __hash__(self: Self) -> int: # pragma: no cover - return hash((self.__class__, self.time_unit)) + return hash(self.__class__) def __repr__(self: Self) -> str: # pragma: no cover class_name = self.__class__.__name__ From 22836a065c72826d68875f478ae359f1420ee848 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 14 Sep 2024 23:05:20 +0200 Subject: [PATCH 14/27] else case no cover --- narwhals/dtypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 24da6b152..f9d0ec3d0 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -113,7 +113,7 @@ def __eq__(self: Self, other: object) -> bool: return True elif isinstance(other, Datetime): return 
self.time_unit == other.time_unit and self.time_zone == other.time_zone - else: + else: # pragma: no cover return False def __hash__(self: Self) -> int: # pragma: no cover @@ -155,7 +155,7 @@ def __eq__(self: Self, other: object) -> bool: return True elif isinstance(other, Duration): return self.time_unit == other.time_unit - else: + else: # pragma: no cover return False def __hash__(self: Self) -> int: # pragma: no cover From 80a574d03aadbeaab4b41348bfb921c4d61916d8 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 23 Sep 2024 10:36:51 +0200 Subject: [PATCH 15/27] trigger ci From 916eac54514370d7108ca4548a82f3ed4afd1265 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 25 Sep 2024 17:09:40 +0200 Subject: [PATCH 16/27] try making stable dtypes --- narwhals/_polars/utils.py | 12 ++++++++++-- narwhals/dtypes.py | 12 ++++++------ narwhals/stable/v1.py | 40 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index 66c156750..6741ed7f1 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -109,11 +109,19 @@ def narwhals_to_native_dtype(dtype: dtypes.DType | type[dtypes.DType]) -> Any: raise NotImplementedError(msg) if dtype == dtypes.Date: return pl.Date() - if dtype == dtypes.Datetime or isinstance(dtype, dtypes.Datetime): + if ( + dtype == dtypes.Datetime + or isinstance(dtype, dtypes.Datetime) + or (isinstance(dtype, type) and issubclass(dtype, dtypes.Datetime)) + ): dt_time_unit = getattr(dtype, "time_unit", "us") dt_time_zone = getattr(dtype, "time_zone", None) return pl.Datetime(dt_time_unit, dt_time_zone) - if dtype == dtypes.Duration or isinstance(dtype, dtypes.Duration): + if ( + dtype == dtypes.Duration + or isinstance(dtype, dtypes.Duration) + or (isinstance(dtype, type) and issubclass(dtype, dtypes.Duration)) + ): du_time_unit: Literal["us", "ns", "ms"] = getattr(dtype, 
"time_unit", "us") return pl.Duration(time_unit=du_time_unit) diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index f9d0ec3d0..405239c7a 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -109,15 +109,15 @@ def __init__( def __eq__(self: Self, other: object) -> bool: # allow comparing object instances to class - if type(other) is type and issubclass(other, Datetime): + if type(other) is type and issubclass(other, self.__class__): return True - elif isinstance(other, Datetime): + elif isinstance(other, self.__class__): return self.time_unit == other.time_unit and self.time_zone == other.time_zone else: # pragma: no cover return False def __hash__(self: Self) -> int: # pragma: no cover - return hash(self.__class__) + return hash((self.__class__, self.time_unit, self.time_zone)) def __repr__(self: Self) -> str: # pragma: no cover class_name = self.__class__.__name__ @@ -151,15 +151,15 @@ def __init__( def __eq__(self: Self, other: object) -> bool: # allow comparing object instances to class - if type(other) is type and issubclass(other, Duration): + if type(other) is type and issubclass(other, self.__class__): return True - elif isinstance(other, Duration): + elif isinstance(other, self.__class__): return self.time_unit == other.time_unit else: # pragma: no cover return False def __hash__(self: Self) -> int: # pragma: no cover - return hash(self.__class__) + return hash((self.__class__, self.time_unit)) def __repr__(self: Self) -> str: # pragma: no cover class_name = self.__class__.__name__ diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 0f34c88fb..ce5a654c9 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -17,8 +17,8 @@ from narwhals.dtypes import Boolean from narwhals.dtypes import Categorical from narwhals.dtypes import Date -from narwhals.dtypes import Datetime -from narwhals.dtypes import Duration +from narwhals.dtypes import Datetime as NwDatetime +from narwhals.dtypes import Duration as NwDuration from 
narwhals.dtypes import Enum from narwhals.dtypes import Float32 from narwhals.dtypes import Float64 @@ -62,6 +62,42 @@ T = TypeVar("T") +class Duration(NwDuration): + """ + Data type representing a time duration. + + Arguments: + time_unit: Unit of time. Defaults to `'us'` (microseconds). + + Notes: + Adapted from Polars implementation at: + https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L460-L502 + """ + + def __hash__(self) -> int: + return hash(super().__class__) + + +class Datetime(NwDatetime): + """ + Data type representing a calendar date and time of day. + + Arguments: + time_unit: Unit of time. Defaults to `'us'` (microseconds). + time_zone: Time zone string, as defined in zoneinfo (to see valid strings run + `import zoneinfo; zoneinfo.available_timezones()` for a full list). + When used to match dtypes, can set this to "*" to check for Datetime + columns that have any (non-null) timezone. + + Notes: + Adapted from Polars implementation at: + https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L398-L457 + """ + + def __hash__(self) -> int: + return hash(super().__class__) + + class DataFrame(NwDataFrame[IntoDataFrameT]): """ Narwhals DataFrame, backed by a native dataframe. From 180b86e30ab7bdfa4644c0c4e2996b55165ac8ef Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 28 Sep 2024 10:09:22 +0200 Subject: [PATCH 17/27] broken, but getting there? 
--- narwhals/_pandas_like/series.py | 5 +- narwhals/_pandas_like/utils.py | 4 +- narwhals/dtypes.py | 2 + narwhals/series.py | 4 +- narwhals/stable/v1.py | 2021 ------------------------------- 5 files changed, 8 insertions(+), 2028 deletions(-) delete mode 100644 narwhals/stable/v1.py diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 603efaeb9..e5f375e91 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -165,9 +165,8 @@ def name(self) -> str: def shape(self) -> tuple[int]: return self._native_series.shape # type: ignore[no-any-return] - @property - def dtype(self: Self) -> DType: - return translate_dtype(self._native_series) + def dtype(self: Self, dtypes) -> DType: + return translate_dtype(self._native_series, dtypes) def scatter(self, indices: int | Sequence[int], values: Any) -> Self: if isinstance(values, self.__class__): diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 09c836926..91246274c 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -208,9 +208,7 @@ def set_axis( return obj.set_axis(index, axis=0, **kwargs) # type: ignore[attr-defined, no-any-return] -def translate_dtype(column: Any) -> DType: - from narwhals import dtypes - +def translate_dtype(column: Any, dtypes) -> DType: dtype = str(column.dtype) pd_datetime_rgx = ( diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index dd761fda8..df9f6ad15 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -108,6 +108,7 @@ def __init__( self.time_zone = time_zone def __eq__(self: Self, other: object) -> bool: + breakpoint() # allow comparing object instances to class if type(other) is type and issubclass(other, self.__class__): return True @@ -117,6 +118,7 @@ def __eq__(self: Self, other: object) -> bool: return False def __hash__(self: Self) -> int: # pragma: no cover + breakpoint() return hash((self.__class__, self.time_unit, self.time_zone)) def __repr__(self: 
Self) -> str: # pragma: no cover diff --git a/narwhals/series.py b/narwhals/series.py index bd99fc5c7..bd3cee274 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -9,6 +9,7 @@ from typing import overload from narwhals.utils import parse_version +from narwhals import dtypes if TYPE_CHECKING: from types import ModuleType @@ -32,6 +33,7 @@ class Series: `narwhals.from_native`, making sure to pass `allow_series=True` or `series_only=True`. """ + _dtypes = dtypes def __init__( self, @@ -331,7 +333,7 @@ def dtype(self: Self) -> DType: >>> func(s_pl) Int64 """ - return self._compliant_series.dtype # type: ignore[no-any-return] + return self._compliant_series.dtype(self._dtypes) # type: ignore[no-any-return] @property def name(self) -> str: diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py deleted file mode 100644 index 50bb261cc..000000000 --- a/narwhals/stable/v1.py +++ /dev/null @@ -1,2021 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING -from typing import Any -from typing import Callable -from typing import Iterable -from typing import Literal -from typing import Sequence -from typing import TypeVar -from typing import overload - -import narwhals as nw -from narwhals import dependencies -from narwhals import selectors -from narwhals.dataframe import DataFrame as NwDataFrame -from narwhals.dataframe import LazyFrame as NwLazyFrame -from narwhals.dtypes import Array -from narwhals.dtypes import Boolean -from narwhals.dtypes import Categorical -from narwhals.dtypes import Date -from narwhals.dtypes import Datetime as NwDatetime -from narwhals.dtypes import Duration as NwDuration -from narwhals.dtypes import Enum -from narwhals.dtypes import Float32 -from narwhals.dtypes import Float64 -from narwhals.dtypes import Int8 -from narwhals.dtypes import Int16 -from narwhals.dtypes import Int32 -from narwhals.dtypes import Int64 -from narwhals.dtypes import List -from narwhals.dtypes import Object -from narwhals.dtypes import 
String -from narwhals.dtypes import Struct -from narwhals.dtypes import UInt8 -from narwhals.dtypes import UInt16 -from narwhals.dtypes import UInt32 -from narwhals.dtypes import UInt64 -from narwhals.dtypes import Unknown -from narwhals.expr import Expr as NwExpr -from narwhals.expr import Then as NwThen -from narwhals.expr import When as NwWhen -from narwhals.expr import when as nw_when -from narwhals.functions import show_versions -from narwhals.schema import Schema as NwSchema -from narwhals.series import Series as NwSeries -from narwhals.translate import get_native_namespace as nw_get_native_namespace -from narwhals.translate import narwhalify as nw_narwhalify -from narwhals.translate import to_native -from narwhals.typing import IntoDataFrameT -from narwhals.typing import IntoFrameT -from narwhals.utils import is_ordered_categorical as nw_is_ordered_categorical -from narwhals.utils import maybe_align_index as nw_maybe_align_index -from narwhals.utils import maybe_convert_dtypes as nw_maybe_convert_dtypes -from narwhals.utils import maybe_get_index as nw_maybe_get_index -from narwhals.utils import maybe_set_index as nw_maybe_set_index - -if TYPE_CHECKING: - from types import ModuleType - - from typing_extensions import Self - - from narwhals.dtypes import DType - from narwhals.typing import IntoExpr - -T = TypeVar("T") - - -class Duration(NwDuration): - """ - Data type representing a time duration. - - Arguments: - time_unit: Unit of time. Defaults to `'us'` (microseconds). - - Notes: - Adapted from Polars implementation at: - https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L460-L502 - """ - - def __hash__(self) -> int: - return hash(super().__class__) - - -class Datetime(NwDatetime): - """ - Data type representing a calendar date and time of day. - - Arguments: - time_unit: Unit of time. Defaults to `'us'` (microseconds). 
- time_zone: Time zone string, as defined in zoneinfo (to see valid strings run - `import zoneinfo; zoneinfo.available_timezones()` for a full list). - When used to match dtypes, can set this to "*" to check for Datetime - columns that have any (non-null) timezone. - - Notes: - Adapted from Polars implementation at: - https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L398-L457 - """ - - def __hash__(self) -> int: - return hash(super().__class__) - - -class DataFrame(NwDataFrame[IntoDataFrameT]): - """ - Narwhals DataFrame, backed by a native dataframe. - - The native dataframe might be pandas.DataFrame, polars.DataFrame, ... - - This class is not meant to be instantiated directly - instead, use - `narwhals.from_native`. - """ - - @overload - def __getitem__(self, item: tuple[Sequence[int], slice]) -> Self: ... - @overload - def __getitem__(self, item: tuple[Sequence[int], Sequence[int]]) -> Self: ... - @overload - def __getitem__(self, item: tuple[slice, Sequence[int]]) -> Self: ... - @overload - def __getitem__(self, item: tuple[Sequence[int], str]) -> Series: ... # type: ignore[overload-overlap] - @overload - def __getitem__(self, item: tuple[slice, str]) -> Series: ... # type: ignore[overload-overlap] - @overload - def __getitem__(self, item: tuple[Sequence[int], Sequence[str]]) -> Self: ... - @overload - def __getitem__(self, item: tuple[slice, Sequence[str]]) -> Self: ... - @overload - def __getitem__(self, item: tuple[Sequence[int], int]) -> Series: ... # type: ignore[overload-overlap] - @overload - def __getitem__(self, item: tuple[slice, int]) -> Series: ... # type: ignore[overload-overlap] - - @overload - def __getitem__(self, item: Sequence[int]) -> Self: ... - - @overload - def __getitem__(self, item: str) -> Series: ... # type: ignore[overload-overlap] - - @overload - def __getitem__(self, item: Sequence[str]) -> Self: ... - - @overload - def __getitem__(self, item: slice) -> Self: ... 
- - @overload - def __getitem__(self, item: tuple[slice, slice]) -> Self: ... - - def __getitem__(self, item: Any) -> Any: - return _stableify(super().__getitem__(item)) - - def lazy(self) -> LazyFrame[Any]: - """ - Lazify the DataFrame (if possible). - - If a library does not support lazy execution, then this is a no-op. - - Examples: - Construct pandas and Polars DataFrames: - - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(df) - >>> df_pl = pl.DataFrame(df) - - We define a library agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... return df.lazy() - - Note that then, pandas dataframe stay eager, but Polars DataFrame becomes a Polars LazyFrame: - - >>> func(df_pd) - foo bar ham - 0 1 6.0 a - 1 2 7.0 b - 2 3 8.0 c - >>> func(df_pl) - - """ - return _stableify(super().lazy()) # type: ignore[no-any-return] - - # Not sure what mypy is complaining about, probably some fancy - # thing that I need to understand category theory for - @overload # type: ignore[override] - def to_dict(self, *, as_series: Literal[True] = ...) -> dict[str, Series]: ... - @overload - def to_dict(self, *, as_series: Literal[False]) -> dict[str, list[Any]]: ... - @overload - def to_dict(self, *, as_series: bool) -> dict[str, Series] | dict[str, list[Any]]: ... - def to_dict( - self, *, as_series: bool = True - ) -> dict[str, Series] | dict[str, list[Any]]: - """ - Convert DataFrame to a dictionary mapping column name to values. - - Arguments: - as_series: If set to true ``True``, then the values are Narwhals Series, - otherwise the values are Any. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals.stable.v1 as nw - >>> df = { - ... "A": [1, 2, 3, 4, 5], - ... "fruits": ["banana", "banana", "apple", "apple", "banana"], - ... "B": [5, 4, 3, 2, 1], - ... 
"animals": ["beetle", "fly", "beetle", "beetle", "beetle"], - ... "optional": [28, 300, None, 2, -30], - ... } - >>> df_pd = pd.DataFrame(df) - >>> df_pl = pl.DataFrame(df) - >>> df_pa = pa.table(df) - - We define a library agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... return df.to_dict(as_series=False) - - We can then pass either pandas, Polars or PyArrow to `func`: - - >>> func(df_pd) - {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28.0, 300.0, nan, 2.0, -30.0]} - >>> func(df_pl) - {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]} - >>> func(df_pa) - {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]} - """ - if as_series: - return {key: _stableify(value) for key, value in super().to_dict().items()} - return super().to_dict(as_series=False) - - def is_duplicated(self: Self) -> Series: - r""" - Get a mask of all duplicated rows in this DataFrame. - - Examples: - >>> import narwhals.stable.v1 as nw - >>> import pandas as pd - >>> import polars as pl - >>> df_pd = pd.DataFrame( - ... { - ... "a": [1, 2, 3, 1], - ... "b": ["x", "y", "z", "x"], - ... } - ... ) - >>> df_pl = pl.DataFrame( - ... { - ... "a": [1, 2, 3, 1], - ... "b": ["x", "y", "z", "x"], - ... } - ... ) - - Let's define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... 
return df.is_duplicated() - - We can then pass either pandas or Polars to `func`: - - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE - 0 True - 1 False - 2 False - 3 True - dtype: bool - - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [bool] - [ - true - false - false - true - ] - """ - return _stableify(super().is_duplicated()) - - def is_unique(self: Self) -> Series: - r""" - Get a mask of all unique rows in this DataFrame. - - Examples: - >>> import narwhals.stable.v1 as nw - >>> import pandas as pd - >>> import polars as pl - >>> df_pd = pd.DataFrame( - ... { - ... "a": [1, 2, 3, 1], - ... "b": ["x", "y", "z", "x"], - ... } - ... ) - >>> df_pl = pl.DataFrame( - ... { - ... "a": [1, 2, 3, 1], - ... "b": ["x", "y", "z", "x"], - ... } - ... ) - - Let's define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... return df.is_unique() - - We can then pass either pandas or Polars to `func`: - - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE - 0 False - 1 True - 2 True - 3 False - dtype: bool - - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [bool] - [ - false - true - true - false - ] - """ - return _stableify(super().is_unique()) - - -class LazyFrame(NwLazyFrame[IntoFrameT]): - """ - Narwhals DataFrame, backed by a native dataframe. - - The native dataframe might be pandas.DataFrame, polars.LazyFrame, ... - - This class is not meant to be instantiated directly - instead, use - `narwhals.from_native`. - """ - - def collect(self) -> DataFrame[Any]: - r""" - Materialize this LazyFrame into a DataFrame. - - Returns: - DataFrame - - Examples: - >>> import narwhals as nw - >>> import polars as pl - >>> lf_pl = pl.LazyFrame( - ... { - ... "a": ["a", "b", "a", "b", "b", "c"], - ... "b": [1, 2, 3, 4, 5, 6], - ... "c": [6, 5, 4, 3, 2, 1], - ... } - ... 
) - >>> lf = nw.from_native(lf_pl) - >>> lf - ┌───────────────────────────────────────┐ - | Narwhals LazyFrame | - | Use `.to_native` to see native output | - └───────────────────────────────────────┘ - >>> df = lf.group_by("a").agg(nw.all().sum()).collect() - >>> df.to_native().sort("a") - shape: (3, 3) - ┌─────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ a ┆ 4 ┆ 10 │ - │ b ┆ 11 ┆ 10 │ - │ c ┆ 6 ┆ 1 │ - └─────┴─────┴─────┘ - """ - return _stableify(super().collect()) # type: ignore[no-any-return] - - -class Series(NwSeries): - """ - Narwhals Series, backed by a native series. - - The native series might be pandas.Series, polars.Series, ... - - This class is not meant to be instantiated directly - instead, use - `narwhals.from_native`, making sure to pass `allow_series=True` or - `series_only=True`. - """ - - def to_frame(self) -> DataFrame[Any]: - """ - Convert to dataframe. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s, name="a") - >>> s_pl = pl.Series("a", s) - - We define a library agnostic function: - - >>> @nw.narwhalify - ... def func(s): - ... return s.to_frame() - - We can then pass either pandas or Polars to `func`: - - >>> func(s_pd) - a - 0 1 - 1 2 - 2 3 - >>> func(s_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 2 │ - │ 3 │ - └─────┘ - """ - return _stableify(super().to_frame()) # type: ignore[no-any-return] - - def value_counts( - self: Self, - *, - sort: bool = False, - parallel: bool = False, - name: str | None = None, - normalize: bool = False, - ) -> DataFrame[Any]: - r""" - Count the occurrences of unique values. - - Arguments: - sort: Sort the output by count in descending order. If set to False (default), - the order of the output is random. - parallel: Execute the computation in parallel. Used for Polars only. 
- name: Give the resulting count column a specific name; if `normalize` is True - defaults to "proportion", otherwise defaults to "count". - normalize: If true gives relative frequencies of the unique values - - Examples: - >>> import narwhals.stable.v1 as nw - >>> import pandas as pd - >>> import polars as pl - >>> s_pd = pd.Series([1, 1, 2, 3, 2], name="s") - >>> s_pl = pl.Series(values=[1, 1, 2, 3, 2], name="s") - - Let's define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(s): - ... return s.value_counts(sort=True) - - We can then pass either pandas or Polars to `func`: - - >>> func(s_pd) # doctest: +NORMALIZE_WHITESPACE - s count - 0 1 2 - 1 2 2 - 2 3 1 - - >>> func(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3, 2) - ┌─────┬───────┐ - │ s ┆ count │ - │ --- ┆ --- │ - │ i64 ┆ u32 │ - ╞═════╪═══════╡ - │ 1 ┆ 2 │ - │ 2 ┆ 2 │ - │ 3 ┆ 1 │ - └─────┴───────┘ - """ - return _stableify( # type: ignore[no-any-return] - super().value_counts( - sort=sort, parallel=parallel, name=name, normalize=normalize - ) - ) - - -class Expr(NwExpr): - def _l1_norm(self) -> Self: - return super()._taxicab_norm() - - -class Schema(NwSchema): - """ - Ordered mapping of column names to their data type. - - Arguments: - schema: Mapping[str, DType] | Iterable[tuple[str, DType]] | None - The schema definition given by column names and their associated. - *instantiated* Narwhals data type. Accepts a mapping or an iterable of tuples. - - Examples: - Define a schema by passing *instantiated* data types. - - >>> import narwhals.stable.v1 as nw - >>> schema = nw.Schema({"foo": nw.Int8(), "bar": nw.String()}) - >>> schema # doctest:+SKIP - Schema({'foo': Int8, 'bar': String}) - - Access the data type associated with a specific column name. - - >>> schema["foo"] - Int8 - - Access various schema properties using the `names`, `dtypes`, and `len` methods. 
- - >>> schema.names() - ['foo', 'bar'] - >>> schema.dtypes() - [Int8, String] - >>> schema.len() - 2 - """ - - -@overload -def _stableify(obj: NwDataFrame[IntoFrameT]) -> DataFrame[IntoFrameT]: ... -@overload -def _stableify(obj: NwLazyFrame[IntoFrameT]) -> LazyFrame[IntoFrameT]: ... -@overload -def _stableify(obj: NwSeries) -> Series: ... -@overload -def _stableify(obj: NwExpr) -> Expr: ... -@overload -def _stableify(obj: Any) -> Any: ... - - -def _stableify( - obj: NwDataFrame[IntoFrameT] | NwLazyFrame[IntoFrameT] | NwSeries | NwExpr | Any, -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series | Expr | Any: - if isinstance(obj, NwDataFrame): - return DataFrame( - obj._compliant_frame, - level=obj._level, - ) - if isinstance(obj, NwLazyFrame): - return LazyFrame( - obj._compliant_frame, - level=obj._level, - ) - if isinstance(obj, NwSeries): - return Series( - obj._compliant_series, - level=obj._level, - ) - if isinstance(obj, NwExpr): - return Expr(obj._call) - return obj - - -@overload -def from_native( - native_dataframe: Any, - *, - strict: Literal[False], - eager_only: None = ..., - eager_or_interchange_only: Literal[True], - series_only: None = ..., - allow_series: Literal[True], -) -> Any: ... - - -@overload -def from_native( - native_dataframe: Any, - *, - strict: Literal[False], - eager_only: Literal[True], - eager_or_interchange_only: None = ..., - series_only: None = ..., - allow_series: Literal[True], -) -> Any: ... - - -@overload -def from_native( - native_dataframe: IntoDataFrameT, - *, - strict: Literal[False], - eager_only: None = ..., - eager_or_interchange_only: Literal[True], - series_only: None = ..., - allow_series: None = ..., -) -> DataFrame[IntoDataFrameT]: ... - - -@overload -def from_native( - native_dataframe: T, - *, - strict: Literal[False], - eager_only: None = ..., - eager_or_interchange_only: Literal[True], - series_only: None = ..., - allow_series: None = ..., -) -> T: ... 
- - -@overload -def from_native( - native_dataframe: IntoDataFrameT, - *, - strict: Literal[False], - eager_only: Literal[True], - eager_or_interchange_only: None = ..., - series_only: None = ..., - allow_series: None = ..., -) -> DataFrame[IntoDataFrameT]: ... - - -@overload -def from_native( - native_dataframe: T, - *, - strict: Literal[False], - eager_only: Literal[True], - eager_or_interchange_only: None = ..., - series_only: None = ..., - allow_series: None = ..., -) -> T: ... - - -@overload -def from_native( - native_dataframe: Any, - *, - strict: Literal[False], - eager_only: None = ..., - eager_or_interchange_only: None = ..., - series_only: None = ..., - allow_series: Literal[True], -) -> Any: ... - - -@overload -def from_native( - native_dataframe: Any, - *, - strict: Literal[False], - eager_only: None = ..., - eager_or_interchange_only: None = ..., - series_only: Literal[True], - allow_series: None = ..., -) -> Any: ... - - -@overload -def from_native( - native_dataframe: IntoFrameT, - *, - strict: Literal[False], - eager_only: None = ..., - eager_or_interchange_only: None = ..., - series_only: None = ..., - allow_series: None = ..., -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ... - - -@overload -def from_native( - native_dataframe: T, - *, - strict: Literal[False], - eager_only: None = ..., - eager_or_interchange_only: None = ..., - series_only: None = ..., - allow_series: None = ..., -) -> T: ... 
- - -@overload -def from_native( - native_dataframe: IntoDataFrameT, - *, - strict: Literal[True] = ..., - eager_only: None = ..., - eager_or_interchange_only: Literal[True], - series_only: None = ..., - allow_series: None = ..., -) -> DataFrame[IntoDataFrameT]: - """ - from_native(df, strict=True, eager_or_interchange_only=True) - from_native(df, eager_or_interchange_only=True) - """ - - -@overload -def from_native( - native_dataframe: IntoDataFrameT, - *, - strict: Literal[True] = ..., - eager_only: Literal[True], - eager_or_interchange_only: None = ..., - series_only: None = ..., - allow_series: None = ..., -) -> DataFrame[IntoDataFrameT]: - """ - from_native(df, strict=True, eager_only=True) - from_native(df, eager_only=True) - """ - - -@overload -def from_native( - native_dataframe: Any, - *, - strict: Literal[True] = ..., - eager_only: None = ..., - eager_or_interchange_only: None = ..., - series_only: None = ..., - allow_series: Literal[True], -) -> DataFrame[Any] | LazyFrame[Any] | Series: - """ - from_native(df, strict=True, allow_series=True) - from_native(df, allow_series=True) - """ - - -@overload -def from_native( - native_dataframe: Any, - *, - strict: Literal[True] = ..., - eager_only: None = ..., - eager_or_interchange_only: None = ..., - series_only: Literal[True], - allow_series: None = ..., -) -> Series: - """ - from_native(df, strict=True, series_only=True) - from_native(df, series_only=True) - """ - - -@overload -def from_native( - native_dataframe: IntoFrameT, - *, - strict: Literal[True] = ..., - eager_only: None = ..., - eager_or_interchange_only: None = ..., - series_only: None = ..., - allow_series: None = ..., -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: - """ - from_native(df, strict=True) - from_native(df) - """ - - -# All params passed in as variables -@overload -def from_native( - native_dataframe: Any, - *, - strict: bool, - eager_only: bool | None, - eager_or_interchange_only: bool | None = None, - series_only: bool | None, 
- allow_series: bool | None, -) -> Any: ... - - -def from_native( - native_dataframe: Any, - *, - strict: bool = True, - eager_only: bool | None = None, - eager_or_interchange_only: bool | None = None, - series_only: bool | None = None, - allow_series: bool | None = None, -) -> Any: - """ - Convert dataframe/series to Narwhals DataFrame, LazyFrame, or Series. - - Arguments: - native_dataframe: Raw object from user. - Depending on the other arguments, input object can be: - - - pandas.DataFrame - - polars.DataFrame - - polars.LazyFrame - - anything with a `__narwhals_dataframe__` or `__narwhals_lazyframe__` method - - pandas.Series - - polars.Series - - anything with a `__narwhals_series__` method - strict: Whether to raise if object can't be converted (default) or - to just leave it as-is. - eager_only: Whether to only allow eager objects. - eager_or_interchange_only: Whether to only allow eager objects or objects which - implement the Dataframe Interchange Protocol. - series_only: Whether to only allow series. - allow_series: Whether to allow series (default is only dataframe / lazyframe). - - Returns: - narwhals.DataFrame or narwhals.LazyFrame or narwhals.Series - """ - # Early returns - if isinstance(native_dataframe, (DataFrame, LazyFrame)) and not series_only: - return native_dataframe - if isinstance(native_dataframe, Series) and (series_only or allow_series): - return native_dataframe - result = nw.from_native( - native_dataframe, - strict=strict, - eager_only=eager_only, - eager_or_interchange_only=eager_or_interchange_only, - series_only=series_only, - allow_series=allow_series, - ) - return _stableify(result) - - -def narwhalify( - func: Callable[..., Any] | None = None, - *, - strict: bool = False, - eager_only: bool | None = False, - eager_or_interchange_only: bool | None = False, - series_only: bool | None = False, - allow_series: bool | None = True, -) -> Callable[..., Any]: - """ - Decorate function so it becomes dataframe-agnostic. 
- - `narwhalify` will try to convert any dataframe/series-like object into the narwhal - respective DataFrame/Series, while leaving the other parameters as they are. - - Similarly, if the output of the function is a narwhals DataFrame or Series, it will be - converted back to the original dataframe/series type, while if the output is another - type it will be left as is. - - By setting `strict=True`, then every input and every output will be required to be a - dataframe/series-like object. - - Instead of writing - - ```python - import narwhals.stable.v1 as nw - - - def func(df): - df = nw.from_native(df, strict=False) - df = df.group_by("a").agg(nw.col("b").sum()) - return nw.to_native(df) - ``` - - you can just write - - ```python - import narwhals.stable.v1 as nw - - - @nw.narwhalify - def func(df): - return df.group_by("a").agg(nw.col("b").sum()) - ``` - - You can also pass in extra arguments, e.g. - - ```python - @nw.narwhalify(eager_only=True) - ``` - - that will get passed down to `nw.from_native`. - - Arguments: - func: Function to wrap in a `from_native`-`to_native` block. - strict: Whether to raise if object can't be converted or to just leave it as-is - (default). - eager_only: Whether to only allow eager objects. - eager_or_interchange_only: Whether to only allow eager objects or objects which - implement the Dataframe Interchange Protocol. - series_only: Whether to only allow series. - allow_series: Whether to allow series (default is only dataframe / lazyframe). - """ - - return nw_narwhalify( - func=func, - strict=strict, - eager_only=eager_only, - eager_or_interchange_only=eager_or_interchange_only, - series_only=series_only, - allow_series=allow_series, - ) - - -def all() -> Expr: - """ - Instantiate an expression representing all columns. 
- - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import narwhals.stable.v1 as nw - >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - - Let's define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(nw.all() * 2) - - We can then pass either pandas or Polars to `func`: - - >>> func(df_pd) - a b - 0 2 8 - 1 4 10 - 2 6 12 - >>> func(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 2 ┆ 8 │ - │ 4 ┆ 10 │ - │ 6 ┆ 12 │ - └─────┴─────┘ - """ - return _stableify(nw.all()) - - -def col(*names: str | Iterable[str]) -> Expr: - """ - Creates an expression that references one or more columns by their name(s). - - Arguments: - names: Name(s) of the columns to use in the aggregation function. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [3, 4]}) - >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) - - We define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(nw.col("a") * nw.col("b")) - - We can then pass either pandas or polars to `func`: - - >>> func(df_pd) - a - 0 3 - 1 8 - >>> func(df_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 3 │ - │ 8 │ - └─────┘ - """ - return _stableify(nw.col(*names)) - - -def nth(*indices: int | Sequence[int]) -> Expr: - """ - Creates an expression that references one or more columns by their index(es). - - Notes: - `nth` is not supported for Polars version<1.0.0. Please use [`col`](/api-reference/narwhals/#narwhals.col) instead. - - Arguments: - indices: One or more indices representing the columns to retrieve. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals.stable.v1 as nw - >>> data = {"a": [1, 2], "b": [3, 4]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(nw.nth(0) * 2) - - We can then pass either pandas or polars to `func`: - - >>> func(df_pd) - a - 0 2 - 1 4 - >>> func(df_pl) # doctest: +SKIP - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - │ 4 │ - └─────┘ - >>> func(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2,4]] - """ - return _stableify(nw.nth(*indices)) - - -def len() -> Expr: - """ - Return the number of rows. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import narwhals.stable.v1 as nw - >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]}) - >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]}) - - Let's define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(nw.len()) - - We can then pass either pandas or Polars to `func`: - - >>> func(df_pd) - len - 0 2 - >>> func(df_pl) - shape: (1, 1) - ┌─────┐ - │ len │ - │ --- │ - │ u32 │ - ╞═════╡ - │ 2 │ - └─────┘ - """ - return _stableify(nw.len()) - - -def lit(value: Any, dtype: DType | None = None) -> Expr: - """ - Return an expression representing a literal value. - - Arguments: - value: The value to use as literal. - dtype: The data type of the literal value. If not provided, the data type will be inferred. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> df_pl = pl.DataFrame({"a": [1, 2]}) - >>> df_pd = pd.DataFrame({"a": [1, 2]}) - - We define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... 
return df.with_columns(nw.lit(3).alias("b")) - - We can then pass either pandas or polars to `func`: - - >>> func(df_pd) - a b - 0 1 3 - 1 2 3 - >>> func(df_pl) - shape: (2, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i32 │ - ╞═════╪═════╡ - │ 1 ┆ 3 │ - │ 2 ┆ 3 │ - └─────┴─────┘ - - """ - return _stableify(nw.lit(value, dtype)) - - -def min(*columns: str) -> Expr: - """ - Return the minimum value. - - Note: - Syntactic sugar for ``nw.col(columns).min()``. - - Arguments: - columns: Name(s) of the columns to use in the aggregation function. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import narwhals.stable.v1 as nw - >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]}) - >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]}) - - Let's define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(nw.min("b")) - - We can then pass either pandas or Polars to `func`: - - >>> func(df_pd) - b - 0 5 - >>> func(df_pl) - shape: (1, 1) - ┌─────┐ - │ b │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 5 │ - └─────┘ - """ - return _stableify(nw.min(*columns)) - - -def max(*columns: str) -> Expr: - """ - Return the maximum value. - - Note: - Syntactic sugar for ``nw.col(columns).max()``. - - Arguments: - columns: Name(s) of the columns to use in the aggregation function. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import narwhals.stable.v1 as nw - >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]}) - >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]}) - - Let's define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(nw.max("a")) - - We can then pass either pandas or Polars to `func`: - - >>> func(df_pd) - a - 0 2 - >>> func(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - └─────┘ - """ - return _stableify(nw.max(*columns)) - - -def mean(*columns: str) -> Expr: - """ - Get the mean value. 
- - Note: - Syntactic sugar for ``nw.col(columns).mean()`` - - Arguments: - columns: Name(s) of the columns to use in the aggregation function - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> df_pl = pl.DataFrame({"a": [1, 8, 3]}) - >>> df_pd = pd.DataFrame({"a": [1, 8, 3]}) - - We define a dataframe agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(nw.mean("a")) - - We can then pass either pandas or Polars to `func`: - - >>> func(df_pd) - a - 0 4.0 - >>> func(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 4.0 │ - └─────┘ - """ - return _stableify(nw.mean(*columns)) - - -def sum(*columns: str) -> Expr: - """ - Sum all values. - - Note: - Syntactic sugar for ``nw.col(columns).sum()`` - - Arguments: - columns: Name(s) of the columns to use in the aggregation function - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> df_pl = pl.DataFrame({"a": [1, 2]}) - >>> df_pd = pd.DataFrame({"a": [1, 2]}) - - We define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(nw.sum("a")) - - We can then pass either pandas or polars to `func`: - - >>> func(df_pd) - a - 0 3 - >>> func(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 3 │ - └─────┘ - """ - return _stableify(nw.sum(*columns)) - - -def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: - """ - Sum all values horizontally across columns. - - Warning: - Unlike Polars, we support horizontal sum over numeric columns only. - - Arguments: - exprs: Name(s) of the columns to use in the aggregation function. Accepts - expression input. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> data = {"a": [1, 2, 3], "b": [5, 10, None]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - - We define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(nw.sum_horizontal("a", "b")) - - We can then pass either pandas or polars to `func`: - - >>> func(df_pd) - a - 0 6.0 - 1 12.0 - 2 3.0 - >>> func(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 6 │ - │ 12 │ - │ 3 │ - └─────┘ - """ - return _stableify(nw.sum_horizontal(*exprs)) - - -def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: - r""" - Compute the bitwise AND horizontally across columns. - - Arguments: - exprs: Name(s) of the columns to use in the aggregation function. Accepts expression input. - - Notes: - pandas and Polars handle null values differently. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> data = { - ... "a": [False, False, True, True, False, None], - ... "b": [False, True, True, None, None, None], - ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - - We define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... 
return df.select("a", "b", all=nw.all_horizontal("a", "b")) - - We can then pass either pandas or polars to `func`: - - >>> func(df_pd) - a b all - 0 False False False - 1 False True False - 2 True True True - 3 True None False - 4 False None False - 5 None None False - - >>> func(df_pl) - shape: (6, 3) - ┌───────┬───────┬───────┐ - │ a ┆ b ┆ all │ - │ --- ┆ --- ┆ --- │ - │ bool ┆ bool ┆ bool │ - ╞═══════╪═══════╪═══════╡ - │ false ┆ false ┆ false │ - │ false ┆ true ┆ false │ - │ true ┆ true ┆ true │ - │ true ┆ null ┆ null │ - │ false ┆ null ┆ false │ - │ null ┆ null ┆ null │ - └───────┴───────┴───────┘ - """ - return _stableify(nw.all_horizontal(*exprs)) - - -def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: - r""" - Compute the bitwise OR horizontally across columns. - - Arguments: - exprs: Name(s) of the columns to use in the aggregation function. Accepts expression input. - - Notes: - pandas and Polars handle null values differently. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> data = { - ... "a": [False, False, True, True, False, None], - ... "b": [False, True, True, None, None, None], - ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - - We define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... 
return df.select("a", "b", any=nw.any_horizontal("a", "b")) - - We can then pass either pandas or polars to `func`: - - >>> func(df_pd) - a b any - 0 False False False - 1 False True True - 2 True True True - 3 True None True - 4 False None False - 5 None None False - - >>> func(df_pl) - shape: (6, 3) - ┌───────┬───────┬───────┐ - │ a ┆ b ┆ any │ - │ --- ┆ --- ┆ --- │ - │ bool ┆ bool ┆ bool │ - ╞═══════╪═══════╪═══════╡ - │ false ┆ false ┆ false │ - │ false ┆ true ┆ true │ - │ true ┆ true ┆ true │ - │ true ┆ null ┆ true │ - │ false ┆ null ┆ null │ - │ null ┆ null ┆ null │ - └───────┴───────┴───────┘ - """ - return _stableify(nw.any_horizontal(*exprs)) - - -def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: - """ - Compute the mean of all values horizontally across columns. - - Arguments: - exprs: Name(s) of the columns to use in the aggregation function. Accepts - expression input. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> data = { - ... "a": [1, 8, 3], - ... "b": [4, 5, None], - ... "c": ["x", "y", "z"], - ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - - We define a dataframe-agnostic function that computes the horizontal mean of "a" - and "b" columns: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(nw.mean_horizontal("a", "b")) - - We can then pass either pandas or polars to `func`: - - >>> func(df_pd) - a - 0 2.5 - 1 6.5 - 2 3.0 - >>> func(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 2.5 │ - │ 6.5 │ - │ 3.0 │ - └─────┘ - """ - return _stableify(nw.mean_horizontal(*exprs)) - - -@overload -def concat( - items: Iterable[DataFrame[Any]], - *, - how: Literal["horizontal", "vertical"] = "vertical", -) -> DataFrame[Any]: ... - - -@overload -def concat( - items: Iterable[LazyFrame[Any]], - *, - how: Literal["horizontal", "vertical"] = "vertical", -) -> LazyFrame[Any]: ... 
- - -def concat( - items: Iterable[DataFrame[Any] | LazyFrame[Any]], - *, - how: Literal["horizontal", "vertical"] = "vertical", -) -> DataFrame[Any] | LazyFrame[Any]: - """ - Concatenate multiple DataFrames, LazyFrames into a single entity. - - Arguments: - items: DataFrames, LazyFrames to concatenate. - - how: {'vertical', 'horizontal'} - * vertical: Stacks Series from DataFrames vertically and fills with `null` - if the lengths don't match. - * horizontal: Stacks Series from DataFrames horizontally and fills with `null` - if the lengths don't match. - - Returns: - A new DataFrame, Lazyframe resulting from the concatenation. - - Raises: - NotImplementedError: The items to concatenate should either all be eager, or all lazy - - Examples: - - Let's take an example of vertical concatenation: - - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> data_2 = {"a": [5, 2], "b": [1, 4]} - - >>> df_pd_1 = pd.DataFrame(data_1) - >>> df_pd_2 = pd.DataFrame(data_2) - >>> df_pl_1 = pl.DataFrame(data_1) - >>> df_pl_2 = pl.DataFrame(data_2) - - Let's define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df1, df2): - ... 
return nw.concat([df1, df2], how="vertical") - - >>> func(df_pd_1, df_pd_2) - a b - 0 1 4 - 1 2 5 - 2 3 6 - 0 5 1 - 1 2 4 - >>> func(df_pl_1, df_pl_2) - shape: (5, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - │ 5 ┆ 1 │ - │ 2 ┆ 4 │ - └─────┴─────┘ - - Let's look at case a for horizontal concatenation: - - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> data_2 = {"c": [5, 2], "d": [1, 4]} - - >>> df_pd_1 = pd.DataFrame(data_1) - >>> df_pd_2 = pd.DataFrame(data_2) - >>> df_pl_1 = pl.DataFrame(data_1) - >>> df_pl_2 = pl.DataFrame(data_2) - - Defining a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df1, df2): - ... return nw.concat([df1, df2], how="horizontal") - - >>> func(df_pd_1, df_pd_2) - a b c d - 0 1 4 5.0 1.0 - 1 2 5 2.0 4.0 - 2 3 6 NaN NaN - - >>> func(df_pl_1, df_pl_2) - shape: (3, 4) - ┌─────┬─────┬──────┬──────┐ - │ a ┆ b ┆ c ┆ d │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪══════╪══════╡ - │ 1 ┆ 4 ┆ 5 ┆ 1 │ - │ 2 ┆ 5 ┆ 2 ┆ 4 │ - │ 3 ┆ 6 ┆ null ┆ null │ - └─────┴─────┴──────┴──────┘ - - """ - return _stableify(nw.concat(items, how=how)) # type: ignore[no-any-return] - - -def is_ordered_categorical(series: Series) -> bool: - """ - Return whether indices of categories are semantically meaningful. - - This is a convenience function to accessing what would otherwise be - the `is_ordered` property from the DataFrame Interchange Protocol, - see https://data-apis.org/dataframe-protocol/latest/API.html. - - - For Polars: - - Enums are always ordered. - - Categoricals are ordered if `dtype.ordering == "physical"`. - - For pandas-like APIs: - - Categoricals are ordered if `dtype.cat.ordered == True`. - - For PyArrow table: - - Categoricals are ordered if `dtype.type.ordered == True`. 
- - Examples: - >>> import narwhals.stable.v1 as nw - >>> import pandas as pd - >>> import polars as pl - >>> data = ["x", "y"] - >>> s_pd = pd.Series(data, dtype=pd.CategoricalDtype(ordered=True)) - >>> s_pl = pl.Series(data, dtype=pl.Categorical(ordering="physical")) - - Let's define a library-agnostic function: - - >>> @nw.narwhalify - ... def func(s): - ... return nw.is_ordered_categorical(s) - - Then, we can pass any supported library to `func`: - - >>> func(s_pd) - True - >>> func(s_pl) - True - """ - return nw_is_ordered_categorical(series) - - -def maybe_align_index(lhs: T, rhs: Series | DataFrame[Any] | LazyFrame[Any]) -> T: - """ - Align `lhs` to the Index of `rhs`, if they're both pandas-like. - - Notes: - This is only really intended for backwards-compatibility purposes, - for example if your library already aligns indices for users. - If you're designing a new library, we highly encourage you to not - rely on the Index. - For non-pandas-like inputs, this only checks that `lhs` and `rhs` - are the same length. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> df_pd = pd.DataFrame({"a": [1, 2]}, index=[3, 4]) - >>> s_pd = pd.Series([6, 7], index=[4, 3]) - >>> df = nw.from_native(df_pd) - >>> s = nw.from_native(s_pd, series_only=True) - >>> nw.to_native(nw.maybe_align_index(df, s)) - a - 4 2 - 3 1 - """ - return nw_maybe_align_index(lhs, rhs) - - -def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: - """ - Convert columns or series to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like. - - Arguments: - obj: DataFrame or Series. - *args: Additional arguments which gets passed through. - **kwargs: Additional arguments which gets passed through. - - Notes: - For non-pandas-like inputs, this is a no-op. - Also, `args` and `kwargs` just get passed down to the underlying library as-is. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> import numpy as np - >>> df_pd = pd.DataFrame( - ... { - ... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), - ... "b": pd.Series([True, False, np.nan], dtype=np.dtype("O")), - ... } - ... ) - >>> df = nw.from_native(df_pd) - >>> nw.to_native(nw.maybe_convert_dtypes(df)).dtypes # doctest: +NORMALIZE_WHITESPACE - a Int32 - b boolean - dtype: object - """ - return nw_maybe_convert_dtypes(df, *args, **kwargs) - - -def maybe_get_index(obj: T) -> Any | None: - """ - Get the index of a DataFrame or a Series, if it's pandas-like. - - Notes: - This is only really intended for backwards-compatibility purposes, - for example if your library already aligns indices for users. - If you're designing a new library, we highly encourage you to not - rely on the Index. - For non-pandas-like inputs, this returns `None`. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}) - >>> df = nw.from_native(df_pd) - >>> nw.maybe_get_index(df) - RangeIndex(start=0, stop=2, step=1) - >>> series_pd = pd.Series([1, 2]) - >>> series = nw.from_native(series_pd, series_only=True) - >>> nw.maybe_get_index(series) - RangeIndex(start=0, stop=2, step=1) - """ - return nw_maybe_get_index(obj) - - -def maybe_set_index(df: T, column_names: str | list[str]) -> T: - """ - Set columns `columns` to be the index of `df`, if `df` is pandas-like. - - Notes: - This is only really intended for backwards-compatibility purposes, - for example if your library already aligns indices for users. - If you're designing a new library, we highly encourage you to not - rely on the Index. - For non-pandas-like inputs, this is a no-op. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}) - >>> df = nw.from_native(df_pd) - >>> nw.to_native(nw.maybe_set_index(df, "b")) # doctest: +NORMALIZE_WHITESPACE - a - b - 4 1 - 5 2 - """ - return nw_maybe_set_index(df, column_names) - - -def get_native_namespace(obj: Any) -> Any: - """ - Get native namespace from object. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import narwhals.stable.v1 as nw - >>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]})) - >>> nw.get_native_namespace(df) - - >>> df = nw.from_native(pl.DataFrame({"a": [1, 2, 3]})) - >>> nw.get_native_namespace(df) - - """ - return nw_get_native_namespace(obj) - - -def get_level( - obj: DataFrame[Any] | LazyFrame[Any] | Series, -) -> Literal["full", "interchange"]: - """ - Level of support Narwhals has for current object. - - This can be one of: - - - 'full': full Narwhals API support - - 'metadata': only metadata operations are supported (`df.schema`) - """ - return nw.get_level(obj) - - -class When(NwWhen): - @classmethod - def from_when(cls, when: NwWhen) -> Self: - return cls(*when._predicates) - - def then(self, value: Any) -> Then: - return Then.from_then(super().then(value)) - - -class Then(NwThen, Expr): - @classmethod - def from_then(cls, then: NwThen) -> Self: - return cls(then._call) - - def otherwise(self, value: Any) -> Expr: - return _stableify(super().otherwise(value)) - - -def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: - """ - Start a `when-then-otherwise` expression. - - Expression similar to an `if-else` statement in Python. Always initiated by a `pl.when().then()`., and optionally followed by chaining one or more `.when().then()` statements. - Chained when-then operations should be read as Python `if, elif, ... elif` blocks, not as `if, if, ... if`, i.e. the first condition that evaluates to `True` will be picked. 
- If none of the conditions are `True`, an optional `.otherwise()` can be appended at the end. If not appended, and none of the conditions are `True`, `None` will be returned. - - Arguments: - predicates: Condition(s) that must be met in order to apply the subsequent statement. Accepts one or more boolean expressions, which are implicitly combined with `&`. String input is parsed as a column name. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]}) - >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]}) - - We define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df_any): - ... return df_any.with_columns( - ... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") - ... ) - - We can then pass either pandas or polars to `func`: - - >>> func(df_pd) - a b a_when - 0 1 5 5 - 1 2 10 5 - 2 3 15 6 - >>> func(df_pl) - shape: (3, 3) - ┌─────┬─────┬────────┐ - │ a ┆ b ┆ a_when │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i32 │ - ╞═════╪═════╪════════╡ - │ 1 ┆ 5 ┆ 5 │ - │ 2 ┆ 10 ┆ 5 │ - │ 3 ┆ 15 ┆ 6 │ - └─────┴─────┴────────┘ - """ - return When.from_when(nw_when(*predicates)) - - -def new_series( - name: str, - values: Any, - dtype: DType | type[DType] | None = None, - *, - native_namespace: ModuleType, -) -> Series: - """ - Instantiate Narwhals Series from raw data. - - Arguments: - name: Name of resulting Series. - values: Values of make Series from. - dtype: (Narwhals) dtype. If not provided, the native library - may auto-infer it from `values`. - native_namespace: The native library to use for DataFrame creation. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - - Let's define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def func(df): - ... values = [4, 1, 2] - ... 
native_namespace = nw.get_native_namespace(df) - ... return nw.new_series("c", values, nw.Int32, native_namespace=native_namespace) - - Let's see what happens when passing pandas / Polars input: - - >>> func(pd.DataFrame(data)) - 0 4 - 1 1 - 2 2 - Name: c, dtype: int32 - >>> func(pl.DataFrame(data)) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: 'c' [i32] - [ - 4 - 1 - 2 - ] - """ - return _stableify( - nw.new_series(name, values, dtype, native_namespace=native_namespace) - ) - - -def from_dict( - data: dict[str, Any], - schema: dict[str, DType] | Schema | None = None, - *, - native_namespace: ModuleType | None = None, -) -> DataFrame[Any]: - """ - Instantiate DataFrame from dictionary. - - Notes: - For pandas-like dataframes, conversion to schema is applied after dataframe - creation. - - Arguments: - data: Dictionary to create DataFrame from. - schema: The DataFrame schema as Schema or dict of {name: type}. - native_namespace: The native library to use for DataFrame creation. Only - necessary if inputs are not Narwhals Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals.stable.v1 as nw - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - - Let's create a new dataframe of the same class as the dataframe we started with, from a dict of new data: - - >>> @nw.narwhalify - ... def func(df): - ... new_data = {"c": [5, 2], "d": [1, 4]} - ... native_namespace = nw.get_native_namespace(df) - ... 
return nw.from_dict(new_data, native_namespace=native_namespace) - - Let's see what happens when passing pandas / Polars input: - - >>> func(pd.DataFrame(data)) - c d - 0 5 1 - 1 2 4 - >>> func(pl.DataFrame(data)) - shape: (2, 2) - ┌─────┬─────┐ - │ c ┆ d │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 5 ┆ 1 │ - │ 2 ┆ 4 │ - └─────┴─────┘ - """ - return _stableify( # type: ignore[no-any-return] - nw.from_dict(data, schema=schema, native_namespace=native_namespace) - ) - - -__all__ = [ - "selectors", - "concat", - "dependencies", - "to_native", - "from_native", - "is_ordered_categorical", - "maybe_align_index", - "maybe_convert_dtypes", - "maybe_get_index", - "maybe_set_index", - "get_native_namespace", - "get_level", - "all", - "all_horizontal", - "any_horizontal", - "col", - "nth", - "len", - "lit", - "min", - "max", - "mean", - "mean_horizontal", - "sum", - "sum_horizontal", - "when", - "DataFrame", - "LazyFrame", - "Series", - "Expr", - "Int64", - "Int32", - "Int16", - "Int8", - "UInt64", - "UInt32", - "UInt16", - "UInt8", - "Float64", - "Float32", - "Boolean", - "Object", - "Unknown", - "Categorical", - "Enum", - "String", - "Datetime", - "Duration", - "Struct", - "Array", - "List", - "Date", - "narwhalify", - "show_versions", - "Schema", - "from_dict", - "new_series", -] From 114be74dfa3f907ce8864261c5ae7c61c379f91f Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 29 Sep 2024 21:37:50 +0100 Subject: [PATCH 18/27] fixup --- narwhals/dtypes.py | 2 -- narwhals/series.py | 5 +---- narwhals/stable/v1/dtypes.py | 23 ++++++++++++++++++++++- tests/stable_api_test.py | 6 +++--- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index df9f6ad15..dd761fda8 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -108,7 +108,6 @@ def __init__( self.time_zone = time_zone def __eq__(self: Self, other: object) -> bool: - breakpoint() # allow comparing object 
instances to class if type(other) is type and issubclass(other, self.__class__): return True @@ -118,7 +117,6 @@ def __eq__(self: Self, other: object) -> bool: return False def __hash__(self: Self) -> int: # pragma: no cover - breakpoint() return hash((self.__class__, self.time_unit, self.time_zone)) def __repr__(self: Self) -> str: # pragma: no cover diff --git a/narwhals/series.py b/narwhals/series.py index aa1955687..5a84a9a5d 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -8,7 +8,6 @@ from typing import Sequence from typing import overload -from narwhals import dtypes from narwhals.utils import parse_version if TYPE_CHECKING: @@ -34,8 +33,6 @@ class Series: `series_only=True`. """ - _dtypes = dtypes - @property def _dataframe(self) -> type[DataFrame[Any]]: from narwhals.dataframe import DataFrame @@ -340,7 +337,7 @@ def dtype(self: Self) -> DType: >>> func(s_pl) Int64 """ - return self._compliant_series.dtype(self._dtypes) # type: ignore[no-any-return] + return self._compliant_series.dtype # type: ignore[no-any-return] @property def name(self) -> str: diff --git a/narwhals/stable/v1/dtypes.py b/narwhals/stable/v1/dtypes.py index 942881ba4..163a2900d 100644 --- a/narwhals/stable/v1/dtypes.py +++ b/narwhals/stable/v1/dtypes.py @@ -2,7 +2,7 @@ from narwhals.dtypes import Boolean from narwhals.dtypes import Categorical from narwhals.dtypes import Date -from narwhals.dtypes import Datetime +from narwhals.dtypes import Datetime as NwDatetime from narwhals.dtypes import Duration from narwhals.dtypes import Enum from narwhals.dtypes import Float32 @@ -21,6 +21,27 @@ from narwhals.dtypes import UInt64 from narwhals.dtypes import Unknown + +class Datetime(NwDatetime): + """ + Data type representing a calendar date and time of day. + + Arguments: + time_unit: Unit of time. Defaults to `'us'` (microseconds). + time_zone: Time zone string, as defined in zoneinfo (to see valid strings run + `import zoneinfo; zoneinfo.available_timezones()` for a full list). 
+ When used to match dtypes, can set this to "*" to check for Datetime + columns that have any (non-null) timezone. + + Notes: + Adapted from Polars implementation at: + https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L398-L457 + """ + + def __hash__(self) -> int: + return hash(self.__class__) + + __all__ = [ "Array", "Boolean", diff --git a/tests/stable_api_test.py b/tests/stable_api_test.py index a12b20cc6..1e48c39ae 100644 --- a/tests/stable_api_test.py +++ b/tests/stable_api_test.py @@ -139,7 +139,7 @@ def test_series_docstrings() -> None: def test_dtypes(constructor: Constructor) -> None: - df = nw.from_native(constructor({"a": [1], "b": [datetime(2020, 1, 1)]})) + df = nw_v1.from_native(constructor({"a": [1], "b": [datetime(2020, 1, 1)]})) dtype = df.collect_schema()["b"] - assert dtype in {nw.Datetime} - assert isinstance(dtype, nw.Datetime) + assert dtype in {nw_v1.Datetime} + assert isinstance(dtype, nw_v1.Datetime) From 587d917147de78b6d0388752fbea03d428f97b58 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 29 Sep 2024 21:51:59 +0100 Subject: [PATCH 19/27] reduce diff --- narwhals/_polars/utils.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index 1ef2bb889..b2f060906 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -117,19 +117,11 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], dtypes: DTypes) -> Any: raise NotImplementedError(msg) if dtype == dtypes.Date: return pl.Date() - if ( - dtype == dtypes.Datetime - or isinstance(dtype, dtypes.Datetime) - or (isinstance(dtype, type) and issubclass(dtype, dtypes.Datetime)) - ): + if dtype == dtypes.Datetime or isinstance(dtype, dtypes.Datetime): dt_time_unit = getattr(dtype, "time_unit", "us") dt_time_zone = getattr(dtype, "time_zone", None) return pl.Datetime(dt_time_unit, dt_time_zone) # type: 
ignore[arg-type] - if ( - dtype == dtypes.Duration - or isinstance(dtype, dtypes.Duration) - or (isinstance(dtype, type) and issubclass(dtype, dtypes.Duration)) - ): + if dtype == dtypes.Duration or isinstance(dtype, dtypes.Duration): du_time_unit: Literal["us", "ns", "ms"] = getattr(dtype, "time_unit", "us") return pl.Duration(time_unit=du_time_unit) From dd050a83ef547319c170cd5ef698b9ceb1c4f9b8 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 29 Sep 2024 21:53:26 +0100 Subject: [PATCH 20/27] stableify duration too --- narwhals/stable/v1/dtypes.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/narwhals/stable/v1/dtypes.py b/narwhals/stable/v1/dtypes.py index 163a2900d..331b270ac 100644 --- a/narwhals/stable/v1/dtypes.py +++ b/narwhals/stable/v1/dtypes.py @@ -3,7 +3,7 @@ from narwhals.dtypes import Categorical from narwhals.dtypes import Date from narwhals.dtypes import Datetime as NwDatetime -from narwhals.dtypes import Duration +from narwhals.dtypes import Duration as NwDuration from narwhals.dtypes import Enum from narwhals.dtypes import Float32 from narwhals.dtypes import Float64 @@ -42,6 +42,22 @@ def __hash__(self) -> int: return hash(self.__class__) +class Duration(NwDuration): + """ + Data type representing a time duration. + + Arguments: + time_unit: Unit of time. Defaults to `'us'` (microseconds). 
+ + Notes: + Adapted from Polars implementation at: + https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L460-L502 + """ + + def __hash__(self) -> int: + return hash(self.__class__) + + __all__ = [ "Array", "Boolean", From b4de1f7ce22043976c8c7973e9f5adc0d22b8082 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 29 Sep 2024 22:00:58 +0100 Subject: [PATCH 21/27] test duration too --- tests/stable_api_test.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/stable_api_test.py b/tests/stable_api_test.py index 1e48c39ae..7a67f5723 100644 --- a/tests/stable_api_test.py +++ b/tests/stable_api_test.py @@ -1,4 +1,5 @@ from datetime import datetime +from datetime import timedelta from typing import Any import polars as pl @@ -139,7 +140,12 @@ def test_series_docstrings() -> None: def test_dtypes(constructor: Constructor) -> None: - df = nw_v1.from_native(constructor({"a": [1], "b": [datetime(2020, 1, 1)]})) + df = nw_v1.from_native( + constructor({"a": [1], "b": [datetime(2020, 1, 1)], "c": [timedelta(1)]}) + ) dtype = df.collect_schema()["b"] assert dtype in {nw_v1.Datetime} assert isinstance(dtype, nw_v1.Datetime) + dtype = df.collect_schema()["c"] + assert dtype in {nw_v1.Duration} + assert isinstance(dtype, nw_v1.Duration) From 458f2a286467dfcffc381e1305055f249c117376 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 29 Sep 2024 22:05:36 +0100 Subject: [PATCH 22/27] try removing pytz --- requirements-dev.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 3dbcb3acd..2158f0821 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,7 +9,6 @@ pytest pytest-cov pytest-randomly pytest-env -pytz hypothesis scikit-learn typing_extensions From 34c27ef502bc4f9fb17c0fd4462af33d15c93aa6 Mon Sep 17 00:00:00 2001 From: Marco Gorelli 
<33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 29 Sep 2024 22:26:42 +0100 Subject: [PATCH 23/27] try fix ci --- .github/workflows/extremes.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index f11a4f4bb..fb8d8de4a 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -121,6 +121,9 @@ jobs: run: python -m pip install *.whl - name: install-reqs run: uv pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt --system + - name: install cython + # needed for pandas nightly, else it'll install cython nightly which isn't compatible with pandas nightly + run: uv pip install --upgrade cython --system - name: uninstall pyarrow run: uv pip uninstall pyarrow --system # - name: install pyarrow nightly From 0de71a6a2c4b40233f731d44850b6f5ec2c24ce3 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 29 Sep 2024 22:29:33 +0100 Subject: [PATCH 24/27] try fix ci --- .github/workflows/extremes.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index fb8d8de4a..bc7d76f69 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -131,7 +131,7 @@ jobs: - name: uninstall pandas run: uv pip uninstall pandas --system - name: install-pandas-nightly - run: uv pip install --prerelease=allow --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas --system + run: uv pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas --system - name: uninstall numpy run: uv pip uninstall numpy --system - name: install numpy nightly From d105911803682715741a92ce8f64deaa99210ea7 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 29 Sep 2024 22:33:01 +0100 Subject: [PATCH 
25/27] try fix ci --- .github/workflows/extremes.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index bc7d76f69..9b9276e54 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -121,15 +121,17 @@ jobs: run: python -m pip install *.whl - name: install-reqs run: uv pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt --system - - name: install cython - # needed for pandas nightly, else it'll install cython nightly which isn't compatible with pandas nightly - run: uv pip install --upgrade cython --system - name: uninstall pyarrow run: uv pip uninstall pyarrow --system # - name: install pyarrow nightly # run: uv pip install --extra-index-url https://pypi.fury.io/arrow-nightlies/ --pre pyarrow --system - name: uninstall pandas run: uv pip uninstall pandas --system + - name: install cython + # needed for pandas nightly, else it'll install cython nightly which isn't compatible with pandas nightly + run: uv pip install --upgrade cython --system + - name: show-deps + run: uv pip freeze - name: install-pandas-nightly run: uv pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas --system - name: uninstall numpy From a773d8581b9b34fd569e2dbea55062f4d4cd3cea Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 29 Sep 2024 22:39:07 +0100 Subject: [PATCH 26/27] try fix ci --- .github/workflows/extremes.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index 9b9276e54..5ad3b36ba 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -133,7 +133,7 @@ jobs: - name: show-deps run: uv pip freeze - name: install-pandas-nightly - run: uv pip install --pre --extra-index-url 
https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas --system + run: pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas - name: uninstall numpy run: uv pip uninstall numpy --system - name: install numpy nightly From 0149431cd739959927738cb105ab5fd086ed0f96 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 29 Sep 2024 22:42:47 +0100 Subject: [PATCH 27/27] try fix ci --- .github/workflows/extremes.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index 5ad3b36ba..858d0b6e2 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -119,6 +119,8 @@ jobs: kaggle kernels output "marcogorelli/variable-brink-glacier" - name: install-polars run: python -m pip install *.whl + - name: install-pandas-nightly + run: pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas - name: install-reqs run: uv pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt --system - name: uninstall pyarrow @@ -127,13 +129,8 @@ jobs: # run: uv pip install --extra-index-url https://pypi.fury.io/arrow-nightlies/ --pre pyarrow --system - name: uninstall pandas run: uv pip uninstall pandas --system - - name: install cython - # needed for pandas nightly, else it'll install cython nightly which isn't compatible with pandas nightly - run: uv pip install --upgrade cython --system - name: show-deps run: uv pip freeze - - name: install-pandas-nightly - run: pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas - name: uninstall numpy run: uv pip uninstall numpy --system - name: install numpy nightly