Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete base and loffset parameters to resample #9233

Merged
merged 7 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ New Features

Breaking changes
~~~~~~~~~~~~~~~~
- The ``base`` and ``loffset`` parameters to :py:meth:`Dataset.resample` and :py:meth:`DataArray.resample`
are now removed. These parameters have been deprecated since v2023.03.0. Using the
``origin`` or ``offset`` parameters is recommended as a replacement for using
the ``base`` parameter and using time offset arithmetic is recommended as a
replacement for using the ``loffset`` parameter.


Deprecations
Expand Down
46 changes: 2 additions & 44 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,10 +881,8 @@ def _resample(
skipna: bool | None,
closed: SideOptions | None,
label: SideOptions | None,
base: int | None,
offset: pd.Timedelta | datetime.timedelta | str | None,
origin: str | DatetimeLike,
loffset: datetime.timedelta | str | None,
restore_coord_dims: bool | None,
**indexer_kwargs: str | Resampler,
) -> T_Resample:
Expand All @@ -906,16 +904,6 @@ def _resample(
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.

.. deprecated:: 2023.03.0
Following pandas, the ``base`` parameter is deprecated in favor
of the ``origin`` and ``offset`` parameters, and will be removed
in a future version of xarray.

origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
Expand All @@ -928,15 +916,6 @@ def _resample(
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.

.. deprecated:: 2023.03.0
Following pandas, the ``loffset`` parameter is deprecated in favor
of using time offset arithmetic, and will be removed in a future
version of xarray.

restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
Expand Down Expand Up @@ -1072,18 +1051,6 @@ def _resample(
from xarray.core.groupers import Resampler, TimeResampler
from xarray.core.resample import RESAMPLE_DIM

# note: the second argument (now 'skipna') used to be 'dim'
if (
(skipna is not None and not isinstance(skipna, bool))
or ("how" in indexer_kwargs and "how" not in self.dims)
or ("dim" in indexer_kwargs and "dim" not in self.dims)
):
raise TypeError(
"resample() no longer supports the `how` or "
"`dim` arguments. Instead call methods on resample "
"objects, e.g., data.resample(time='1D').mean()"
)

indexer = either_dict_or_kwargs(indexer, indexer_kwargs, "resample")
if len(indexer) != 1:
raise ValueError("Resampling only supported along single dimensions.")
Expand All @@ -1093,22 +1060,13 @@ def _resample(
dim_coord = self[dim]

group = DataArray(
dim_coord,
coords=dim_coord.coords,
dims=dim_coord.dims,
name=RESAMPLE_DIM,
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
)

grouper: Resampler
if isinstance(freq, str):
grouper = TimeResampler(
freq=freq,
closed=closed,
label=label,
origin=origin,
offset=offset,
loffset=loffset,
base=base,
freq=freq, closed=closed, label=label, origin=origin, offset=offset
)
elif isinstance(freq, Resampler):
grouper = freq
Expand Down
17 changes: 0 additions & 17 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -7245,10 +7245,8 @@ def resample(
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
base: int | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
loffset: datetime.timedelta | str | None = None,
restore_coord_dims: bool | None = None,
**indexer_kwargs: str | Resampler,
) -> DataArrayResample:
Expand All @@ -7270,10 +7268,6 @@ def resample(
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
Expand All @@ -7286,15 +7280,6 @@ def resample(
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.

.. deprecated:: 2023.03.0
Following pandas, the ``loffset`` parameter is deprecated in favor
of using time offset arithmetic, and will be removed in a future
version of xarray.

restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
Expand Down Expand Up @@ -7399,10 +7384,8 @@ def resample(
skipna=skipna,
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
loffset=loffset,
restore_coord_dims=restore_coord_dims,
**indexer_kwargs,
)
Expand Down
17 changes: 0 additions & 17 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10626,10 +10626,8 @@ def resample(
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
base: int | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
loffset: datetime.timedelta | str | None = None,
restore_coord_dims: bool | None = None,
**indexer_kwargs: str | Resampler,
) -> DatasetResample:
Expand All @@ -10651,10 +10649,6 @@ def resample(
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
Expand All @@ -10667,15 +10661,6 @@ def resample(
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.

.. deprecated:: 2023.03.0
Following pandas, the ``loffset`` parameter is deprecated in favor
of using time offset arithmetic, and will be removed in a future
version of xarray.

restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
Expand Down Expand Up @@ -10708,10 +10693,8 @@ def resample(
skipna=skipna,
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
loffset=loffset,
restore_coord_dims=restore_coord_dims,
**indexer_kwargs,
)
Expand Down
118 changes: 13 additions & 105 deletions xarray/core/groupers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import datetime
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Literal, cast
from typing import Any, Literal, cast

import numpy as np
import pandas as pd
Expand All @@ -21,12 +21,8 @@
from xarray.core.indexes import safe_cast_to_index
from xarray.core.resample_cftime import CFTimeGrouper
from xarray.core.types import Bins, DatetimeLike, GroupIndices, SideOptions
from xarray.core.utils import emit_user_level_warning
from xarray.core.variable import Variable

if TYPE_CHECKING:
pass

__all__ = [
"EncodedGroups",
"Grouper",
Expand Down Expand Up @@ -299,17 +295,7 @@ class TimeResampler(Resampler):
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.

.. deprecated:: 2023.03.0
Following pandas, the ``base`` parameter is deprecated in favor
of the ``origin`` and ``offset`` parameters, and will be removed
in a future version of xarray.

origin : {"epoch", "start", "start_day", "end", "end_day"}, pandas.Timestamp, datetime.datetime, numpy.datetime64, or cftime.datetime, default: "start_day"
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pandas.Timestamp, datetime.datetime, numpy.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.

Expand All @@ -321,60 +307,22 @@ class TimeResampler(Resampler):
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.

.. deprecated:: 2023.03.0
Following pandas, the ``loffset`` parameter is deprecated in favor
of using time offset arithmetic, and will be removed in a future
version of xarray.

"""

freq: str
closed: SideOptions | None = field(default=None)
label: SideOptions | None = field(default=None)
origin: str | DatetimeLike = field(default="start_day")
offset: pd.Timedelta | datetime.timedelta | str | None = field(default=None)
loffset: datetime.timedelta | str | None = field(default=None)
base: int | None = field(default=None)

index_grouper: CFTimeGrouper | pd.Grouper = field(init=False, repr=False)
group_as_index: pd.Index = field(init=False, repr=False)

def __post_init__(self):
if self.loffset is not None:
emit_user_level_warning(
"Following pandas, the `loffset` parameter to resample is deprecated. "
"Switch to updating the resampled dataset time coordinate using "
"time offset arithmetic. For example:\n"
" >>> offset = pd.tseries.frequencies.to_offset(freq) / 2\n"
' >>> resampled_ds["time"] = resampled_ds.get_index("time") + offset',
FutureWarning,
)

if self.base is not None:
emit_user_level_warning(
"Following pandas, the `base` parameter to resample will be deprecated in "
"a future version of xarray. Switch to using `origin` or `offset` instead.",
FutureWarning,
)

if self.base is not None and self.offset is not None:
raise ValueError("base and offset cannot be present at the same time")

def _init_properties(self, group: T_Group) -> None:
from xarray import CFTimeIndex
from xarray.core.pdcompat import _convert_base_to_offset

group_as_index = safe_cast_to_index(group)

if self.base is not None:
# grouper constructor verifies that grouper.offset is None at this point
offset = _convert_base_to_offset(self.base, self.freq, group_as_index)
else:
offset = self.offset
offset = self.offset

if not group_as_index.is_monotonic_increasing:
# TODO: sort instead of raising an error
Expand All @@ -389,7 +337,6 @@ def _init_properties(self, group: T_Group) -> None:
label=self.label,
origin=self.origin,
offset=offset,
loffset=self.loffset,
)
else:
self.index_grouper = pd.Grouper(
Expand Down Expand Up @@ -419,18 +366,16 @@ def first_items(self) -> tuple[pd.Series, np.ndarray]:
return self.index_grouper.first_items(
cast(CFTimeIndex, self.group_as_index)
)

s = pd.Series(np.arange(self.group_as_index.size), self.group_as_index)
grouped = s.groupby(self.index_grouper)
first_items = grouped.first()
counts = grouped.count()
# This way we generate codes for the final output index: full_index.
# So for _flox_reduce we avoid one reindex and copy by avoiding
# _maybe_restore_empty_groups
codes = np.repeat(np.arange(len(first_items)), counts)
if self.loffset is not None:
_apply_loffset(self.loffset, first_items)
return first_items, codes
else:
s = pd.Series(np.arange(self.group_as_index.size), self.group_as_index)
grouped = s.groupby(self.index_grouper)
first_items = grouped.first()
counts = grouped.count()
# This way we generate codes for the final output index: full_index.
# So for _flox_reduce we avoid one reindex and copy by avoiding
# _maybe_restore_empty_groups
codes = np.repeat(np.arange(len(first_items)), counts)
return first_items, codes

def factorize(self, group: T_Group) -> EncodedGroups:
self._init_properties(group)
Expand All @@ -454,43 +399,6 @@ def factorize(self, group: T_Group) -> EncodedGroups:
)


def _apply_loffset(
loffset: str | pd.DateOffset | datetime.timedelta | pd.Timedelta,
result: pd.Series | pd.DataFrame,
):
"""
(copied from pandas)
if loffset is set, offset the result index

This is NOT an idempotent routine, it will be applied
exactly once to the result.

Parameters
----------
result : Series or DataFrame
the result of resample
"""
# pd.Timedelta is a subclass of datetime.timedelta so we do not need to
# include it in instance checks.
if not isinstance(loffset, (str, pd.DateOffset, datetime.timedelta)):
raise ValueError(
f"`loffset` must be a str, pd.DateOffset, datetime.timedelta, or pandas.Timedelta object. "
f"Got {loffset}."
)

if isinstance(loffset, str):
loffset = pd.tseries.frequencies.to_offset(loffset) # type: ignore[assignment]

needs_offset = (
isinstance(loffset, (pd.DateOffset, datetime.timedelta))
and isinstance(result.index, pd.DatetimeIndex)
and len(result.index) > 0
)

if needs_offset:
result.index = result.index + loffset


def unique_value_groups(
ar, sort: bool = True
) -> tuple[np.ndarray | pd.Index, np.ndarray]:
Expand Down
Loading
Loading