Skip to content

Commit

Permalink
Add days_in_year and decimal_year to dt accessor (pydata#9105)
Browse files Browse the repository at this point in the history
* Add days_in_year and decimal_year to dt accessor

* Upd whats new - add gregorian calendar - rename to decimal_year

* Add to api.rst and pr number

* Add requires cftime decorators where needed

* Rewrite functions using suggestions from review

* cleaner custom date field - docstrings - remove bad merge

* add new fields to dask access test

* Revert to rollback method

* Revert "Revert to rollback method"

This reverts commit 3f429c9.

* explicit float cast?

* Revert back to rollback method

* Fix dask compatibility issues

* Approach that passes tests under NumPy 1.26.4

* Adapt decimal_year test to be more comprehensive

* Use proper sphinx roles for cross-referencing.

---------

Co-authored-by: Spencer Clark <[email protected]>
  • Loading branch information
aulemahal and spencerkclark authored Sep 10, 2024
1 parent cea354f commit cc74d3a
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 52 deletions.
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -529,9 +529,11 @@ Datetimelike properties
DataArray.dt.quarter
DataArray.dt.days_in_month
DataArray.dt.daysinmonth
DataArray.dt.days_in_year
DataArray.dt.season
DataArray.dt.time
DataArray.dt.date
DataArray.dt.decimal_year
DataArray.dt.calendar
DataArray.dt.is_month_start
DataArray.dt.is_month_end
Expand Down
7 changes: 7 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ v2024.07.1 (unreleased)

New Features
~~~~~~~~~~~~

- Add :py:attr:`~core.accessor_dt.DatetimeAccessor.days_in_year` and :py:attr:`~core.accessor_dt.DatetimeAccessor.decimal_year` to the Datetime accessor on DataArrays. (:pull:`9105`).
By `Pascal Bourgault <https://github.com/aulemahal>`_.

Performance
~~~~~~~~~~~

- Make chunk manager an option in ``set_options`` (:pull:`9362`).
By `Tom White <https://github.com/tomwhite>`_.
- Support for :ref:`grouping by multiple variables <groupby.multiple>`.
Expand Down
115 changes: 63 additions & 52 deletions xarray/coding/calendar_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
_should_cftime_be_used,
convert_times,
)
from xarray.core.common import _contains_datetime_like_objects, is_np_datetime_like
from xarray.core.common import (
_contains_datetime_like_objects,
full_like,
is_np_datetime_like,
)
from xarray.core.computation import apply_ufunc

try:
import cftime
Expand All @@ -25,16 +30,6 @@
]


def _days_in_year(year, calendar, use_cftime=True):
"""Return the number of days in the input year according to the input calendar."""
date_type = get_date_type(calendar, use_cftime=use_cftime)
if year == -1 and calendar in _CALENDARS_WITHOUT_YEAR_ZERO:
difference = date_type(year + 2, 1, 1) - date_type(year, 1, 1)
else:
difference = date_type(year + 1, 1, 1) - date_type(year, 1, 1)
return difference.days


def convert_calendar(
obj,
calendar,
Expand Down Expand Up @@ -191,11 +186,7 @@ def convert_calendar(
# Special case for conversion involving 360_day calendar
if align_on == "year":
# Instead of translating dates directly, this tries to keep the position within a year similar.
new_doy = time.groupby(f"{dim}.year").map(
_interpolate_day_of_year,
target_calendar=calendar,
use_cftime=use_cftime,
)
new_doy = _interpolate_day_of_year(time, target_calendar=calendar)
elif align_on == "random":
# The 5 days to remove are randomly chosen, one for each of the five 72-days periods of the year.
new_doy = time.groupby(f"{dim}.year").map(
Expand Down Expand Up @@ -242,16 +233,25 @@ def convert_calendar(
return out


def _interpolate_day_of_year(time, target_calendar, use_cftime):
"""Returns the nearest day in the target calendar of the corresponding
"decimal year" in the source calendar.
"""
year = int(time.dt.year[0])
source_calendar = time.dt.calendar
def _is_leap_year(years, calendar):
func = np.vectorize(cftime.is_leap_year)
return func(years, calendar=calendar)


def _days_in_year(years, calendar):
"""The number of days in the year according to given calendar."""
if calendar == "360_day":
return full_like(years, 360)
return _is_leap_year(years, calendar).astype(int) + 365


def _interpolate_day_of_year(times, target_calendar):
"""Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar."""
source_calendar = times.dt.calendar
return np.round(
_days_in_year(year, target_calendar, use_cftime)
* time.dt.dayofyear
/ _days_in_year(year, source_calendar, use_cftime)
_days_in_year(times.dt.year, target_calendar)
* times.dt.dayofyear
/ _days_in_year(times.dt.year, source_calendar)
).astype(int)


Expand All @@ -260,18 +260,18 @@ def _random_day_of_year(time, target_calendar, use_cftime):
Removes Feb 29th and five other days chosen randomly within five sections of 72 days.
"""
year = int(time.dt.year[0])
year = time.dt.year[0]
source_calendar = time.dt.calendar
new_doy = np.arange(360) + 1
rm_idx = np.random.default_rng().integers(0, 72, 5) + 72 * np.arange(5)
if source_calendar == "360_day":
for idx in rm_idx:
new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1
if _days_in_year(year, target_calendar, use_cftime) == 366:
if _days_in_year(year, target_calendar) == 366:
new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1
elif target_calendar == "360_day":
new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1)
if _days_in_year(year, source_calendar, use_cftime) == 366:
if _days_in_year(year, source_calendar) == 366:
new_doy = np.insert(new_doy, 60, -1)
return new_doy[time.dt.dayofyear - 1]

Expand Down Expand Up @@ -304,32 +304,45 @@ def _convert_to_new_calendar_with_new_day_of_year(
return np.nan


def _datetime_to_decimal_year(times, dim="time", calendar=None):
"""Convert a datetime DataArray to decimal years according to its calendar or the given one.
def _decimal_year_cftime(time, year, days_in_year, *, date_class):
year_start = date_class(year, 1, 1)
delta = np.timedelta64(time - year_start, "ns")
days_in_year = np.timedelta64(days_in_year, "D")
return year + delta / days_in_year


def _decimal_year_numpy(time, year, days_in_year, *, dtype):
time = np.asarray(time).astype(dtype)
year_start = np.datetime64(int(year) - 1970, "Y").astype(dtype)
delta = time - year_start
days_in_year = np.timedelta64(days_in_year, "D")
return year + delta / days_in_year


def _decimal_year(times):
"""Convert a datetime DataArray to decimal years according to its calendar.
The decimal year of a timestamp is its year plus its sub-year component
converted to the fraction of its year.
Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar,
2000.16301 in a "noleap" or 2000.16806 in a "360_day".
"""
from xarray.core.dataarray import DataArray

calendar = calendar or times.dt.calendar

if is_np_datetime_like(times.dtype):
times = times.copy(data=convert_times(times.values, get_date_type("standard")))

def _make_index(time):
year = int(time.dt.year[0])
doys = cftime.date2num(time, f"days since {year:04d}-01-01", calendar=calendar)
return DataArray(
year + doys / _days_in_year(year, calendar),
dims=(dim,),
coords=time.coords,
name=dim,
)

return times.groupby(f"{dim}.year").map(_make_index)
if times.dtype == "O":
function = _decimal_year_cftime
kwargs = {"date_class": get_date_type(times.dt.calendar, True)}
else:
function = _decimal_year_numpy
kwargs = {"dtype": times.dtype}
return apply_ufunc(
function,
times,
times.dt.year,
times.dt.days_in_year,
kwargs=kwargs,
vectorize=True,
dask="parallelized",
output_dtypes=[np.float64],
)


def interp_calendar(source, target, dim="time"):
Expand Down Expand Up @@ -372,9 +385,7 @@ def interp_calendar(source, target, dim="time"):
f"Both 'source.{dim}' and 'target' must contain datetime objects."
)

source_calendar = source[dim].dt.calendar
target_calendar = target.dt.calendar

if (
source[dim].time.dt.year == 0
).any() and target_calendar in _CALENDARS_WITHOUT_YEAR_ZERO:
Expand All @@ -383,8 +394,8 @@ def interp_calendar(source, target, dim="time"):
)

out = source.copy()
out[dim] = _datetime_to_decimal_year(source[dim], dim=dim, calendar=source_calendar)
target_idx = _datetime_to_decimal_year(target, dim=dim, calendar=target_calendar)
out[dim] = _decimal_year(source[dim])
target_idx = _decimal_year(target)
out = out.interp(**{dim: target_idx})
out[dim] = target
return out
5 changes: 5 additions & 0 deletions xarray/coding/cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,11 @@ def round(self, freq):
"""
return self._round_via_method(freq, _round_to_nearest_half_even)

@property
def is_leap_year(self):
func = np.vectorize(cftime.is_leap_year)
return func(self.year, calendar=self.calendar)


def _parse_iso8601_without_reso(date_type, datetime_str):
date, _ = _parse_iso8601_with_reso(date_type, datetime_str)
Expand Down
29 changes: 29 additions & 0 deletions xarray/core/accessor_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
import numpy as np
import pandas as pd

from xarray.coding.calendar_ops import _decimal_year
from xarray.coding.times import infer_calendar_name
from xarray.core import duck_array_ops
from xarray.core.common import (
_contains_datetime_like_objects,
full_like,
is_np_datetime_like,
is_np_timedelta_like,
)
Expand Down Expand Up @@ -543,6 +545,33 @@ def calendar(self) -> CFCalendar:
"""
return infer_calendar_name(self._obj.data)

@property
def days_in_year(self) -> T_DataArray:
"""Each datetime as the year plus the fraction of the year elapsed."""
if self.calendar == "360_day":
result = full_like(self.year, 360)
else:
result = self.is_leap_year.astype(int) + 365
newvar = Variable(
dims=self._obj.dims,
attrs=self._obj.attrs,
encoding=self._obj.encoding,
data=result,
)
return self._obj._replace(newvar, name="days_in_year")

@property
def decimal_year(self) -> T_DataArray:
"""Convert the dates as a fractional year."""
result = _decimal_year(self._obj)
newvar = Variable(
dims=self._obj.dims,
attrs=self._obj.attrs,
encoding=self._obj.encoding,
data=result,
)
return self._obj._replace(newvar, name="decimal_year")


class TimedeltaAccessor(TimeAccessor[T_DataArray]):
"""Access Timedelta fields for DataArrays with Timedelta-like dtypes.
Expand Down
55 changes: 55 additions & 0 deletions xarray/tests/test_accessor_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,17 @@ def test_strftime(self) -> None:
"2000-01-01 01:00:00" == self.data.time.dt.strftime("%Y-%m-%d %H:%M:%S")[1]
)

@requires_cftime
@pytest.mark.parametrize(
"calendar,expected",
[("standard", 366), ("noleap", 365), ("360_day", 360), ("all_leap", 366)],
)
def test_days_in_year(self, calendar, expected) -> None:
assert (
self.data.convert_calendar(calendar, align_on="year").time.dt.days_in_year
== expected
).all()

def test_not_datetime_type(self) -> None:
nontime_data = self.data.copy()
int_data = np.arange(len(self.data.time)).astype("int8")
Expand Down Expand Up @@ -177,6 +188,7 @@ def test_not_datetime_type(self) -> None:
"is_year_start",
"is_year_end",
"is_leap_year",
"days_in_year",
],
)
def test_dask_field_access(self, field) -> None:
Expand Down Expand Up @@ -698,3 +710,46 @@ def test_cftime_round_accessor(
result = cftime_rounding_dataarray.dt.round(freq)

assert_identical(result, expected)


@pytest.mark.parametrize(
"use_cftime",
[False, pytest.param(True, marks=requires_cftime)],
ids=lambda x: f"use_cftime={x}",
)
@pytest.mark.parametrize(
"use_dask",
[False, pytest.param(True, marks=requires_dask)],
ids=lambda x: f"use_dask={x}",
)
def test_decimal_year(use_cftime, use_dask) -> None:
year = 2000
periods = 10
freq = "h"

shape = (2, 5)
dims = ["x", "y"]
hours_in_year = 24 * 366

times = xr.date_range(f"{year}", periods=periods, freq=freq, use_cftime=use_cftime)

da = xr.DataArray(times.values.reshape(shape), dims=dims)

if use_dask:
da = da.chunk({"y": 2})
# Computing the decimal year for a cftime datetime array requires a
# number of small computes (6):
# - 4x one compute per .dt accessor call (requires inspecting one
# object-dtype array element to see if it is time-like)
# - 2x one compute per calendar inference (requires inspecting one
# array element to read off the calendar)
max_computes = 6 * use_cftime
with raise_if_dask_computes(max_computes=max_computes):
result = da.dt.decimal_year
else:
result = da.dt.decimal_year

expected = xr.DataArray(
year + np.arange(periods).reshape(shape) / hours_in_year, dims=dims
)
xr.testing.assert_equal(result, expected)

0 comments on commit cc74d3a

Please sign in to comment.