Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert 360_day calendars by choosing random dates to drop or add #8603

Merged
merged 9 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ v2024.04.0 (unreleased)

New Features
~~~~~~~~~~~~
- New "random" method for converting to and from 360_day calendars (:pull:`8603`).
By `Pascal Bourgault <https://github.com/aulemahal>`_.


Breaking changes
Expand Down
53 changes: 45 additions & 8 deletions xarray/coding/calendar_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def convert_calendar(
The target calendar name.
dim : str
Name of the time coordinate in the input DataArray or Dataset.
align_on : {None, 'date', 'year'}
align_on : {None, 'date', 'year', 'random'}
Must be specified when either the source or target is a `"360_day"`
calendar; ignored otherwise. See Notes.
missing : any, optional
Expand Down Expand Up @@ -143,6 +143,16 @@ def convert_calendar(
will be dropped as there are no equivalent dates in a standard calendar.

This option is best used with data on a frequency coarser than daily.

"random"
Similar to "year", each day of year of the source is mapped to another day of year
of the target. However, instead of having always the same missing days according
the source and target years, here 5 days are chosen randomly, one for each fifth
of the year. However, February 29th is always missing when converting to a leap year,
or its value is dropped when converting from a leap year. This is similar to the method
used in the LOCA dataset (see Pierce, Cayan, and Thrasher (2014). doi:10.1175/JHM-D-14-0082.1).

This option is best used on daily data.
"""
from xarray.core.dataarray import DataArray

Expand Down Expand Up @@ -174,14 +184,20 @@ def convert_calendar(

out = obj.copy()

if align_on == "year":
if align_on in ["year", "random"]:
# Special case for conversion involving 360_day calendar
# Instead of translating dates directly, this tries to keep the position within a year similar.

new_doy = time.groupby(f"{dim}.year").map(
_interpolate_day_of_year, target_calendar=calendar, use_cftime=use_cftime
)

if align_on == "year":
# Instead of translating dates directly, this tries to keep the position within a year similar.
new_doy = time.groupby(f"{dim}.year").map(
_interpolate_day_of_year,
target_calendar=calendar,
use_cftime=use_cftime,
)
elif align_on == "random":
# The 5 days to remove are randomly chosen, one for each of the five 72-days periods of the year.
new_doy = time.groupby(f"{dim}.year").map(
_random_day_of_year, target_calendar=calendar, use_cftime=use_cftime
)
# Convert the source datetimes, but override the day of year with our new day of years.
out[dim] = DataArray(
[
Expand Down Expand Up @@ -229,6 +245,27 @@ def _interpolate_day_of_year(time, target_calendar, use_cftime):
).astype(int)


def _random_day_of_year(time, target_calendar, use_cftime):
"""Return a day of year in the new calendar.

Removes Feb 29th and five other days chosen randomly within five sections of 72 days.
"""
year = int(time.dt.year[0])
source_calendar = time.dt.calendar
new_doy = np.arange(360) + 1
rm_idx = np.random.default_rng().integers(0, 72, 5) + 72 * np.arange(5)
if source_calendar == "360_day":
for idx in rm_idx:
new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1
if _days_in_year(year, target_calendar, use_cftime) == 366:
new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1
elif target_calendar == "360_day":
new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1)
if _days_in_year(year, source_calendar, use_cftime) == 366:
new_doy = np.insert(new_doy, 60, -1)
return new_doy[time.dt.dayofyear - 1]


def _convert_to_new_calendar_with_new_day_of_year(
date, day_of_year, calendar, use_cftime
):
Expand Down
39 changes: 39 additions & 0 deletions xarray/tests/test_calendar_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,45 @@ def test_convert_calendar_360_days(source, target, freq, align_on):
assert conv.size == 359 if freq == "D" else 359 * 4


def test_convert_calendar_360_days_random():
da_std = DataArray(
np.linspace(0, 1, 366),
dims=("time",),
coords={
"time": date_range(
"2004-01-01",
"2004-12-31",
freq="D",
calendar="standard",
use_cftime=False,
)
},
)
da_360 = DataArray(
np.linspace(0, 1, 360),
dims=("time",),
coords={
"time": date_range("2004-01-01", "2004-12-30", freq="D", calendar="360_day")
},
)

conv = convert_calendar(da_std, "360_day", align_on="random")
conv2 = convert_calendar(da_std, "360_day", align_on="random")
assert (conv != conv2).any()

conv = convert_calendar(da_360, "standard", use_cftime=False, align_on="random")
assert np.datetime64("2004-02-29") not in conv.time
conv2 = convert_calendar(da_360, "standard", use_cftime=False, align_on="random")
assert (conv2 != conv).any()

# Ensure that added days are evenly distributed in the 5 fifths of each year
conv = convert_calendar(da_360, "noleap", align_on="random", missing=np.NaN)
conv = conv.where(conv.isnull(), drop=True)
nandoys = conv.time.dt.dayofyear[:366]
assert all(nandoys < np.array([74, 147, 220, 293, 366]))
assert all(nandoys > np.array([0, 73, 146, 219, 292]))


@requires_cftime
@pytest.mark.parametrize(
"source,target,freq",
Expand Down
Loading