Skip to content

Commit

Permalink
Convert 360_day calendars by choosing random dates to drop or add (py…
Browse files Browse the repository at this point in the history
…data#8603)

* Convert 360 calendar randomly

* add note to whats new

* add pull number to whats new entry

* run pre-commit

* Change test to use recommended freq

* Apply suggestions from code review

Co-authored-by: Spencer Clark <[email protected]>

* Fix merge - remove rng arg

---------

Co-authored-by: Spencer Clark <[email protected]>
  • Loading branch information
aulemahal and spencerkclark authored Apr 16, 2024
1 parent 2b2de81 commit 239309f
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 8 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ v2024.04.0 (unreleased)

New Features
~~~~~~~~~~~~
- New "random" method for converting to and from 360_day calendars (:pull:`8603`).
By `Pascal Bourgault <https://github.com/aulemahal>`_.


Breaking changes
Expand Down
53 changes: 45 additions & 8 deletions xarray/coding/calendar_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def convert_calendar(
The target calendar name.
dim : str
Name of the time coordinate in the input DataArray or Dataset.
align_on : {None, 'date', 'year'}
align_on : {None, 'date', 'year', 'random'}
Must be specified when either the source or target is a `"360_day"`
calendar; ignored otherwise. See Notes.
missing : any, optional
Expand Down Expand Up @@ -143,6 +143,16 @@ def convert_calendar(
will be dropped as there are no equivalent dates in a standard calendar.
This option is best used with data on a frequency coarser than daily.
"random"
Similar to "year", each day of year of the source is mapped to another day of year
of the target. However, instead of always having the same missing days according to
the source and target years, here 5 days are chosen randomly, one for each fifth
of the year. However, February 29th is always missing when converting to a leap year,
or its value is dropped when converting from a leap year. This is similar to the method
used in the LOCA dataset (see Pierce, Cayan, and Thrasher (2014). doi:10.1175/JHM-D-14-0082.1).
This option is best used on daily data.
"""
from xarray.core.dataarray import DataArray

Expand Down Expand Up @@ -174,14 +184,20 @@ def convert_calendar(

out = obj.copy()

if align_on == "year":
if align_on in ["year", "random"]:
# Special case for conversion involving 360_day calendar
# Instead of translating dates directly, this tries to keep the position within a year similar.

new_doy = time.groupby(f"{dim}.year").map(
_interpolate_day_of_year, target_calendar=calendar, use_cftime=use_cftime
)

if align_on == "year":
# Instead of translating dates directly, this tries to keep the position within a year similar.
new_doy = time.groupby(f"{dim}.year").map(
_interpolate_day_of_year,
target_calendar=calendar,
use_cftime=use_cftime,
)
elif align_on == "random":
# The 5 days to remove are randomly chosen, one for each of the five 72-days periods of the year.
new_doy = time.groupby(f"{dim}.year").map(
_random_day_of_year, target_calendar=calendar, use_cftime=use_cftime
)
# Convert the source datetimes, but override the day of year with our new day of years.
out[dim] = DataArray(
[
Expand Down Expand Up @@ -229,6 +245,27 @@ def _interpolate_day_of_year(time, target_calendar, use_cftime):
).astype(int)


def _random_day_of_year(time, target_calendar, use_cftime):
    """Return a day of year in the new calendar.

    Removes Feb 29th and five other days chosen randomly within five sections of 72 days.

    Parameters
    ----------
    time
        Time values belonging to a single year; must expose the ``.dt``
        accessor (``year``, ``calendar`` and ``dayofyear`` are read).
    target_calendar : str
        Name of the calendar being converted to.
    use_cftime
        Forwarded unchanged to ``_days_in_year``.

    Returns
    -------
    The day of year in the target calendar for each element of ``time``.
    When converting to ``"360_day"``, source days that are removed are
    marked with ``-1``.
    """
    year = int(time.dt.year[0])
    source_calendar = time.dt.calendar
    new_doy = np.arange(360) + 1
    # One random 0-based position inside each consecutive 72-day section.
    rm_idx = np.random.default_rng().integers(0, 72, 5) + 72 * np.arange(5)
    if source_calendar == "360_day":
        # Every day after a chosen position is shifted by one, so that one
        # target day of year is skipped per section (5 in total).
        for idx in rm_idx:
            new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1
        if _days_in_year(year, target_calendar, use_cftime) == 366:
            # Shift March 1st (day 60 without a leap day) and later so that
            # day 60 of the leap year, Feb 29th, is never produced.
            new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1
    elif target_calendar == "360_day":
        # np.insert interprets a sequence of indices relative to the original
        # array, so subtracting k puts the k-th marker at final position
        # rm_idx[k] of the resulting 365-element array.
        new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1)
        if _days_in_year(year, source_calendar, use_cftime) == 366:
            # Feb 29th is day 60 of a leap year, i.e. 0-based position 59.
            # Inserting at 60 would mark March 1st instead.
            new_doy = np.insert(new_doy, 59, -1)
    return new_doy[time.dt.dayofyear - 1]


def _convert_to_new_calendar_with_new_day_of_year(
date, day_of_year, calendar, use_cftime
):
Expand Down
39 changes: 39 additions & 0 deletions xarray/tests/test_calendar_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,45 @@ def test_convert_calendar_360_days(source, target, freq, align_on):
assert conv.size == 359 if freq == "D" else 359 * 4


def test_convert_calendar_360_days_random():
    """Exercise ``align_on="random"`` for conversions to and from ``360_day``."""
    # One leap year (2004) of daily data in the standard calendar.
    da_std = DataArray(
        np.linspace(0, 1, 366),
        dims=("time",),
        coords={
            "time": date_range(
                "2004-01-01",
                "2004-12-31",
                freq="D",
                calendar="standard",
                use_cftime=False,
            )
        },
    )
    # The same year of daily data in the 360_day calendar.
    da_360 = DataArray(
        np.linspace(0, 1, 360),
        dims=("time",),
        coords={
            "time": date_range("2004-01-01", "2004-12-30", freq="D", calendar="360_day")
        },
    )

    # Two independent conversions are expected to pick different days.
    conv = convert_calendar(da_std, "360_day", align_on="random")
    conv2 = convert_calendar(da_std, "360_day", align_on="random")
    assert (conv != conv2).any()

    conv = convert_calendar(da_360, "standard", use_cftime=False, align_on="random")
    assert np.datetime64("2004-02-29") not in conv.time
    conv2 = convert_calendar(da_360, "standard", use_cftime=False, align_on="random")
    assert (conv2 != conv).any()

    # Ensure that added days are evenly distributed in the 5 fifths of each year
    # (``np.nan`` is used because the ``np.NaN`` alias was removed in NumPy 2.0).
    conv = convert_calendar(da_360, "noleap", align_on="random", missing=np.nan)
    # Keep only the filled-in (null) entries, i.e. the days that were added.
    conv = conv.where(conv.isnull(), drop=True)
    nandoys = conv.time.dt.dayofyear[:366]
    assert all(nandoys < np.array([74, 147, 220, 293, 366]))
    assert all(nandoys > np.array([0, 73, 146, 219, 292]))


@requires_cftime
@pytest.mark.parametrize(
"source,target,freq",
Expand Down

0 comments on commit 239309f

Please sign in to comment.