Skip to content

Commit

Permalink
got through for loop, check colocation nans
Browse files Browse the repository at this point in the history
  • Loading branch information
lewisblake committed Jul 17, 2023
1 parent de6d8cc commit 13d8247
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 23 deletions.
12 changes: 9 additions & 3 deletions pyaerocom/colocation.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,9 @@ def _colocate_site_data_helper(
# time resolution, particularly the obs data)
grid_ts = stat_data.resample_time(
var, ts_type=ts_type, how=resample_how, min_num_obs=min_num_obs, inplace=True
)[var]
)[
var
] # LB: this is good

if use_climatology_ref:
obs_ts = stat_data_ref.calc_climatology(var_ref, min_num_obs=min_num_obs)[var_ref]
Expand All @@ -446,6 +448,10 @@ def _colocate_site_data_helper(
var_ref, ts_type=ts_type, how=resample_how, min_num_obs=min_num_obs, inplace=True
)[var_ref]

if not isinstance(obs_ts, pd.Series):
obs_ts = (
obs_ts.to_series()
) # LB: place here for now for earlinet, may think of more clever place to put it
# fill up missing time stamps
return pd.concat([obs_ts, grid_ts], axis=1, keys=["ref", "data"])

Expand Down Expand Up @@ -748,7 +754,7 @@ def colocate_gridded_ungridded(
lat_range = [np.min(latitude), np.max(latitude)]
lon_range = [np.min(longitude), np.max(longitude)]
# use only sites that are within model domain

# LB: filter_by_meta wipes is_vertical_profile
data_ref = data_ref.filter_by_meta(latitude=lat_range, longitude=lon_range)

Expand Down Expand Up @@ -1052,4 +1058,4 @@ def correct_model_stp_coldata(coldata, p0=None, t0=273.15, inplace=False):

coldata.data.attrs["Model_STP_corr"] = True
coldata.data.attrs["Model_STP_corr_info"] = info_str
return coldata
return coldata
93 changes: 79 additions & 14 deletions pyaerocom/colocation_3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,8 @@ def colocate_vertical_profile_gridded(

# LB: Add station altitude so everything is in terms of beign above sea level

list_of_colocateddata_objects = [None] * len(colocation_layer_limits)
# list_of_colocateddata_objects = [None] * len(colocation_layer_limits)
list_of_colocateddata_objects = []

for (
vertical_layer
Expand All @@ -238,7 +239,9 @@ def colocate_vertical_profile_gridded(
# Add coordinates to arrays required for xarray.DataArray below
lons[i] = obs_stat.longitude
lats[i] = obs_stat.latitude
alts[i] = obs_stat.altitude
alts[i] = obs_stat.station_coords[
"altitude"
] # altitude refers to altitdue of the data. be explcit where getting from
station_names[i] = obs_stat.station_name

# for vertical_layer in colocation_layer_limits:
Expand Down Expand Up @@ -295,20 +298,26 @@ def colocate_vertical_profile_gridded(
# Make a copy of the station data and resample it to the mean based on hourly resolution. Needs testing!
obs_stat_this_layer = obs_stat.copy()

obs_stat_this_layer[var_ref] = obs_stat_this_layer.select_altitude(
var_name=var_ref, altitudes=list(vertical_layer.values())
).mean(
"altitude"
) # LB: note this is in beta, can implement directly like below
try:
obs_stat_this_layer[var_ref] = obs_stat_this_layer.select_altitude(
var_name=var_ref, altitudes=list(vertical_layer.values())
).mean(
"altitude"
) # LB: note this is in beta, can implement directly like below
except ValueError:
logger.warning(
f"Var: {var_ref}. Skipping {obs_stat_this_layer.station_name} in altitude layer {vertical_layer} because no data"
)
continue

breakpoint()
# breakpoint()

# obs_stat_this_layer[var_ref] = (
# this_layer_obs_stat[var_ref][
# obs_stat_this_layer[var_ref][
# (
# vertical_layer["start"] <= this_layer_obs_stat.altitude
# vertical_layer["start"] <= obs_stat_this_layer.altitude
# ) # altitude data should be given in terms of altitude above sea level
# & (this_layer_obs_stat.altitude < vertical_layer["end"])
# & (obs_stat_this_layer.altitude < vertical_layer["end"])
# ]
# # .resample(rule="H") # LB: forget why this is here
# .mean("altitude")
Expand Down Expand Up @@ -338,7 +347,6 @@ def colocate_vertical_profile_gridded(
use_climatology_ref=use_climatology_ref,
)
else:
breakpoint()
# LB: obs_stat_this_layer turning into nans. figure out why
_df = _colocate_site_data_helper(
stat_data=grid_stat_this_layer,
Expand Down Expand Up @@ -377,6 +385,63 @@ def colocate_vertical_profile_gridded(
f"{var_ref} data from site {obs_stat.station_name} will "
f"not be added to ColocatedData. Reason: {e}"
)
breakpoint()

return
try:
revision = data_ref.data_revision[dataset_ref]
except Exception:
try:
revision = data_ref._get_data_revision_helper(dataset_ref)
except MetaDataError:
revision = "MULTIPLE"
except Exception:
revision = "n/a"

files = [os.path.basename(x) for x in data.from_files]

meta = {
"data_source": [dataset_ref, data.data_id],
"var_name": [var_ref_aerocom, var_aerocom],
"var_name_input": [var_ref, var],
"ts_type": col_freq, # will be updated below if resampling
"filter_name": filter_name,
"ts_type_src": [ts_type_src_ref, ts_type_src_data],
"var_units": [data_ref_unit, data_unit],
"data_level": 3,
"revision_ref": revision,
"from_files": files,
"from_files_ref": None,
"colocate_time": colocate_time,
"obs_is_clim": use_climatology_ref,
"pyaerocom": pya_ver,
"min_num_obs": min_num_obs,
"resample_how": resample_how,
}

breakpoint()
# create coordinates of DataArray
coords = {
"data_source": meta["data_source"],
"time": time_idx,
"station_name": station_names,
"latitude": ("station_name", lats),
"longitude": ("station_name", lons),
"altitude": ("station_name", alts),
}

dims = ["data_source", "time", "station_name"]
coldata = ColocatedData(data=arr, coords=coords, dims=dims, name=var, attrs=meta)

# add correct units for lat / lon dimensions
coldata.latitude.attrs["standard_name"] = data.latitude.standard_name
coldata.latitude.attrs["units"] = str(data.latitude.units)

coldata.longitude.attrs["standard_name"] = data.longitude.standard_name
coldata.longitude.attrs["units"] = str(data.longitude.units)

list_of_colocateddata_objects.append(coldata)

# Then need to do profile colocation as well.

breakpoint()

return list_of_colocateddata_objects
7 changes: 3 additions & 4 deletions pyaerocom/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@


def varlist_aerocom(varlist):

if isinstance(varlist, str):
varlist = [varlist]
elif not isinstance(varlist, list):
Expand Down Expand Up @@ -348,7 +347,6 @@ def numpy_to_cube(data, dims=None, var_name=None, units=None, **attrs):
sh = data.shape
if dims is not None:
if not len(dims) == data.ndim:

raise DataDimensionError("Input number of dimensios must match array dimension number")
for i, dim in enumerate(dims):
if not isinstance(dim, iris.coords.DimCoord):
Expand Down Expand Up @@ -1164,7 +1162,9 @@ def resample_time_dataarray(arr, freq, how=None, min_num_obs=None):
pd_freq = to.to_pandas_freq()
invalid = None
if min_num_obs is not None:
invalid = arr.resample(time=pd_freq).count(dim="time") < min_num_obs
invalid = (
arr.resample(time=pd_freq).count(dim="time") < min_num_obs
) # LB: This is why everything is getting set to nan

freq, loffset = _get_pandas_freq_and_loffset(freq)
resampler = arr.resample(time=pd_freq, loffset=loffset)
Expand Down Expand Up @@ -1403,7 +1403,6 @@ def datetime2str(time, ts_type=None):


def start_stop_str(start, stop=None, ts_type=None):

conv = TS_TYPE_DATETIME_CONV[ts_type]
if is_year(start) and stop is None:
return str(start)
Expand Down
3 changes: 1 addition & 2 deletions pyaerocom/stationdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ class StationData(StationMetaData):
]

def __init__(self, **meta_info):

self.dtime = []

self.var_info = BrowseDict()
Expand Down Expand Up @@ -720,7 +719,6 @@ def _check_ts_types_for_merge(self, other, var_name):
return ts_type

def _update_var_timeinfo(self):

for var, info in self.var_info.items():
data = self[var]
if not isinstance(data, pd.Series):
Expand Down Expand Up @@ -1319,6 +1317,7 @@ def select_altitude(self, var_name, altitudes):
f"Altitude data and {var_name} data have different lengths"
)
mask = np.logical_and(alt >= altitudes[0], alt <= altitudes[1])
# LB: Comment out for testing. Maybe issue a logging warning instead
if mask.sum() == 0:
raise ValueError(f"no data in specified altitude range")
return data[mask]
Expand Down

0 comments on commit 13d8247

Please sign in to comment.