From 13d8247cbf1cd8962c68f2c24f701c75005c5f50 Mon Sep 17 00:00:00 2001
From: lewisblake
Date: Mon, 17 Jul 2023 11:29:29 +0000
Subject: [PATCH] got through for loop, check colocation nans

---
 pyaerocom/colocation.py    | 12 +++--
 pyaerocom/colocation_3d.py | 93 ++++++++++++++++++++++++++++++++------
 pyaerocom/helpers.py       |  7 ++-
 pyaerocom/stationdata.py   |  3 +-
 4 files changed, 92 insertions(+), 23 deletions(-)

diff --git a/pyaerocom/colocation.py b/pyaerocom/colocation.py
index 834ff6533..b16200d25 100644
--- a/pyaerocom/colocation.py
+++ b/pyaerocom/colocation.py
@@ -437,7 +437,9 @@ def _colocate_site_data_helper(
     # time resolution, particularly the obs data)
     grid_ts = stat_data.resample_time(
         var, ts_type=ts_type, how=resample_how, min_num_obs=min_num_obs, inplace=True
-    )[var]
+    )[
+        var
+    ]  # LB: this is good

     if use_climatology_ref:
         obs_ts = stat_data_ref.calc_climatology(var_ref, min_num_obs=min_num_obs)[var_ref]
@@ -446,6 +448,10 @@ def _colocate_site_data_helper(
             var_ref, ts_type=ts_type, how=resample_how, min_num_obs=min_num_obs, inplace=True
         )[var_ref]

+    if not isinstance(obs_ts, pd.Series):
+        obs_ts = (
+            obs_ts.to_series()
+        )  # LB: placed here for now for EARLINET; may find a more suitable place for it
     # fill up missing time stamps
     return pd.concat([obs_ts, grid_ts], axis=1, keys=["ref", "data"])

@@ -748,7 +754,7 @@ def colocate_gridded_ungridded(
     lat_range = [np.min(latitude), np.max(latitude)]
     lon_range = [np.min(longitude), np.max(longitude)]
     # use only sites that are within model domain
-
+    # LB: filter_by_meta wipes is_vertical_profile
     data_ref = data_ref.filter_by_meta(latitude=lat_range, longitude=lon_range)

@@ -1052,4 +1058,4 @@ def correct_model_stp_coldata(coldata, p0=None, t0=273.15, inplace=False):
     coldata.data.attrs["Model_STP_corr"] = True
     coldata.data.attrs["Model_STP_corr_info"] = info_str

-    return coldata
\ No newline at end of file
+    return coldata
diff --git a/pyaerocom/colocation_3d.py b/pyaerocom/colocation_3d.py
index 41234da12..b7d6454be 100644
--- a/pyaerocom/colocation_3d.py
+++ b/pyaerocom/colocation_3d.py
@@ -212,7 +212,8 @@ def colocate_vertical_profile_gridded(

     # LB: Add station altitude so everything is in terms of being above sea level

-    list_of_colocateddata_objects = [None] * len(colocation_layer_limits)
+    # list_of_colocateddata_objects = [None] * len(colocation_layer_limits)
+    list_of_colocateddata_objects = []

     for (
         vertical_layer
@@ -238,7 +239,9 @@
             # Add coordinates to arrays required for xarray.DataArray below
             lons[i] = obs_stat.longitude
             lats[i] = obs_stat.latitude
-            alts[i] = obs_stat.altitude
+            alts[i] = obs_stat.station_coords[
+                "altitude"
+            ]  # obs_stat.altitude refers to the altitude of the data; be explicit that the station altitude comes from station_coords
             station_names[i] = obs_stat.station_name

         # for vertical_layer in colocation_layer_limits:
@@ -295,20 +298,26 @@
             # Make a copy of the station data and resample it to the mean based on hourly resolution. Needs testing!
             obs_stat_this_layer = obs_stat.copy()

-            obs_stat_this_layer[var_ref] = obs_stat_this_layer.select_altitude(
-                var_name=var_ref, altitudes=list(vertical_layer.values())
-            ).mean(
-                "altitude"
-            )  # LB: note this is in beta, can implement directly like below
+            try:
+                obs_stat_this_layer[var_ref] = obs_stat_this_layer.select_altitude(
+                    var_name=var_ref, altitudes=list(vertical_layer.values())
+                ).mean(
+                    "altitude"
+                )  # LB: note this is in beta, can implement directly like below
+            except ValueError:
+                logger.warning(
+                    f"Var: {var_ref}. Skipping {obs_stat_this_layer.station_name} in altitude layer {vertical_layer} because no data"
+                )
+                continue

-            breakpoint()
+            # breakpoint()
             # obs_stat_this_layer[var_ref] = (
-            #     this_layer_obs_stat[var_ref][
+            #     obs_stat_this_layer[var_ref][
             #         (
-            #             vertical_layer["start"] <= this_layer_obs_stat.altitude
+            #             vertical_layer["start"] <= obs_stat_this_layer.altitude
             #         )  # altitude data should be given in terms of altitude above sea level
-            #         & (this_layer_obs_stat.altitude < vertical_layer["end"])
+            #         & (obs_stat_this_layer.altitude < vertical_layer["end"])
             #     ]
             #     # .resample(rule="H")  # LB: forget why this is here
             #     .mean("altitude")
@@ -338,7 +347,6 @@
                     use_climatology_ref=use_climatology_ref,
                 )
             else:
-                breakpoint()  # LB: obs_stat_this_layer turning into nans. figure out why
                 _df = _colocate_site_data_helper(
                     stat_data=grid_stat_this_layer,
@@ -377,6 +385,63 @@
                     f"{var_ref} data from site {obs_stat.station_name} will "
                     f"not be added to ColocatedData. Reason: {e}"
                 )
-        breakpoint()
-        return
+        try:
+            revision = data_ref.data_revision[dataset_ref]
+        except Exception:
+            try:
+                revision = data_ref._get_data_revision_helper(dataset_ref)
+            except MetaDataError:
+                revision = "MULTIPLE"
+            except Exception:
+                revision = "n/a"
+
+        files = [os.path.basename(x) for x in data.from_files]
+
+        meta = {
+            "data_source": [dataset_ref, data.data_id],
+            "var_name": [var_ref_aerocom, var_aerocom],
+            "var_name_input": [var_ref, var],
+            "ts_type": col_freq,  # will be updated below if resampling
+            "filter_name": filter_name,
+            "ts_type_src": [ts_type_src_ref, ts_type_src_data],
+            "var_units": [data_ref_unit, data_unit],
+            "data_level": 3,
+            "revision_ref": revision,
+            "from_files": files,
+            "from_files_ref": None,
+            "colocate_time": colocate_time,
+            "obs_is_clim": use_climatology_ref,
+            "pyaerocom": pya_ver,
+            "min_num_obs": min_num_obs,
+            "resample_how": resample_how,
+        }
+
+        breakpoint()
+        # create coordinates of DataArray
+        coords = {
+            "data_source": meta["data_source"],
+            "time": time_idx,
+            "station_name": station_names,
+            "latitude": ("station_name", lats),
+            "longitude": ("station_name", lons),
+            "altitude": ("station_name", alts),
+        }
+
+        dims = ["data_source", "time", "station_name"]
+        coldata = ColocatedData(data=arr, coords=coords, dims=dims, name=var, attrs=meta)
+
+        # add correct units for lat / lon dimensions
+        coldata.latitude.attrs["standard_name"] = data.latitude.standard_name
+        coldata.latitude.attrs["units"] = str(data.latitude.units)
+
+        coldata.longitude.attrs["standard_name"] = data.longitude.standard_name
+        coldata.longitude.attrs["units"] = str(data.longitude.units)
+
+        list_of_colocateddata_objects.append(coldata)
+
+        # Then need to do profile colocation as well.
+
+        breakpoint()
+
+    return list_of_colocateddata_objects
diff --git a/pyaerocom/helpers.py b/pyaerocom/helpers.py
index 79e590d67..f7eee0253 100644
--- a/pyaerocom/helpers.py
+++ b/pyaerocom/helpers.py
@@ -63,7 +63,6 @@ def varlist_aerocom(varlist):
-
     if isinstance(varlist, str):
         varlist = [varlist]
     elif not isinstance(varlist, list):
@@ -348,7 +347,6 @@ def numpy_to_cube(data, dims=None, var_name=None, units=None, **attrs):
     sh = data.shape
     if dims is not None:
         if not len(dims) == data.ndim:
-
             raise DataDimensionError("Input number of dimensios must match array dimension number")
         for i, dim in enumerate(dims):
             if not isinstance(dim, iris.coords.DimCoord):
@@ -1164,7 +1162,9 @@ def resample_time_dataarray(arr, freq, how=None, min_num_obs=None):
     pd_freq = to.to_pandas_freq()
     invalid = None
     if min_num_obs is not None:
-        invalid = arr.resample(time=pd_freq).count(dim="time") < min_num_obs
+        invalid = (
+            arr.resample(time=pd_freq).count(dim="time") < min_num_obs
+        )  # LB: This is why everything is getting set to nan

     freq, loffset = _get_pandas_freq_and_loffset(freq)
     resampler = arr.resample(time=pd_freq, loffset=loffset)
@@ -1403,7 +1403,6 @@ def datetime2str(time, ts_type=None):


 def start_stop_str(start, stop=None, ts_type=None):
-
     conv = TS_TYPE_DATETIME_CONV[ts_type]
     if is_year(start) and stop is None:
         return str(start)
diff --git a/pyaerocom/stationdata.py b/pyaerocom/stationdata.py
index bc304bfe9..f49ced18a 100644
--- a/pyaerocom/stationdata.py
+++ b/pyaerocom/stationdata.py
@@ -83,7 +83,6 @@ class StationData(StationMetaData):
     ]

     def __init__(self, **meta_info):
-
         self.dtime = []

         self.var_info = BrowseDict()
@@ -720,7 +719,6 @@ def _check_ts_types_for_merge(self, other, var_name):
         return ts_type

     def _update_var_timeinfo(self):
-
         for var, info in self.var_info.items():
             data = self[var]
             if not isinstance(data, pd.Series):
@@ -1319,6 +1317,7 @@ def select_altitude(self, var_name, altitudes):
                     f"Altitude data and {var_name} data have different lengths"
                 )
             mask = np.logical_and(alt >= altitudes[0], alt <= altitudes[1])
+            # LB: Comment out for testing. Maybe issue a logging warning instead
             if mask.sum() == 0:
                 raise ValueError(f"no data in specified altitude range")
             return data[mask]
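A note on the new layer-averaging step in colocate_vertical_profile_gridded above: for each entry of colocation_layer_limits, the observed profile is first reduced to one value per layer with select_altitude(...).mean("altitude"), and a station/layer combination with no samples inside the layer is skipped via the new ValueError branch, so the per-layer StationData can then be passed through the same _colocate_site_data_helper used for surface colocation. The snippet below is a minimal, self-contained sketch of just that reduction using plain numpy/xarray; the profile values, layer bounds, and variable name are hypothetical, and this is an illustration rather than the pyaerocom implementation itself.

import numpy as np
import xarray as xr

# Hypothetical observed profile: one value per altitude level (m above sea level)
altitude = np.arange(0.0, 10000.0, 500.0)
profile = xr.DataArray(
    np.linspace(1.0, 0.1, altitude.size),
    coords={"altitude": altitude},
    dims="altitude",
    name="bsc532aer",  # hypothetical variable name
)

# One entry of colocation_layer_limits, as iterated over in the loop above
vertical_layer = {"start": 0.0, "end": 2000.0}

# Keep only the samples that fall inside the layer (roughly what
# StationData.select_altitude does for this use case)
in_layer = profile.where(
    (profile.altitude >= vertical_layer["start"])
    & (profile.altitude < vertical_layer["end"]),
    drop=True,
)

if in_layer.size == 0:
    # analogous to the ValueError branch above: skip this station for this layer
    print("no data in layer, skipping")
else:
    # collapse the profile to a single layer mean, like .mean("altitude") above
    layer_mean = float(in_layer.mean("altitude"))
    print(layer_mean)

Collapsing each layer to a scalar time series before colocation is what allows the per-layer objects to reuse the existing 2D colocation helpers; only the altitude selection differs between layers.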