diff --git a/eodal/core/band.py b/eodal/core/band.py index e91c0dd9..5295a849 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -1775,6 +1775,7 @@ def reduce( self, method: Union[str, List[str]], by: Optional[Union[Path, gpd.GeoDataFrame]] = None, + method_args: Optional[Dict[str, Any]] = None ) -> Dict[str, Union[int, float]]: """ Reduces the raster data to scalar values. @@ -1783,6 +1784,12 @@ def reduce( any ``numpy`` function taking a two-dimensional array as input and returning a single scalar. Can be a single function name (e.g., "mean") or a list of function names (e.g., ["mean", "median"]) + :param by: + define by what to reduce the band values (not implemented yet!!) + :param method_args: + optional dictionary with arguments to pass to the single methods in + case the reducer method requires extra arguments to function properly + (e.g., `np.quantile`) :returns: a dictionary with scalar results """ @@ -1813,7 +1820,11 @@ def reduce( try: # get function object and use its __call__ method numpy_function = eval(expression) - stats[operator] = numpy_function.__call__(self.values) + # check if there are any function arguments + args = [] + if method_args is not None: + args = method_args.get(method, None) + stats[operator] = numpy_function.__call__(self.values, *args) except TypeError: raise Exception(f"Unknown function name for {numpy_prefix}: {operator}") diff --git a/eodal/core/sensors/sentinel1.py b/eodal/core/sensors/sentinel1.py index 1d540755..22dba266 100644 --- a/eodal/core/sensors/sentinel1.py +++ b/eodal/core/sensors/sentinel1.py @@ -36,10 +36,11 @@ from eodal.core.band import Band from eodal.core.raster import RasterCollection from eodal.core.scene import SceneProperties +from eodal.utils.decorators import prepare_point_features from eodal.utils.sentinel1 import get_S1_platform_from_safe, \ get_S1_acquistion_time_from_safe, _url_to_safe_name, \ get_s1_imaging_mode_from_safe -from utils.exceptions import DataNotFoundError +from eodal.utils.exceptions import DataNotFoundError Settings = get_settings() @@ -151,6 +152,7 @@ def from_safe( return sentinel1 @classmethod + @prepare_point_features def read_pixels_from_safe( cls, vector_features: Path | gpd.GeoDataFrame, diff --git a/eodal/core/sensors/sentinel2.py b/eodal/core/sensors/sentinel2.py index d5407160..dad60011 100644 --- a/eodal/core/sensors/sentinel2.py +++ b/eodal/core/sensors/sentinel2.py @@ -49,6 +49,7 @@ s2_gain_factor, SCL_Classes, ) +from eodal.utils.decorators import prepare_point_features from eodal.utils.exceptions import BandNotFoundError from eodal.utils.sentinel2 import ( get_S2_bandfiles_with_res, @@ -471,6 +472,7 @@ def from_safe( return sentinel2 @classmethod + @prepare_point_features def read_pixels_from_safe( cls, vector_features: Union[Path, gpd.GeoDataFrame], @@ -565,23 +567,27 @@ def read_pixels_from_safe( if apply_scaling: gdf_scaled = gdf_band.copy() gdf_scaled[band_name] = 0.0 - gdf_scaled[band_name] = ( - offset + gdf_band[band_name].loc[gdf_band[band_name] != 0] - ) * gain + # use only pixel values were reflectance is != 0 + gdf_scaled[band_name] = gdf_band[band_name].apply( + lambda x, offset=offset, gain=gain: + (offset + x) * gain if x != 0 else 0 + ) band_gdfs.append(gdf_scaled) continue band_gdfs.append(gdf_band) - # concat the single GeoDataFrames with the band data + # concatenate the single GeoDataFrames with the band data gdf = pd.concat(band_gdfs, axis=1) # clean the dataframe and remove duplicate column names after merging # to avoid (large) redundancies gdf = gdf.loc[:, ~gdf.columns.duplicated()] # skip all pixels with zero reflectance (either blackfilled or outside of the # scene extent); in case of dtype float check for NaNs - if (gdf.dtypes[gdf.columns.str.startswith("B")] == "float64").all(): + band_names = gdf.columns[gdf.columns.str.startswith("B")] + if (gdf.dtypes[band_names] == "float64").all(): + gdf[band_names] = gdf[band_names].replace({0., np.nan}) gdf.dropna(axis=0, inplace=True) - elif (gdf.dtypes[gdf.columns.str.startswith("B")] == "int16").all(): + elif (gdf.dtypes[band_names] == "int16").all(): gdf = gdf.loc[~(gdf[band_df_safe.band_name] == 0).all(axis=1)] return gdf diff --git a/eodal/core/utils/geometry.py b/eodal/core/utils/geometry.py index 5aca063a..44729518 100644 --- a/eodal/core/utils/geometry.py +++ b/eodal/core/utils/geometry.py @@ -48,7 +48,6 @@ def read_geometries(in_dataset: Union[Path, gpd.GeoDataFrame]) -> gpd.GeoDataFra f"Could not read geometries of input type {type(in_dataset)}" ) - def check_geometry_types( in_dataset: Union[Path, gpd.GeoDataFrame], allowed_geometry_types: List[str], @@ -96,7 +95,6 @@ def check_geometry_types( ) return gdf - def convert_3D_2D(geometry: gpd.GeoSeries) -> gpd.GeoSeries: """ Takes a GeoSeries of 3D Multi/Polygons (has_z) and returns a list of 2D Multi/Polygons. @@ -127,3 +125,22 @@ def convert_3D_2D(geometry: gpd.GeoSeries) -> gpd.GeoSeries: new_geo = geometry break return new_geo + +def multi_to_single_points(point_features: gpd.GeoDataFrame | Path) -> gpd.GeoDataFrame: + """ + Casts MultiPoint geometries to single point geometries by calling + `gpd.GeoDataFrame.explode()` + + :param point_features: + point features to cast + :returns: + casted point features or input if all geometries are already single parted + """ + gdf = check_geometry_types( + in_dataset=point_features, + allowed_geometry_types=['Point', 'MultiPoint'] + ) + if (gdf.geometry.type == 'MultiPoint').any(): + gdf = gdf.explode(index_parts=False) + return gdf + diff --git a/eodal/metadata/database/querying.py b/eodal/metadata/database/querying.py index 0c6791a2..12fe0c20 100644 --- a/eodal/metadata/database/querying.py +++ b/eodal/metadata/database/querying.py @@ -20,12 +20,11 @@ import geopandas as gpd from sqlalchemy import create_engine - -from eodal.metadata.database import Regions -from eodal.utils.exceptions import RegionNotFoundError -from eodal.config import get_settings from sqlalchemy.orm.session import sessionmaker +from eodal.config import get_settings +from eodal.metadata.database import Regions, S2_Raw_Metadata +from eodal.utils.exceptions import DataNotFoundError, RegionNotFoundError Settings = get_settings() logger = Settings.logger @@ -44,17 +43,37 @@ def get_region(region: str) -> gpd.GeoDataFrame: unique region identifier :return: - geodataframe with the geometry of the queried region + `GeoDataFrame` with the geometry of the queried region """ - query_statement = ( session.query(Regions.geom, Regions.region_uid) .filter(Regions.region_uid == region) .statement ) - try: return gpd.read_postgis(query_statement, session.bind) - except Exception as e: raise RegionNotFoundError(f"{region} not found: {e}") + +def get_s2_tile_footprint(tile_name: str) -> gpd.GeoDataFrame: + """ + Queries the geographic extent of a Sentinel-2 tile + + :param sensor: + name of the sensor the tiling scheme belongs to (e.g., + 'sentinel2') + :param tile_name: + name of the tile in the tiling scheme (e.g., 'T32TMT') + :returns: + extent of the tile in geographic coordinates (WGS84) + """ + query_statement = ( + session.query(S2_Raw_Metadata.geom) + .filter(S2_Raw_Metadata.tile_id == tile_name) + .distinct() + .statement + ) + try: + return gpd.read_postgis(query_statement, session.bind) + except Exception as e: + raise DataNotFoundError(f"{tile_name} not found: {e}") diff --git a/eodal/operational/mapping/mapper.py b/eodal/operational/mapping/mapper.py index 7b24a76f..8b4d0a6e 100644 --- a/eodal/operational/mapping/mapper.py +++ b/eodal/operational/mapping/mapper.py @@ -539,7 +539,8 @@ def _get_observation( in_dir = scenes_date["real_path"].iloc[0] # if there is only one scene all we have to do is to read # read pixels in case the feature's dtype is point - if feature_dict["features"][0]["geometry"]["type"] == "Point": + feature_geom = feature_dict["features"][0]["geometry"]["type"] + if feature_geom in ["Point", "MultiPoint"]: if sensor.lower() == 'sentinel1': res = Sentinel1.read_pixels_from_safe( in_dir=in_dir, @@ -552,9 +553,14 @@ def _get_observation( band_selection=self.mapper_configs.band_names, **kwargs, ) - res["sensing_date"] = scenes_date["sensing_date"].values - res["scene_id"] = scenes_date["scene_id"].values - res['sensing_time'] = scenes_date['sensing_time'].values + res["sensing_date"] = scenes_date["sensing_date"].values[0] + res["scene_id"] = scenes_date["scene_id"].values[0] + res['sensing_time'] = scenes_date['sensing_time'].values[0] + # make sure the result is projected into the target EPSG code, otherwise + # the resulting GeoDataFrame contains wrong coordinates, i.e., the + # coordinates were not projected into the target CRS + if res.crs.to_epsg() != scenes_date.target_crs.unique()[0]: + res.to_crs(epsg=scenes_date.target_crs.unique()[0], inplace=True) # or the feature else: if sensor.lower() == 'sentinel1': diff --git a/eodal/operational/mapping/sentinel2.py b/eodal/operational/mapping/sentinel2.py index e88cfdcf..5307203c 100644 --- a/eodal/operational/mapping/sentinel2.py +++ b/eodal/operational/mapping/sentinel2.py @@ -189,7 +189,8 @@ def _read_multiple_scenes( # if the feature is a point we take the data set that is not blackfilled. # If more than one data set is not blackfilled we simply take the # first data set - if feature_gdf["geometry"].iloc[0].type == "Point": + feature_geom = feature_gdf["geometry"].iloc[0].type + if feature_geom in ["Point", "MultiPoint"]: for _, candidate_scene in scenes_date.iterrows(): if settings.USE_STAC: in_dir = candidate_scene["assets"] @@ -204,6 +205,10 @@ def _read_multiple_scenes( if feature_gdf.empty: continue res = feature_gdf + # make sure the coordinates are reprojected if necessary + if res.crs.to_epsg() != scenes_date.target_crs.unique()[0]: + res.to_crs(epsg=scenes_date.target_crs.unique()[0], inplace=True) + if isinstance(candidate_scene, (pd.Series, gpd.GeoSeries)): res["sensing_date"] = candidate_scene["sensing_date"] res['sensing_time'] = candidate_scene['sensing_time'] diff --git a/eodal/utils/decorators.py b/eodal/utils/decorators.py index 1153496c..983d82f0 100644 --- a/eodal/utils/decorators.py +++ b/eodal/utils/decorators.py @@ -24,8 +24,10 @@ from rasterio.coords import BoundingBox from eodal.config import get_settings +from eodal.core.utils.geometry import multi_to_single_points from eodal.utils.exceptions import UnknownProcessingLevel, BandNotFoundError from eodal.utils.geometry import box_to_geojson +from eodal.core.utils.geometry import multi_to_single_points Settings = get_settings() @@ -34,7 +36,7 @@ def prepare_bbox(f): @wraps(f) def wrapper(**kwargs): # a bounding box (vector features) is required - vector_features = kwargs.get('vector_features', None) + vector_features = kwargs.get('bounding_box', None) if vector_features is None: raise ValueError('A bounding box must be specified') if isinstance(vector_features, Path): @@ -44,10 +46,33 @@ def wrapper(**kwargs): # and provide bounds as geojson (required by STAC) bbox = box_to_geojson(gdf=vector_features) kwargs.update({'bounding_box': bbox}) - del kwargs['vector_features'] return f(**kwargs) return wrapper +def prepare_point_features(f): + """ + casts MultiPoint geometries to single parts before calling pixel extraction methods + """ + @wraps(f) + def wrapper(*args, **kwargs): + if len(args) >= 2: + vector_features = args[1] + else: + vector_features = kwargs.get('vector_features') + # cast to single point geometries + try: + vector_features_updated = multi_to_single_points(vector_features) + except Exception as e: + print(e) + if len(args) >= 2: + arg_list = list(args) + arg_list[1] = vector_features_updated + args = tuple(arg_list) + else: + kwargs.update({'vector_features': vector_features_updated}) + return f(*args, **kwargs) + return wrapper + def check_processing_level(f): @wraps(f) def wrapper(*args, **kwargs): diff --git a/examples/random_sentinel2_pixels.py b/examples/random_sentinel2_pixels.py new file mode 100644 index 00000000..77329c94 --- /dev/null +++ b/examples/random_sentinel2_pixels.py @@ -0,0 +1,139 @@ +''' +Created on Oct 1, 2022 + +@author: graflu +''' + +import cv2 +import geopandas as gpd +import numpy as np +import pandas as pd + +from datetime import date +from pathlib import Path +from typing import Optional + +from eodal.operational.mapping import MapperConfigs, Sentinel2Mapper +from eodal.utils.constants import ProcessingLevels + +def assign_pixel_ids(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + """ + Assigns unique pixel IDs based on the pixel geometries + + :param gdf: + GeoDataFrame with point geometries (pixels) + :returns: + the input GeoDataFrame with a new column containing unique + pixel IDs + """ + # we need the well-known-binary representation of the geometries + # to allow for fast assignment of pixel IDs (pandas cannot handle + # the actual geometries) + gdf['wkb'] = gdf.geometry.apply(lambda x: x.wkb) + unique_geoms = list(gdf.wkb.unique()) + pixel_ids = [x for x in range(len(unique_geoms))] + pixel_id_mapper = dict(zip(unique_geoms, pixel_ids)) + gdf['pixel_id'] = gdf.wkb.map(pixel_id_mapper) + return gdf + +def random_choice(pixel_series: gpd.GeoSeries, n: Optional[int] = 5) -> gpd.GeoSeries: + """ + Selects `n` observations from a pixel time series (all bands) + + :param pixel_series: + pixel time series + :param n: + number of observations to sample from the series + :returns: + randomly selected observations + """ + # get sensing dates available + dates = list(pixel_series.sensing_date.unique) + n_dates = len(dates) + # update (lower) n if required + if n_dates < n: + n = n_dates + # TODO select n dates and return the corresponding pixel values + return + +def get_pixels(date_start: date, date_end: date, scene_cloud_cover_threshold: int, + aois: gpd.GeoDataFrame | Path, **kwargs): + """ + Random selection of pixel observations from time series within one or more areas + of interest (AOIS, aka features). + + :param date_start: + start date for extracting Sentinel-2 data (inclusive) + :param date_end: + end date for extracting Sentinel-2 data (inclusive) + :param scene_cloud_cover_threshold: + scene-wide cloud cover threshold in percent [0-100]. Scenes with a cloud-cover + higher than the threshold are not considered + :param aois: + areas of interest (1 to N) for which to extract random pixel observations + """ + # setup Sentinel-2 mapper to get the relevant scenes + mapper_configs = MapperConfigs( + spatial_resolution=10., + resampling_method=cv2.INTER_NEAREST_EXACT, + band_names=['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12'] + ) + + # get a new mapper instance + mapper = Sentinel2Mapper( + date_start=date_start, + date_end=date_end, + processing_level=ProcessingLevels.L2A, + cloud_cover_threshold=scene_cloud_cover_threshold, + mapper_configs=mapper_configs, + feature_collection=aois + ) + # query the available scenes (spatio-temporal query in the metadata catalog) + mapper.get_scenes() + # extract the actual S2 data + s2_data = mapper.get_complete_timeseries() + + # extraction is based on features (1 to N geometries) + features = mapper.feature_collection['features'] + + # loop over features and extract scene data + for idx, feature in enumerate(features): + feature_id = mapper.get_feature_ids()[idx] + # scenes of the actual feature + feature_scenes = s2_data[feature_id] + # loop over scenes, drop non-cloudfree observations and save spectral values to GeoDataFrame + feature_refl_list = [] + for feature_scene in feature_scenes: + # drop all observations but SCL classes 4 and 5 + feature_scene.mask_clouds_and_shadows(inplace=True) + # save spectral values as GeoDataFrame + refl_df = feature_scene.to_dataframe() + # drop nans (results from the masking of clouds) + refl_df.drop_nan(inplace=True) + # save the sensing date + refl_df['sensing_date'] = pd.to_datetime(feature_scene.scene_properties.sensing_date) + feature_refl_list.append(refl_df) + # create a single data frame per feature + feature_refl_df = pd.concat(feature_refl_list) + feature_refl_df.sort_values(by='sensing_date', inplace=True) + # assign pixel ids based on the coordinates so that sampling per pixel time series is possible + feature_refl_df_pid = assign_pixel_ids(gdf=feature_refl_df) + # select 5 observations per pixel (or less if there are not enough) by random choice + feature_refl_grouped = feature_refl_df_pid.groupby(by='pixel_id') + # apply random choice on each pixel + + +if __name__ == '__main__': + + date_start = date(2022,3,1) + date_end = date(2022,3,31) + aois = Path('../data/sample_polygons/BY_AOI_2019_MNI_EPSG32632.shp') + scene_cloud_cover_threshold = 50 + + get_pixels( + date_start=date_start, + date_end=date_end, + scene_cloud_cover_threshold=scene_cloud_cover_threshold, + aois=aois + ) + \ No newline at end of file