Skip to content

Commit

Permalink
Use past PC search results and saved imagery (#17)
Browse files Browse the repository at this point in the history
* start using existing pc search file

* use existing satellite search results

* remove extra code

* add documentation

* update config

* fix errors

* update image array saving

* update test assets

* update features

* add debug mode to experiment

* improve get_items_metadata

* add test for image saving

* remove extra cache_dir argument

* add experiment debug mode to config

* update test_download_satellite_data

* features config argument for using past pc results

* update tests

* remove selected column from sat meta

* remove use_past_pc_results from config

* check for missing bands earlier

* linting tweak

* warn instead of raising error

* change handling of missing item bands
  • Loading branch information
klwetstone authored Aug 10, 2023
1 parent bb154d0 commit 4e3f2a5
Show file tree
Hide file tree
Showing 13 changed files with 304 additions and 113 deletions.
1 change: 1 addition & 0 deletions cyano/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class FeaturesConfig(BaseModel):
pc_meters_search_window: Optional[int] = 1000
use_sentinel_bands: Optional[List] = ["B02", "B03", "B04"]
image_feature_meter_window: Optional[int] = 500
n_sentinel_items: Optional[int] = 1
satellite_features: Optional[List] = [
"B02_mean",
"B02_min",
Expand Down
22 changes: 14 additions & 8 deletions cyano/data/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,26 +51,32 @@ def generate_satellite_features(
# Iterate over samples
for uid in tqdm(uids):
satellite_features_dict[uid] = {}
sample_dir = Path(cache_dir) / f"satellite/{uid}"
sample_dir = Path(cache_dir) / f"sentinel_{config.image_feature_meter_window}/{uid}"
# Skip samples with no imagery
if not sample_dir.exists():
continue

# Load stacked array for each image
# Load band arrays for each image
# Right now we only have one item per sample; this process will need to
# change if we have multiple items per sample
item_paths = list(sample_dir.glob("*.npy"))
if len(item_paths) > 1:
item_dirs = list(sample_dir.iterdir())
if len(item_dirs) == 0:
continue
elif len(item_dirs) > 1:
raise NotImplementedError(
f"{uid} has multiple items, cannot process multiple items per sample"
)
stacked_array = np.load(item_paths[0])

# Load stacked array in dictionary form with band names for keys
item_dir = item_dirs[0]
# Load band arrays into a dictionary with band names for keys
band_arrays = {}
# If we want to mask image data with water boundaries in some way, add here
for idx, band in enumerate(config.use_sentinel_bands):
band_arrays[band] = stacked_array[idx]
for band in config.use_sentinel_bands:
if not (item_dir / f"{band}.npy").exists():
raise FileNotFoundError(
f"Band {band} is missing from pystac item directory {item_dir}"
)
band_arrays[band] = np.load(item_dir / f"{band}.npy")

# Iterate over features to generate
for feature in config.satellite_features:
Expand Down
Loading

0 comments on commit 4e3f2a5

Please sign in to comment.