diff --git a/cyano/config.py b/cyano/config.py index adc2eb85..46e8f94b 100644 --- a/cyano/config.py +++ b/cyano/config.py @@ -22,6 +22,7 @@ class FeaturesConfig(BaseModel): pc_meters_search_window: Optional[int] = 1000 use_sentinel_bands: Optional[List] = ["B02", "B03", "B04"] image_feature_meter_window: Optional[int] = 500 + n_sentinel_items: Optional[int] = 1 satellite_features: Optional[List] = [ "B02_mean", "B02_min", diff --git a/cyano/data/features.py b/cyano/data/features.py index e94b793e..373152f4 100644 --- a/cyano/data/features.py +++ b/cyano/data/features.py @@ -51,26 +51,32 @@ def generate_satellite_features( # Iterate over samples for uid in tqdm(uids): satellite_features_dict[uid] = {} - sample_dir = Path(cache_dir) / f"satellite/{uid}" + sample_dir = Path(cache_dir) / f"sentinel_{config.image_feature_meter_window}/{uid}" # Skip samples with no imagery if not sample_dir.exists(): continue - # Load stacked array for each image + # Load band arrays for each image # Right now we only have one item per sample, process will need to # change if we have multiple - item_paths = list(sample_dir.glob("*.npy")) - if len(item_paths) > 1: + item_dirs = list(sample_dir.iterdir()) + if len(item_dirs) == 0: + continue + elif len(item_dirs) > 1: raise NotImplementedError( f"{uid} has multiple items, cannot process multiple items per sample" ) - stacked_array = np.load(item_paths[0]) - # Load stacked array in dictionary form with band names for keys + item_dir = item_dirs[0] + # Load band arrays into a dictionary with band names for keys band_arrays = {} # If we want to mask image data with water boundaries in some way, add here - for idx, band in enumerate(config.use_sentinel_bands): - band_arrays[band] = stacked_array[idx] + for band in config.use_sentinel_bands: + if not (item_dir / f"{band}.npy").exists(): + raise FileNotFoundError( + f"Band {band} is missing from pystac item directory {item_dir}" + ) + band_arrays[band] = np.load(item_dir / f"{band}.npy") # 
Iterate over features to generate for feature in config.satellite_features: diff --git a/cyano/data/satellite_data.py b/cyano/data/satellite_data.py index fd7c2c0b..c54e8fba 100644 --- a/cyano/data/satellite_data.py +++ b/cyano/data/satellite_data.py @@ -1,6 +1,9 @@ from datetime import timedelta -from typing import List, Union +import json +import shutil +from typing import Dict, List, Tuple, Union +from cloudpathlib import AnyPath import geopy.distance as distance from loguru import logger import numpy as np @@ -41,13 +44,13 @@ def get_bounding_box(latitude: float, longitude: float, meters_window: int) -> L def get_date_range(date: str, days_window: int) -> str: """Get a date range to search for in the planetary computer based on a sample's date. The time range will go from time_buffer_days - before the sample date to time_buffer_days after the sample date + before the sample date to the sample date Returns a string""" datetime_format = "%Y-%m-%d" - range_start = pd.to_datetime(date) - timedelta(days=days_window) - range_end = pd.to_datetime(date) + timedelta(days=days_window) - date_range = f"{range_start.strftime(datetime_format)}/{range_end.strftime(datetime_format)}" + date = pd.to_datetime(date) + range_start = date - timedelta(days=days_window) + date_range = f"{range_start.strftime(datetime_format)}/{date.strftime(datetime_format)}" return date_range @@ -85,6 +88,28 @@ def search_planetary_computer( return search_results +def bbox_from_geometry(geometry: Dict) -> Dict: + """For pystac items that don't have the bbox attribute, get the + bbox from the geometry + + Args: + geometry (Dict): A dictionary of geometry from item.geometry + + Returns: + Dict: Dictionary with keys for min_long, max_long, min_lat, + and max_lat + """ + lons = [coord_pair[0] for coord_pair in geometry["coordinates"][0]] + lats = [coord_pair[1] for coord_pair in geometry["coordinates"][0]] + + return { + "min_long": min(lons), + "max_long": max(lons), + "min_lat": min(lats), + 
"max_lat": max(lats), + } + + def get_items_metadata( search_results: ItemSearch, latitude: float, @@ -111,13 +136,24 @@ def get_items_metadata( item_meta = { "item_id": item.id, "datetime": item.datetime.strftime("%Y-%m-%d"), - "min_long": item.bbox[0], - "max_long": item.bbox[2], - "min_lat": item.bbox[1], - "max_lat": item.bbox[3], + "platform": item.properties["platform"], } + # Add item bounding box + if "bbox" in item.to_dict(): + item_meta.update( + { + "min_long": item.bbox[0], + "max_long": item.bbox[2], + "min_lat": item.bbox[1], + "max_lat": item.bbox[3], + } + ) + elif "geometry" in item.to_dict(): + bbox_dict = bbox_from_geometry(item.geometry) + item_meta.update(bbox_dict) + if "eo:cloud_cover" in item.properties: - item_meta.update({"cloud_cover": item.properties["eo:cloud_cover"]}) + item_meta.update({"eo:cloud_cover": item.properties["eo:cloud_cover"]}) # Add links to download each band needed for features for band in config.use_sentinel_bands: item_meta.update({f"{band}_href": item.assets[band].href}) @@ -137,123 +173,207 @@ def get_items_metadata( return items_meta +def generate_candidate_metadata( + samples: pd.DataFrame, config: FeaturesConfig +) -> Tuple[pd.DataFrame, Dict]: + """Generate metadata for all of the satellite item candidates + that could be used to generate features for each sample + + Args: + samples (pd.DataFrame): Dataframe where the index is uid and + there are columns for date, longitude, and latitude + config (FeaturesConfig): Features config + + Returns: + Tuple[pd.DataFrame, Dict]: Tuple of (metadata for all sentinel item + candidates, dictionary mapping sample UIDs to the relevant + pystac item IDs) + """ + logger.info("Generating metadata for all satellite item candidates") + + if len(samples) > 20: + # Load from saved directory with search results for all competition data + # Remove for final package + pc_results_dir = ( + AnyPath("s3://drivendata-competition-nasa-cyanobacteria") + / "data/interim/full_pc_search" + ) + 
sentinel_meta = pd.read_csv(pc_results_dir / "sentinel_metadata.csv") + logger.info( + f"Loaded {sentinel_meta.shape[0]:,} rows of Sentinel candidate metadata from {pc_results_dir}" + ) + with open(pc_results_dir / "sample_item_map.json", "r") as fp: + sample_item_map = json.load(fp) + + return (sentinel_meta, sample_item_map) + + # Otherwise, search the planetary computer + logger.info( + f"Searching {config.pc_collections} within {config.pc_days_search_window} days and {config.pc_meters_search_window} meters" + ) + sentinel_meta = [] + sample_item_map = {} + for sample in tqdm(samples.itertuples(), total=len(samples)): + # Search planetary computer + search_results = search_planetary_computer( + sample.date, + sample.latitude, + sample.longitude, + collections=config.pc_collections, + days_search_window=config.pc_days_search_window, + meters_search_window=config.pc_meters_search_window, + ) + + # Get satellite metadata + sample_items_meta = get_items_metadata( + search_results, sample.latitude, sample.longitude, config + ) + + sample_item_map[sample.Index] = { + "sentinel_item_ids": sample_items_meta.item_id.tolist() + if len(sample_items_meta) > 0 + else [] + } + sentinel_meta.append(sample_items_meta) + sentinel_meta = ( + pd.concat(sentinel_meta).groupby("item_id", as_index=False).first().reset_index(drop=True) + ) + logger.info(f"Generated metadata for {sentinel_meta.shape[0]:,} Sentinel item candidates") + + return (sentinel_meta, sample_item_map) + + def select_items( items_meta: pd.DataFrame, + date: Union[str, pd.Timestamp], + config: FeaturesConfig, ) -> List[str]: """Select which pystac items to include for a given sample Args: item_meta (pd.DataFrame): Dataframe with metadata about all possible pystac items to include for the given sample + date (Union[str, pd.Timestamp]): Date the sample was collected + config (FeaturesConfig): Features config Returns: List[str]: List of the pystac items IDs for the selected items """ - # Select least cloudy item - 
least_cloudy = items_meta.sort_values(by="cloud_cover").iloc[0].item_id + # Calculate days between sample and image + items_meta["day_diff"] = (pd.to_datetime(date) - pd.to_datetime(items_meta.datetime)).dt.days + # Filter by time frame + items_meta = items_meta[items_meta.day_diff.between(0, config.pc_days_search_window)].copy() - return [least_cloudy] + # Sort and select + items_meta["day_diff"] = np.abs(items_meta.day_diff) + selected = items_meta.sort_values( + by=["eo:cloud_cover", "day_diff"], ascending=[True, True] + ).head(config.n_sentinel_items) + return selected.item_id.tolist() -def identify_satellite_data( - samples: pd.DataFrame, config: FeaturesConfig, cache_dir -) -> pd.DataFrame: + +def identify_satellite_data(samples: pd.DataFrame, config: FeaturesConfig) -> pd.DataFrame: """Identify all pystac items to be used during feature generation for a given set of samples Args: samples (pd.DataFrame): Dataframe where the index is uid and there are columns for date, longitude, and latitude - config (FeaturesConfig): Featires config + config (FeaturesConfig): Features config Returns: pd.DataFrame: Each row is a unique combination of sample ID and pystac item id. 
The 'selected' column indicates which will be used in feature generation """ - save_dir = Path(cache_dir) / "satellite" - save_dir.mkdir(exist_ok=True, parents=True) - logger.info( - f"Searching {config.pc_collections} within {config.pc_days_search_window} days and {config.pc_meters_search_window} meters" - ) + ## Get all candidate item metadata + candidate_sentinel_meta, sample_item_map = generate_candidate_metadata(samples, config) - satellite_meta = [] - no_results = 0 + ## Select which items to use for each sample + logger.info("Selecting which items to use for feature generation") + selected_satellite_meta = [] for sample in tqdm(samples.itertuples(), total=len(samples)): - # Search planetary computer - search_results = search_planetary_computer( - sample.date, - sample.latitude, - sample.longitude, - collections=config.pc_collections, - days_search_window=config.pc_days_search_window, - meters_search_window=config.pc_meters_search_window, - ) - - # Get satelite metadata - sample_items_meta = get_items_metadata( - search_results, sample.latitude, sample.longitude, config - ) - if len(sample_items_meta) == 0: - no_results += 1 + sample_item_ids = sample_item_map[sample.Index]["sentinel_item_ids"] + if len(sample_item_ids) == 0: continue - # Select items to use for features - selected_ids = select_items(sample_items_meta) - sample_items_meta["selected"] = sample_items_meta.item_id.isin(selected_ids) + sample_items_meta = candidate_sentinel_meta[ + candidate_sentinel_meta.item_id.isin(sample_item_ids) + ].copy() + selected_ids = select_items(sample_items_meta, sample.date, config) + + # Save out the selected items + sample_items_meta = sample_items_meta[sample_items_meta.item_id.isin(selected_ids)] sample_items_meta["sample_id"] = sample.Index - satellite_meta.append(sample_items_meta) - logger.info(f"{no_results} samples did not return any satellite imagery results") + selected_satellite_meta.append(sample_items_meta) + + selected_satellite_meta = 
pd.concat(selected_satellite_meta).reset_index(drop=True) + logger.info( + f"Identified satellite imagery for {selected_satellite_meta.sample_id.nunique():,} samples" + ) - # Concatenate satellite meta for all samples - return pd.concat(satellite_meta) + return selected_satellite_meta def download_satellite_data( - satellite_meta: pd.DataFrame, samples: pd.DataFrame, config: FeaturesConfig, cache_dir + satellite_meta: pd.DataFrame, + samples: pd.DataFrame, + config: FeaturesConfig, + cache_dir: Union[str, Path], ): """Download satellite images as one stacked numpy arrays per pystac item Args: satellite_meta (pd.DataFrame): Dataframe of satellite metadata - indicating which pystac item(s) will be used in feature - generation for each sample + for all pystac items that have been selected for use in + feature generation samples (pd.DataFrame): Dataframe where the index is uid and there are columns for date, longitude, and latitude config (FeaturesConfig): Features config + cache_dir (Union[str, Path]): Cache directory to save raw imagery """ - # Filter to images selected for feature generation - selected = satellite_meta[satellite_meta.selected] - # Iterate over all rows (item / sample combos) logger.info(f"Downloading bands {config.use_sentinel_bands}") - for _, download_row in tqdm(selected.iterrows(), total=len(selected)): - sample_row = samples.loc[download_row.sample_id] - sample_dir = Path(cache_dir) / f"satellite/{download_row.sample_id}" - sample_dir.mkdir(exist_ok=True, parents=True) + no_data_in_bounds_errs = 0 - # Get bounding box for array to save out - (minx, miny, maxx, maxy) = get_bounding_box( - sample_row.latitude, sample_row.longitude, config.image_feature_meter_window - ) - # Iterate over bands and stack - band_arrays = [] - for band in config.use_sentinel_bands: - band_array = ( - rioxarray.open_rasterio(pc.sign(download_row[f"{band}_href"])) - .rio.clip_box( - minx=minx, - miny=miny, - maxx=maxx, - maxy=maxy, - crs="EPSG:4326", - ) - .to_numpy() + 
imagery_dir = Path(cache_dir) / f"sentinel_{config.image_feature_meter_window}" + for _, download_row in tqdm(satellite_meta.iterrows(), total=len(satellite_meta)): + sample_row = samples.loc[download_row.sample_id] + sample_image_dir = imagery_dir / f"{download_row.sample_id}/{download_row.item_id}" + sample_image_dir.mkdir(exist_ok=True, parents=True) + try: + # Get bounding box for array to save out + (minx, miny, maxx, maxy) = get_bounding_box( + sample_row.latitude, sample_row.longitude, config.image_feature_meter_window ) - band_arrays.append(band_array) - stacked_array = np.vstack(band_arrays) - - # Save stacked array - array_save_path = sample_dir / f"{download_row.item_id}.npy" - np.save(array_save_path, stacked_array) + # Iterate over bands and save + for band in config.use_sentinel_bands: + # Check if the file already exists + array_save_path = sample_image_dir / f"{band}.npy" + if not array_save_path.exists(): + # Get unsigned URL so we don't use expired token + unsigned_href = download_row[f"{band}_href"].split("?")[0] + band_array = ( + rioxarray.open_rasterio(pc.sign(unsigned_href)) + .rio.clip_box( + minx=minx, + miny=miny, + maxx=maxx, + maxy=maxy, + crs="EPSG:4326", + ) + .to_numpy() + ) + np.save(array_save_path, band_array) + + except rioxarray.exceptions.NoDataInBounds: + no_data_in_bounds_errs += 1 + # Delete item directory if it has already been created + if sample_image_dir.exists(): + shutil.rmtree(sample_image_dir) + if no_data_in_bounds_errs > 0: + logger.warning( + f"Could not download {no_data_in_bounds_errs:,} image/sample combinations with no data in bounds" + ) diff --git a/cyano/experiment.py b/cyano/experiment.py index 45f6e97f..2b7143c3 100644 --- a/cyano/experiment.py +++ b/cyano/experiment.py @@ -16,6 +16,7 @@ class ExperimentConfig(BaseModel): predict_csv: Path cache_dir: Path = None save_dir: Path = None + debug: bool = False @field_serializer("train_csv", "predict_csv", "cache_dir", "save_dir") def serialize_path_to_str(self, 
x, _info): @@ -27,20 +28,25 @@ def run_experiment(self): model_training_config=self.model_training_config, cache_dir=self.cache_dir, ) - pipeline.run_training(train_csv=self.train_csv, save_path=self.save_dir / "model.zip") + pipeline.run_training( + train_csv=self.train_csv, save_path=self.save_dir / "model.zip", debug=self.debug + ) logger.success(f"Writing out artifact config to {self.save_dir}") with open(f"{self.save_dir}/config_artifact.yaml", "w") as fp: yaml.dump(self.model_dump(), fp) pipeline.run_prediction( - predict_csv=self.predict_csv, preds_path=self.save_dir / "preds.csv" + predict_csv=self.predict_csv, preds_path=self.save_dir / "preds.csv", debug=self.debug ) - EvaluatePreds( - y_true_csv=self.predict_csv, - y_pred_csv=self.save_dir / "preds.csv", - save_dir=self.save_dir / "metrics", - ).calculate_all_and_save() + if self.debug: + logger.info("Evaluation is not run in debug mode") + else: + EvaluatePreds( + y_true_csv=self.predict_csv, + y_pred_csv=self.save_dir / "preds.csv", + save_dir=self.save_dir / "metrics", + ).calculate_all_and_save() - logger.success(f"Wrote out metrics to {self.save_dir}/metrics") + logger.success(f"Wrote out metrics to {self.save_dir}/metrics") diff --git a/cyano/pipeline.py b/cyano/pipeline.py index fb40f9f1..55a7a87a 100644 --- a/cyano/pipeline.py +++ b/cyano/pipeline.py @@ -53,7 +53,7 @@ def _prep_train_data(self, data, debug=False): def _prepare_features(self, samples): ## Identify satellite data - satellite_meta = identify_satellite_data(samples, self.features_config, self.cache_dir) + satellite_meta = identify_satellite_data(samples, self.features_config) save_satellite_to = self.cache_dir / "satellite_metadata_train.csv" satellite_meta.to_csv(save_satellite_to, index=False) logger.info( @@ -148,8 +148,8 @@ def _write_predictions(self, preds_path): self.output_df.to_csv(preds_path, index=True) logger.success(f"Predictions saved to {preds_path}") - def run_prediction(self, predict_csv, preds_path): - 
self._prep_predict_data(predict_csv) + def run_prediction(self, predict_csv, preds_path, debug=False): + self._prep_predict_data(predict_csv, debug) self._prepare_predict_features() self._predict_model() self._write_predictions(preds_path) diff --git a/tests/assets/evaluate_data.csv b/tests/assets/evaluate_data.csv index f1dc0731..7509216a 100644 --- a/tests/assets/evaluate_data.csv +++ b/tests/assets/evaluate_data.csv @@ -1,6 +1,6 @@ -date,latitude,longitude,region,date,density_cells_per_ml,severity -2021-06-29,41.424144,-73.206937,midwest,2018-05-14,585.0,1 -2021-07-25,36.045,-79.0919415955354,west,2016-08-31,5867500.0,4 -2021-08-21,35.8845244661109,-78.9539970867146,south,2020-11-19,290.0,1 -2021-08-28,41.39249,-75.3607,south,2016-08-24,1614.0,1 -2021-07-11,38.3056,-122.026,midwest,2019-07-23,111825.0,3 +latitude,longitude,date,split,region,severity,density +40.090275,-76.873132,2018-05-21,train,northeast,1,0.0 +35.7200811863161,-79.1374207771809,2013-05-22,train,south,2,29046.0 +35.6940254103693,-79.1858165585188,2016-10-18,train,south,1,94.0 +35.68225,-79.07802,2015-03-24,train,south,1,2179.0 +35.79,-79.0264911210803,2018-07-11,train,south,1,11981.0 diff --git a/tests/assets/experiment/model.zip b/tests/assets/experiment/model.zip index 67704be3..9ffa944a 100644 Binary files a/tests/assets/experiment/model.zip and b/tests/assets/experiment/model.zip differ diff --git a/tests/assets/feature_cache/satellite/3a2c48812b551d720f8d56772efa6df1/S2B_MSIL2A_20190918T154919_R054_T18TVL_20201004T144350.npy b/tests/assets/feature_cache/sentinel_500/3a2c48812b551d720f8d56772efa6df1/S2B_MSIL2A_20190918T154919_R054_T18TVL_20201004T144350/B02.npy similarity index 100% rename from tests/assets/feature_cache/satellite/3a2c48812b551d720f8d56772efa6df1/S2B_MSIL2A_20190918T154919_R054_T18TVL_20201004T144350.npy rename to tests/assets/feature_cache/sentinel_500/3a2c48812b551d720f8d56772efa6df1/S2B_MSIL2A_20190918T154919_R054_T18TVL_20201004T144350/B02.npy diff --git 
a/tests/assets/predict_data.csv b/tests/assets/predict_data.csv index 19042959..732a817b 100644 --- a/tests/assets/predict_data.csv +++ b/tests/assets/predict_data.csv @@ -1,6 +1,6 @@ date,latitude,longitude -2021-06-29,41.424144,-73.206937 -2021-07-25,36.045,-79.0919415955354 -2021-08-21,35.8845244661109,-78.9539970867146 -2021-08-28,41.39249,-75.3607 -2021-07-11,38.3056,-122.026 +2021-05-18,35.65,-78.6828160966743 +2018-10-22,37.5643183,-101.3355745 +2021-05-17,36.05,-76.7 +2016-08-31,35.7054164952572,-79.1646588522751 +2015-06-27,41.287577,-80.424543 diff --git a/tests/assets/satellite_meta.csv b/tests/assets/satellite_meta.csv deleted file mode 100644 index 6c6e8239..00000000 --- a/tests/assets/satellite_meta.csv +++ /dev/null @@ -1,3 +0,0 @@ -item_id,datetime,min_long,max_long,min_lat,max_lat,cloud_cover,B02_href,selected,sample_id -S2B_MSIL2A_20170723T155909_R097_T17SPV_20210210T132957,2017-07-23,-79.90214616115195,-78.66878536100633,35.13300633202725,36.13974073092875,42.945348,https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/17/S/PV/2017/07/23/S2B_MSIL2A_20170723T155909_N0212_R097_T17SPV_20210210T132957.SAFE/GRANULE/L2A_T17SPV_A001982_20170723T161238/IMG_DATA/R10m/T17SPV_20170723T155909_B02_10m.tif?st=2023-07-25T17%3A31%3A40Z&se=2023-07-26T18%3A16%3A40Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-07-26T13%3A00%3A01Z&ske=2023-08-02T13%3A00%3A01Z&sks=b&skv=2021-06-08&sig=Sm7ZNd3rsECuWRWlIQsrvfDHQAu5gpDfh0OpCEbORfM%3D,True,rszn 
-S2B_MSIL2A_20190918T154919_R054_T18TVL_20201004T144350,2019-09-18,-76.19946,-74.882996,40.556706911840905,41.55135515173343,5.237853,https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/18/T/VL/2019/09/18/S2B_MSIL2A_20190918T154919_N0212_R054_T18TVL_20201004T144350.SAFE/GRANULE/L2A_T18TVL_A013236_20190918T160136/IMG_DATA/R10m/T18TVL_20190918T154919_B02_10m.tif?st=2023-07-25T17%3A31%3A40Z&se=2023-07-26T18%3A16%3A40Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-07-26T13%3A00%3A01Z&ske=2023-08-02T13%3A00%3A01Z&sks=b&skv=2021-06-08&sig=Sm7ZNd3rsECuWRWlIQsrvfDHQAu5gpDfh0OpCEbORfM%3D,True,ofhd diff --git a/tests/assets/satellite_metadata.csv b/tests/assets/satellite_metadata.csv new file mode 100644 index 00000000..a47d64e5 --- /dev/null +++ b/tests/assets/satellite_metadata.csv @@ -0,0 +1,3 @@ +item_id,datetime,platform,min_long,max_long,min_lat,max_lat,eo:cloud_cover,B02_href,B03_href,day_diff,selected,sample_id 
+S2B_MSIL2A_20170723T155909_R097_T17SPV_20210210T132957,2017-07-23,Sentinel-2B,-79.90214616115195,-78.66878536100633,35.13300633202725,36.13974073092875,42.945348,https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/17/S/PV/2017/07/23/S2B_MSIL2A_20170723T155909_N0212_R097_T17SPV_20210210T132957.SAFE/GRANULE/L2A_T17SPV_A001982_20170723T161238/IMG_DATA/R10m/T17SPV_20170723T155909_B02_10m.tif?st=2023-08-08T20%3A04%3A50Z&se=2023-08-09T20%3A49%3A50Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-08-09T19%3A20%3A48Z&ske=2023-08-16T19%3A20%3A48Z&sks=b&skv=2021-06-08&sig=cROyki1g3qv0aPv4AINV%2Bs2p6xuGUnoAo8q6cTrB7cw%3D,https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/17/S/PV/2017/07/23/S2B_MSIL2A_20170723T155909_N0212_R097_T17SPV_20210210T132957.SAFE/GRANULE/L2A_T17SPV_A001982_20170723T161238/IMG_DATA/R10m/T17SPV_20170723T155909_B03_10m.tif?st=2023-08-08T20%3A04%3A50Z&se=2023-08-09T20%3A49%3A50Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-08-09T19%3A20%3A48Z&ske=2023-08-16T19%3A20%3A48Z&sks=b&skv=2021-06-08&sig=cROyki1g3qv0aPv4AINV%2Bs2p6xuGUnoAo8q6cTrB7cw%3D,29,True,9c601f226c2af07d570134127a7fda27 
+S2B_MSIL2A_20190730T154819_R054_T18TVL_20201005T200628,2019-07-30,Sentinel-2B,-76.19946,-74.882965,40.556706911840905,41.55178511688993,5.31211,https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/18/T/VL/2019/07/30/S2B_MSIL2A_20190730T154819_N0212_R054_T18TVL_20201005T200628.SAFE/GRANULE/L2A_T18TVL_A012521_20190730T155818/IMG_DATA/R10m/T18TVL_20190730T154819_B02_10m.tif?st=2023-08-08T20%3A04%3A50Z&se=2023-08-09T20%3A49%3A50Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-08-09T19%3A20%3A48Z&ske=2023-08-16T19%3A20%3A48Z&sks=b&skv=2021-06-08&sig=cROyki1g3qv0aPv4AINV%2Bs2p6xuGUnoAo8q6cTrB7cw%3D,https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/18/T/VL/2019/07/30/S2B_MSIL2A_20190730T154819_N0212_R054_T18TVL_20201005T200628.SAFE/GRANULE/L2A_T18TVL_A012521_20190730T155818/IMG_DATA/R10m/T18TVL_20190730T154819_B03_10m.tif?st=2023-08-08T20%3A04%3A50Z&se=2023-08-09T20%3A49%3A50Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-08-09T19%3A20%3A48Z&ske=2023-08-16T19%3A20%3A48Z&sks=b&skv=2021-06-08&sig=cROyki1g3qv0aPv4AINV%2Bs2p6xuGUnoAo8q6cTrB7cw%3D,29,True,3a2c48812b551d720f8d56772efa6df1 diff --git a/tests/conftest.py b/tests/conftest.py index 988c371c..34931bfd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,6 +19,11 @@ def train_data(train_data_path) -> pd.DataFrame: return pd.read_csv(train_data_path) +@pytest.fixture(scope="session") +def satellite_meta() -> pd.DataFrame: + return pd.read_csv(ASSETS_DIR / "satellite_metadata.csv") + + @pytest.fixture(scope="session") def experiment_config_path() -> Path: return ASSETS_DIR / "experiment_config.yaml" diff --git a/tests/test_features.py b/tests/test_features.py index 7a70cafe..ce740589 100644 --- a/tests/test_features.py +++ b/tests/test_features.py @@ -3,6 +3,7 @@ import numpy as np from cyano.data.features import generate_features +from 
cyano.data.satellite_data import download_satellite_data, generate_candidate_metadata from cyano.data.utils import add_unique_identifier ASSETS_DIR = Path(__file__).parent / "assets" @@ -22,3 +23,55 @@ def test_known_features(train_data, features_config): assert np.isclose(features.loc["3a2c48812b551d720f8d56772efa6df1", "B02_mean"], 161.532712) assert np.isclose(features.loc["3a2c48812b551d720f8d56772efa6df1", "B02_min"], 50) assert np.isclose(features.loc["3a2c48812b551d720f8d56772efa6df1", "B02_max"], 1182) + + +def test_generate_candidate_metadata(train_data, features_config): + train_data = add_unique_identifier(train_data) + + candidate_meta, sample_item_map = generate_candidate_metadata(train_data, features_config) + + # Check that item map has the correct samples and matches known values + assert len(sample_item_map) == len(train_data) + assert set(sample_item_map.keys()) == set(train_data.index) + assert sample_item_map["3a2c48812b551d720f8d56772efa6df1"]["sentinel_item_ids"] == [ + "S2A_MSIL2A_20190824T154911_R054_T18TVL_20201106T052956", + "S2B_MSIL2A_20190819T154819_R054_T18TVL_20201005T022720", + "S2A_MSIL2A_20190814T154911_R054_T18TVL_20201005T001501", + "S2B_MSIL2A_20190809T154819_R054_T18TVL_20201004T222827", + "S2A_MSIL2A_20190804T154911_R054_T18TVL_20201004T201836", + "S2B_MSIL2A_20190730T154819_R054_T18TVL_20201005T200628", + ] + + # Check that candidate metadata matches known expected values + assert candidate_meta.item_id.is_unique + assert len(candidate_meta) == 9 + assert ( + "S2A_MSIL2A_20170728T155901_R097_T17SPV_20210210T154351" in candidate_meta.item_id.values + ) + assert ( + "S2B_MSIL2A_20190819T154819_R054_T18TVL_20201005T022720" in candidate_meta.item_id.values + ) + + +def test_download_satellite_data(tmp_path, satellite_meta, train_data, features_config): + # Download imagery + features_config.use_sentinel_bands = ["B02", "B03"] + train_data = add_unique_identifier(train_data) + download_satellite_data(satellite_meta, train_data, 
features_config, tmp_path) + + # Sentinel image cache directory exists + sentinel_dir = tmp_path / f"sentinel_{features_config.image_feature_meter_window}" + assert sentinel_dir.exists() + assert len(list(sentinel_dir.rglob("*.npy"))) > 0 + + # Check that the structure of saved image arrays is correct + for sample_dir in sentinel_dir.iterdir(): + # Correct number of items per sample + sample_item_dirs = list(sample_dir.iterdir()) + assert len(sample_item_dirs) == features_config.n_sentinel_items + + # Correct bands for each item + for sample_item_dir in sample_item_dirs: + assert set([pth.stem for pth in sample_item_dir.iterdir()]) == set( + features_config.use_sentinel_bands + )