From 6b9d56c105d14fe52f2d40847d5216ad4c4a631a Mon Sep 17 00:00:00 2001
From: Emily Davis <eadavis@stanford.edu>
Date: Tue, 10 Sep 2024 17:53:28 -0600
Subject: [PATCH 1/4] Copies existing fields in UFRM source aoi vector to
 target vector

---
 .../invest/urban_flood_risk_mitigation.py     |  5 ++++
 tests/test_ufrm.py                            | 25 ++++++++++++++-----
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/src/natcap/invest/urban_flood_risk_mitigation.py b/src/natcap/invest/urban_flood_risk_mitigation.py
index 7f865e5e3..493d60b5a 100644
--- a/src/natcap/invest/urban_flood_risk_mitigation.py
+++ b/src/natcap/invest/urban_flood_risk_mitigation.py
@@ -554,6 +554,7 @@ def _write_summary_vector(
     """
     source_aoi_vector = gdal.OpenEx(source_aoi_vector_path, gdal.OF_VECTOR)
     source_aoi_layer = source_aoi_vector.GetLayer()
+    source_aoi_field_defns = source_aoi_layer.schema
     source_geom_type = source_aoi_layer.GetGeomType()
     source_srs_wkt = pygeoprocessing.get_vector_info(
         source_aoi_vector_path)['projection_wkt']
@@ -612,6 +613,10 @@ def _write_summary_vector(
             'flood_vol', float(flood_volume_stats[feature_id]['sum']))
 
         target_watershed_layer.CreateFeature(target_feature)
+
+    for field_defn in source_aoi_field_defns:
+        target_watershed_layer.CreateField(field_defn)
+
     target_watershed_layer.SyncToDisk()
     target_watershed_layer = None
     target_watershed_vector = None
diff --git a/tests/test_ufrm.py b/tests/test_ufrm.py
index 07df67157..1d5e1d96b 100644
--- a/tests/test_ufrm.py
+++ b/tests/test_ufrm.py
@@ -59,6 +59,11 @@ def test_ufrm_regression(self):
         """UFRM: regression test."""
         from natcap.invest import urban_flood_risk_mitigation
         args = self._make_args()
+        input_vector = gdal.OpenEx(args['aoi_watersheds_path'],
+                                   gdal.OF_VECTOR)
+        input_layer = input_vector.GetLayer()
+        input_fields = [field.GetName() for field in input_layer.schema]
+
         urban_flood_risk_mitigation.execute(args)
 
         result_vector = gdal.OpenEx(os.path.join(
@@ -66,10 +71,12 @@ def test_ufrm_regression(self):
             gdal.OF_VECTOR)
         result_layer = result_vector.GetLayer()
 
-        # Check that all four expected fields are there.
+        # Check that all expected fields are there.
+        output_fields = ['aff_bld', 'serv_blt', 'rnf_rt_idx',
+                         'rnf_rt_m3', 'flood_vol']
+        output_fields += input_fields
         self.assertEqual(
-            set(('aff_bld', 'serv_blt', 'rnf_rt_idx', 'rnf_rt_m3',
-                 'flood_vol')),
+            set(output_fields),
             set(field.GetName() for field in result_layer.schema))
 
         result_feature = result_layer.GetFeature(0)
@@ -94,6 +101,11 @@ def test_ufrm_regression_no_infrastructure(self):
         from natcap.invest import urban_flood_risk_mitigation
         args = self._make_args()
         del args['built_infrastructure_vector_path']
+        input_vector = gdal.OpenEx(args['aoi_watersheds_path'],
+                                   gdal.OF_VECTOR)
+        input_layer = input_vector.GetLayer()
+        input_fields = [field.GetName() for field in input_layer.schema]
+
         urban_flood_risk_mitigation.execute(args)
 
         result_raster = gdal.OpenEx(os.path.join(
@@ -115,9 +127,11 @@ def test_ufrm_regression_no_infrastructure(self):
         result_layer = result_vector.GetLayer()
         result_feature = result_layer.GetFeature(0)
 
-        # Check that only the two expected fields are there.
+        # Check that only the expected fields are there.
+        output_fields = ['rnf_rt_idx', 'rnf_rt_m3', 'flood_vol']
+        output_fields += input_fields
         self.assertEqual(
-            set(('rnf_rt_idx', 'rnf_rt_m3', 'flood_vol')),
+            set(output_fields),
             set(field.GetName() for field in result_layer.schema))
 
         for fieldname, expected_value in (
@@ -218,7 +232,6 @@ def test_ufrm_explicit_zeros_in_table(self):
         except ValueError:
             self.fail('unexpected ValueError when testing curve number row with all zeros')
 
-
     def test_ufrm_string_damage_to_infrastructure(self):
         """UFRM: handle str(int) structure indices.
 

From 2797c4c32502063c729f47c8fb0f3ae6265c55cb Mon Sep 17 00:00:00 2001
From: Emily Davis <eadavis@stanford.edu>
Date: Wed, 11 Sep 2024 09:55:16 -0600
Subject: [PATCH 2/4] Update HISTORY.rst

---
 HISTORY.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/HISTORY.rst b/HISTORY.rst
index 14a2c709b..686eaaaca 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -45,6 +45,9 @@ Unreleased Changes
       (https://github.com/natcap/invest/issues/1598)
     * Fixed a bug that was allowing readonly workspace directories on Windows
       (https://github.com/natcap/invest/issues/1599)
+* Urban Flood Risk
+    * Fields present on the input AOI vector are now retained in the output.
+      (https://github.com/natcap/invest/issues/1600)
 
 3.14.2 (2024-05-29)
 -------------------

From 3ec1485d28b7eb70ed66eff97761cbfe734958a7 Mon Sep 17 00:00:00 2001
From: Emily Davis <eadavis@stanford.edu>
Date: Mon, 30 Sep 2024 16:52:58 -0600
Subject: [PATCH 3/4] Update UFRM to use CreateCopy when generating summary
 vector

---
 .../invest/urban_flood_risk_mitigation.py     | 51 +++++++------------
 tests/test_ufrm.py                            |  7 ++-
 2 files changed, 24 insertions(+), 34 deletions(-)

diff --git a/src/natcap/invest/urban_flood_risk_mitigation.py b/src/natcap/invest/urban_flood_risk_mitigation.py
index 493d60b5a..fa5b64cac 100644
--- a/src/natcap/invest/urban_flood_risk_mitigation.py
+++ b/src/natcap/invest/urban_flood_risk_mitigation.py
@@ -553,22 +553,11 @@ def _write_summary_vector(
         ``None``
     """
     source_aoi_vector = gdal.OpenEx(source_aoi_vector_path, gdal.OF_VECTOR)
-    source_aoi_layer = source_aoi_vector.GetLayer()
-    source_aoi_field_defns = source_aoi_layer.schema
-    source_geom_type = source_aoi_layer.GetGeomType()
-    source_srs_wkt = pygeoprocessing.get_vector_info(
-        source_aoi_vector_path)['projection_wkt']
-    source_srs = osr.SpatialReference()
-    source_srs.ImportFromWkt(source_srs_wkt)
-
     esri_driver = gdal.GetDriverByName('ESRI Shapefile')
-    target_watershed_vector = esri_driver.Create(
-        target_vector_path, 0, 0, 0, gdal.GDT_Unknown)
-    layer_name = os.path.splitext(os.path.basename(
-        target_vector_path))[0]
-    LOGGER.debug(f"creating layer {layer_name}")
-    target_watershed_layer = target_watershed_vector.CreateLayer(
-        layer_name, source_srs, source_geom_type)
+    esri_driver.CreateCopy(target_vector_path, source_aoi_vector)
+    target_watershed_vector = gdal.OpenEx(target_vector_path,
+                                          gdal.OF_VECTOR | gdal.GA_Update)
+    target_watershed_layer = target_watershed_vector.GetLayer()
 
     target_fields = ['rnf_rt_idx', 'rnf_rt_m3', 'flood_vol']
     if damage_per_aoi_stats is not None:
@@ -580,42 +569,38 @@ def _write_summary_vector(
         field_def.SetPrecision(11)
         target_watershed_layer.CreateField(field_def)
 
-    target_layer_defn = target_watershed_layer.GetLayerDefn()
-    for base_feature in source_aoi_layer:
-        feature_id = base_feature.GetFID()
-        target_feature = ogr.Feature(target_layer_defn)
-        base_geom_ref = base_feature.GetGeometryRef()
-        target_feature.SetGeometry(base_geom_ref.Clone())
-        base_geom_ref = None
+    target_watershed_layer.ResetReading()
+    for target_feature in target_watershed_layer:
+        # Target vector is SHP, where FIDs start at 0, but stats were
+        # generated based on GPKG reprojection, where FIDs start at 1.
+        # Therefore, we need to reference stats at SHP FID + 1.
+        stat_key = target_feature.GetFID() + 1
 
-        pixel_count = runoff_ret_stats[feature_id]['count']
+        pixel_count = runoff_ret_stats[stat_key]['count']
         if pixel_count > 0:
             mean_value = (
-                runoff_ret_stats[feature_id]['sum'] / float(pixel_count))
+                runoff_ret_stats[stat_key]['sum'] / float(pixel_count))
             target_feature.SetField('rnf_rt_idx', float(mean_value))
 
         target_feature.SetField(
             'rnf_rt_m3', float(
-                runoff_ret_vol_stats[feature_id]['sum']))
+                runoff_ret_vol_stats[stat_key]['sum']))
 
         if damage_per_aoi_stats is not None:
-            pixel_count = runoff_ret_vol_stats[feature_id]['count']
+            pixel_count = runoff_ret_vol_stats[stat_key]['count']
             if pixel_count > 0:
-                damage_sum = damage_per_aoi_stats[feature_id]
+                damage_sum = damage_per_aoi_stats[stat_key]
                 target_feature.SetField('aff_bld', damage_sum)
 
                 # This is the service_built equation.
                 target_feature.SetField(
                     'serv_blt', (
-                        damage_sum * runoff_ret_vol_stats[feature_id]['sum']))
+                        damage_sum * runoff_ret_vol_stats[stat_key]['sum']))
 
         target_feature.SetField(
-            'flood_vol', float(flood_volume_stats[feature_id]['sum']))
-
-        target_watershed_layer.CreateFeature(target_feature)
+            'flood_vol', float(flood_volume_stats[stat_key]['sum']))
 
-    for field_defn in source_aoi_field_defns:
-        target_watershed_layer.CreateField(field_defn)
+        target_watershed_layer.SetFeature(target_feature)
 
     target_watershed_layer.SyncToDisk()
     target_watershed_layer = None
diff --git a/tests/test_ufrm.py b/tests/test_ufrm.py
index 1d5e1d96b..eb2cef2f6 100644
--- a/tests/test_ufrm.py
+++ b/tests/test_ufrm.py
@@ -79,7 +79,7 @@ def test_ufrm_regression(self):
             set(output_fields),
             set(field.GetName() for field in result_layer.schema))
 
-        result_feature = result_layer.GetFeature(0)
+        result_feature = result_layer.GetNextFeature()
         for fieldname, expected_value in (
                 ('aff_bld', 187010830.32202843),
                 ('serv_blt', 13253546667257.65),
@@ -92,6 +92,11 @@ def test_ufrm_regression(self):
             self.assertAlmostEqual(
                 result_val, expected_value, places=-places_to_round)
 
+        input_feature = input_layer.GetNextFeature()
+        for fieldname in input_fields:
+            self.assertEqual(result_feature.GetField(fieldname),
+                             input_feature.GetField(fieldname))
+
         result_feature = None
         result_layer = None
         result_vector = None

From 5458781db4469cf2837cd918a788af2b17b30eaa Mon Sep 17 00:00:00 2001
From: Emily Davis <eadavis@stanford.edu>
Date: Tue, 8 Oct 2024 12:10:17 -0600
Subject: [PATCH 4/4] Use ESRI SHP driver for reprojections to avoid off-by-one
 scenario when mapping zonal stats to output vector

---
 .../invest/urban_flood_risk_mitigation.py     | 37 +++++++++----------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/src/natcap/invest/urban_flood_risk_mitigation.py b/src/natcap/invest/urban_flood_risk_mitigation.py
index fa5b64cac..a6dbfa569 100644
--- a/src/natcap/invest/urban_flood_risk_mitigation.py
+++ b/src/natcap/invest/urban_flood_risk_mitigation.py
@@ -173,14 +173,14 @@
                     "about": "Map of runoff volume.",
                     "bands": {1: {"type": "number", "units": u.meter**3}}
                 },
-                "reprojected_aoi.gpkg": {
+                "reprojected_aoi.shp": {
                     "about": (
                         "Copy of AOI vector reprojected to the same spatial "
                         "reference as the LULC."),
                     "geometries": spec_utils.POLYGONS,
                     "fields": {}
                 },
-                "structures_reprojected.gpkg": {
+                "structures_reprojected.shp": {
                     "about": (
                         "Copy of built infrastructure vector reprojected to "
                         "the same spatial reference as the LULC."),
@@ -408,14 +408,14 @@ def execute(args):
         task_name='calculate service built raster')
 
     reprojected_aoi_path = os.path.join(
-        intermediate_dir, 'reprojected_aoi.gpkg')
+        intermediate_dir, 'reprojected_aoi')
     reprojected_aoi_task = task_graph.add_task(
         func=pygeoprocessing.reproject_vector,
         args=(
             args['aoi_watersheds_path'],
             target_sr_wkt,
             reprojected_aoi_path),
-        kwargs={'driver_name': 'GPKG'},
+        kwargs={'driver_name': 'ESRI Shapefile'},
         target_path_list=[reprojected_aoi_path],
         task_name='reproject aoi/watersheds')
 
@@ -435,7 +435,7 @@ def execute(args):
             (runoff_retention_raster_path, 1),
             reprojected_aoi_path),
         store_result=True,
-        dependent_task_list=[runoff_retention_task],
+        dependent_task_list=[runoff_retention_task, reprojected_aoi_task],
         task_name='zonal_statistics over runoff_retention raster')
 
     runoff_retention_volume_stats_task = task_graph.add_task(
@@ -444,7 +444,7 @@ def execute(args):
             (runoff_retention_vol_raster_path, 1),
             reprojected_aoi_path),
         store_result=True,
-        dependent_task_list=[runoff_retention_vol_task],
+        dependent_task_list=[runoff_retention_vol_task, reprojected_aoi_task],
         task_name='zonal_statistics over runoff_retention_volume raster')
 
     damage_per_aoi_stats = None
@@ -457,13 +457,13 @@ def execute(args):
             args['built_infrastructure_vector_path'] not in ('', None)):
         # Reproject the built infrastructure vector to the target SRS.
         reprojected_structures_path = os.path.join(
-            intermediate_dir, 'structures_reprojected.gpkg')
+            intermediate_dir, 'structures_reprojected')
         reproject_built_infrastructure_task = task_graph.add_task(
             func=pygeoprocessing.reproject_vector,
             args=(args['built_infrastructure_vector_path'],
                   target_sr_wkt,
                   reprojected_structures_path),
-            kwargs={'driver_name': 'GPKG'},
+            kwargs={'driver_name': 'ESRI Shapefile'},
             target_path_list=[reprojected_structures_path],
             task_name='reproject built infrastructure to target SRS')
 
@@ -512,7 +512,7 @@ def _write_summary_vector(
         runoff_ret_vol_stats, flood_volume_stats, damage_per_aoi_stats=None):
     """Write a vector with summary statistics.
 
-    This vector will always contain two fields::
+    This vector will always contain three fields::
 
         * ``'flood_vol'``: The volume of flood (runoff), in m3, per watershed.
         * ``'rnf_rt_idx'``: Average of runoff retention values per watershed
@@ -571,34 +571,31 @@ def _write_summary_vector(
 
     target_watershed_layer.ResetReading()
     for target_feature in target_watershed_layer:
-        # Target vector is SHP, where FIDs start at 0, but stats were
-        # generated based on GPKG reprojection, where FIDs start at 1.
-        # Therefore, we need to reference stats at SHP FID + 1.
-        stat_key = target_feature.GetFID() + 1
+        feature_id = target_feature.GetFID()
 
-        pixel_count = runoff_ret_stats[stat_key]['count']
+        pixel_count = runoff_ret_stats[feature_id]['count']
         if pixel_count > 0:
             mean_value = (
-                runoff_ret_stats[stat_key]['sum'] / float(pixel_count))
+                runoff_ret_stats[feature_id]['sum'] / float(pixel_count))
             target_feature.SetField('rnf_rt_idx', float(mean_value))
 
         target_feature.SetField(
             'rnf_rt_m3', float(
-                runoff_ret_vol_stats[stat_key]['sum']))
+                runoff_ret_vol_stats[feature_id]['sum']))
 
         if damage_per_aoi_stats is not None:
-            pixel_count = runoff_ret_vol_stats[stat_key]['count']
+            pixel_count = runoff_ret_vol_stats[feature_id]['count']
             if pixel_count > 0:
-                damage_sum = damage_per_aoi_stats[stat_key]
+                damage_sum = damage_per_aoi_stats[feature_id]
                 target_feature.SetField('aff_bld', damage_sum)
 
                 # This is the service_built equation.
                 target_feature.SetField(
                     'serv_blt', (
-                        damage_sum * runoff_ret_vol_stats[stat_key]['sum']))
+                        damage_sum * runoff_ret_vol_stats[feature_id]['sum']))
 
         target_feature.SetField(
-            'flood_vol', float(flood_volume_stats[stat_key]['sum']))
+            'flood_vol', float(flood_volume_stats[feature_id]['sum']))
 
         target_watershed_layer.SetFeature(target_feature)