[1pt] PR: Cut down Alaska HUCs runtime #1327

Open · wants to merge 4 commits into base: dev
11 changes: 11 additions & 0 deletions docs/CHANGELOG.md
@@ -1,6 +1,17 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v4.5.x.x - 2024-10-23 - [PR#1327](https://github.com/NOAA-OWP/inundation-mapping/pull/1327)

The initial purpose of this PR was to cut down the runtime for four Alaska HUCs (`19020104`, `19020503`, `19020402`, and `19020602`). It significantly reduces runtime by replacing the nested for loop used to update rating curves for short segments with a vectorized process, which can improve runtime for all HUCs that contain small segments.

### Changes

- `src/add_crosswalk.py`: Drops duplicate `HydroID` rows after merging catchment attributes and replaces the nested loop that updated rating curves for short segments with a vectorized merge.
- `tools/bridge_inundation.py`: Casts `feature_id` to integer in both the bridge points and the flow file data before merging, and updates the GeoPackage write call.

<br/><br/>

## v4.5.11.1 - 2024-10-16 - [PR#1318](https://github.com/NOAA-OWP/inundation-mapping/pull/1318)

Bug fixes to address issues during `fim_pipeline.sh`.
34 changes: 24 additions & 10 deletions src/add_crosswalk.py
@@ -110,6 +110,8 @@ def add_crosswalk(

output_flows = output_flows.merge(output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID')

output_flows = output_flows.drop_duplicates(subset='HydroID')

output_flows['ManningN'] = mannings_n

if output_flows.NextDownID.dtype != 'int':
@@ -281,16 +283,28 @@ def add_crosswalk(
sml_segs.to_csv(small_segments_filename, index=False)
print("Update rating curves for short reaches.")

for index, segment in sml_segs.iterrows():
    short_id = segment[0]
    update_id = segment[1]
    new_values = output_src.loc[output_src['HydroID'] == update_id][['Stage', 'Discharge (m3s-1)']]

    for src_index, src_stage in new_values.iterrows():
        output_src.loc[
            (output_src['HydroID'] == short_id) & (output_src['Stage'] == src_stage[0]),
            ['Discharge (m3s-1)'],
        ] = src_stage[1]
# Create a DataFrame with new values for discharge based on 'update_id'
new_values = output_src[output_src['HydroID'].isin(sml_segs['update_id'])][
    ['HydroID', 'Stage', 'Discharge (m3s-1)']
]

# Merge this new values DataFrame with sml_segs on 'update_id' and 'HydroID'
sml_segs_with_values = sml_segs.merge(
    new_values, left_on='update_id', right_on='HydroID', suffixes=('', '_new')
)
sml_segs_with_values = sml_segs_with_values[['short_id', 'Stage', 'Discharge (m3s-1)']]
merged_output_src = output_src.merge(
    sml_segs_with_values[['short_id', 'Stage', 'Discharge (m3s-1)']],
    left_on=['HydroID', 'Stage'],
    right_on=['short_id', 'Stage'],
    suffixes=('', '_df2'),
)
merged_output_src = merged_output_src[['HydroID', 'Stage', 'Discharge (m3s-1)_df2']]
output_src = pd.merge(output_src, merged_output_src, on=['HydroID', 'Stage'], how='left')
output_src['Discharge (m3s-1)'] = output_src['Discharge (m3s-1)_df2'].fillna(
    output_src['Discharge (m3s-1)']
)
output_src = output_src.drop(columns=['Discharge (m3s-1)_df2'])

output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID')

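Below is a minimal, self-contained sketch of the vectorized rating-curve update shown in the `src/add_crosswalk.py` diff above, run on made-up toy data. The column names (`HydroID`, `Stage`, `Discharge (m3s-1)`, `short_id`, `update_id`) follow the diff; the toy values, and the single rename-then-join used here in place of the diff's two-merge sequence, are illustrative assumptions rather than the repository's code.

```python
import pandas as pd

# Toy rating curves: HydroID 1 is a short segment, HydroID 2 is its update target.
output_src = pd.DataFrame(
    {
        'HydroID': [1, 1, 2, 2],
        'Stage': [0.0, 1.0, 0.0, 1.0],
        'Discharge (m3s-1)': [0.1, 5.0, 0.2, 12.0],
    }
)
# Short segments paired with the HydroID whose rating curve should replace theirs.
sml_segs = pd.DataFrame({'short_id': [1], 'update_id': [2]})

# Pull the donor rating curves once, instead of looping row by row.
new_values = output_src[output_src['HydroID'].isin(sml_segs['update_id'])][
    ['HydroID', 'Stage', 'Discharge (m3s-1)']
]

# Attach the donor discharge to each short_id at every stage.
donor = sml_segs.merge(new_values, left_on='update_id', right_on='HydroID')[
    ['short_id', 'Stage', 'Discharge (m3s-1)']
].rename(columns={'short_id': 'HydroID', 'Discharge (m3s-1)': 'Discharge_new'})

# Left-join back onto the full table and overwrite only where a donor value exists.
output_src = output_src.merge(donor, on=['HydroID', 'Stage'], how='left')
output_src['Discharge (m3s-1)'] = output_src['Discharge_new'].fillna(
    output_src['Discharge (m3s-1)']
)
output_src = output_src.drop(columns=['Discharge_new'])

print(output_src)  # HydroID 1 now carries HydroID 2's discharges at matching stages
```

Because the update is expressed as joins over whole columns, pandas does the matching in vectorized code instead of per-row `.loc` assignments, which is where the runtime savings described in the changelog entry come from.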
7 changes: 6 additions & 1 deletion tools/bridge_inundation.py
@@ -87,6 +87,11 @@ def bridge_risk_status(
# Concatenate all GeoDataFrame into a single GeoDataFrame
bridge_points = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))

if bridge_points.feature_id.dtype != 'int':
bridge_points.feature_id = bridge_points.feature_id.astype(int)
if flow_file_data.feature_id.dtype != 'int':
flow_file_data.feature_id = flow_file_data.feature_id.astype(int)

# Find the common feature_id between flow_file and bridge_points
merged_bri = bridge_points.merge(flow_file_data, on='feature_id', how='inner')

@@ -111,7 +116,7 @@ def risk_class(row):
bridge_out = merged_bri.loc[merged_data_max]
bridge_out.reset_index(drop=True, inplace=True)
bridge_out.drop('risk', axis=1, inplace=True)
bridge_out.to_file(output_dir, driver='GPKG', layer='bridge_risk_status')
bridge_out.to_file(output_dir, index=False, driver="GPKG", engine='fiona')

return bridge_out

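For context on the `feature_id` cast added above, here is a minimal pandas-only sketch; plain DataFrames stand in for the GeoDataFrames, and the toy values and the `bridge_name`/`discharge` columns are assumptions made for illustration.

```python
import pandas as pd

# Toy inputs: bridge points carry feature_id as strings, the flow file as integers.
bridge_points = pd.DataFrame({'feature_id': ['101', '102'], 'bridge_name': ['A', 'B']})
flow_file_data = pd.DataFrame({'feature_id': [101, 103], 'discharge': [5.0, 7.5]})

# Merging an object-dtype key against an int64 key either errors out or finds no
# matches, depending on the pandas version, so align both key dtypes first,
# mirroring the checks added in the diff.
for df in (bridge_points, flow_file_data):
    if df['feature_id'].dtype != 'int':
        df['feature_id'] = df['feature_id'].astype(int)

merged_bri = bridge_points.merge(flow_file_data, on='feature_id', how='inner')
print(merged_bri)  # one row: feature_id 101, the only id shared by both inputs
```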