Skip to content

Commit

Permalink
Use fiona for writing files
Browse files (browse the repository at this point in the history)
  • Loading branch information
mluck committed Aug 26, 2024
1 parent 6321468 commit a1d1648
Show file tree
Hide file tree
Showing 24 changed files with 64 additions and 40 deletions.
2 changes: 1 addition & 1 deletion data/esri.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def query(cls, query_url: str, save_file: str = None, **kwargs):
gdf_complete = rest_call._query_rest()
# Save geodataframe as geopackage
if save_file:
gdf_complete.to_file(save_file, driver="GPKG", index=False)
gdf_complete.to_file(save_file, driver="GPKG", index=False, engine='fiona')
else:
return gdf_complete

Expand Down
6 changes: 3 additions & 3 deletions data/nld/levee_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def download_nld_lines():
)

# Write levees to a single geopackage
levees.to_file(nld_vector_output, index=False, driver='GPKG')
levees.to_file(nld_vector_output, index=False, driver='GPKG', engine='fiona')
print(f"Levees written to file:\n{nld_vector_output}")

# Spatial join to huc2
Expand Down Expand Up @@ -75,7 +75,7 @@ def process_levee_lines(levee_gdf: gpd.GeoDataFrame, out_levees: str):
levee_gdf['geometry'] = levee_gdf.progress_apply(lambda row: remove_nulls(row.geometry, row.HUC2), axis=1)
# Remove levees that have empty geometries resulting from the previous filter
levee_gdf = levee_gdf[~levee_gdf.is_empty]
levee_gdf.to_file(out_levees, index=False, driver='GPKG')
levee_gdf.to_file(out_levees, index=False, driver='GPKG', engine='fiona')
print(f"Preprocessed levees written to \n{out_levees}")


Expand Down Expand Up @@ -160,7 +160,7 @@ def download_nld_poly():
)

# Write levees to a single geopackage
leveed_areas.to_file(nld_protected_areas, index=False, driver='GPKG')
leveed_areas.to_file(nld_protected_areas, index=False, driver='GPKG', engine='fiona')
print(f"Levees written to file:\n{nld_protected_areas}")


Expand Down
2 changes: 1 addition & 1 deletion data/usgs/acquire_and_preprocess_3dep_dems.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def polygonize(target_output_folder_path):

dem_gpkgs['DN'] = 1
dem_dissolved = dem_gpkgs.dissolve(by='DN')
dem_dissolved.to_file(dem_domain_file, driver='GPKG')
dem_dissolved.to_file(dem_domain_file, driver='GPKG', engine='fiona')

if not os.path.exists(dem_domain_file):
msg = f" - Polygonizing -- {dem_domain_file} - Failed"
Expand Down
8 changes: 5 additions & 3 deletions data/usgs/rating_curve_get_usgs_curves.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time=1.0):
sites_gdf['usgs_data_alt_accuracy_code'] <= acceptable_alt_acc_thresh, True, False
)

sites_gdf.to_file(os.path.join(workspace, 'sites_bool_flags.gpkg'), driver='GPKG')
sites_gdf.to_file(os.path.join(workspace, 'sites_bool_flags.gpkg'), driver='GPKG', engine='fiona')

# Filter and save filtered file for viewing
acceptable_sites_gdf = sites_gdf[
Expand All @@ -379,7 +379,7 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time=1.0):
acceptable_sites_gdf = acceptable_sites_gdf[acceptable_sites_gdf['curve'] == 'yes']
acceptable_sites_gdf.to_csv(os.path.join(workspace, 'acceptable_sites_for_rating_curves.csv'))
acceptable_sites_gdf.to_file(
os.path.join(workspace, 'acceptable_sites_for_rating_curves.gpkg'), driver='GPKG'
os.path.join(workspace, 'acceptable_sites_for_rating_curves.gpkg'), driver='GPKG', engine='fiona'
)

# Make list of acceptable sites
Expand All @@ -404,7 +404,9 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time=1.0):
# If 'all' option specified, reproject then write out shapefile of acceptable sites.
if list_of_gage_sites == ['all']:
sites_gdf = sites_gdf.to_crs(PREP_PROJECTION)
sites_gdf.to_file(Path(workspace) / 'usgs_gages.gpkg', layer='usgs_gages', driver='GPKG')
sites_gdf.to_file(
Path(workspace) / 'usgs_gages.gpkg', layer='usgs_gages', driver='GPKG', engine='fiona'
)

# Write out flow files for each threshold across all sites
write_categorical_flow_files(metadata_list, workspace)
Expand Down
2 changes: 1 addition & 1 deletion data/wbd/preprocess_wbd.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def clip_wbd_to_dem_domain(dem: str, wbd_in: str, wbd_out: str, huc_level: int):
wbd = gpd.clip(wbd, dem_domain)

# Write output file
wbd.to_file(wbd_out, layer=layer, crs=DEFAULT_FIM_PROJECTION_CRS, driver='GPKG')
wbd.to_file(wbd_out, layer=layer, crs=DEFAULT_FIM_PROJECTION_CRS, driver='GPKG', engine='fiona')


if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion src/derive_headwaters.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,4 @@ def findHeadWaterPoints(flows):
output_headwaters = args['output_headwaters']

if output_headwaters is not None:
hw_gdf.to_file(args['output_headwaters'], driver=getDriver(args['output_headwaters']))
hw_gdf.to_file(args['output_headwaters'], driver=getDriver(args['output_headwaters']), engine='fiona')
6 changes: 3 additions & 3 deletions src/derive_level_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,15 @@ def Derive_level_paths(

catchments = catchments.reset_index(drop=True)

catchments.to_file(catchments_outfile, index=False, driver="GPKG")
catchments.to_file(catchments_outfile, index=False, driver="GPKG", engine='fiona')

# derive headwaters
if headwaters_outfile is not None:
headwaters = stream_network.derive_headwater_points_with_inlets(
inlets_attribute=inlets_attribute, outlet_linestring_index=outlet_linestring_index
)
# headwaters write
headwaters.to_file(headwaters_outfile, index=False, driver="GPKG")
headwaters.to_file(headwaters_outfile, index=False, driver="GPKG", engine='fiona')

if out_stream_network is not None:
if verbose:
Expand Down Expand Up @@ -209,7 +209,7 @@ def Derive_level_paths(
feature_attribute=branch_id_attribute, outlet_linestring_index=outlet_linestring_index
)

branch_inlets.to_file(branch_inlets_outfile, index=False, driver="GPKG")
branch_inlets.to_file(branch_inlets_outfile, index=False, driver="GPKG", engine='fiona')

return stream_network

Expand Down
4 changes: 2 additions & 2 deletions src/edit_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def Edit_points(

# join on HydroID to add branch_id
if out_reach_points is not None:
reach_points.to_file(out_reach_points, driver='GPKG', index=False)
reach_points.to_file(out_reach_points, driver='GPKG', index=False, engine='fiona')

# make pixel points
if verbose:
Expand All @@ -46,7 +46,7 @@ def Edit_points(
pixel_points['id'] = list(range(1, len(pixel_points) + 1))

if out_pixel_points is not None:
pixel_points.to_file(out_pixel_points, driver='GPKG', index=False)
pixel_points.to_file(out_pixel_points, driver='GPKG', index=False, engine='fiona')

return (reach_points, pixel_points)

Expand Down
6 changes: 4 additions & 2 deletions src/filter_catchments_and_add_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,10 @@ def filter_catchments_and_add_attributes(

if not output_catchments.empty:
try:
output_catchments.to_file(output_catchments_filename, driver="GPKG", index=False)
output_flows.to_file(output_flows_filename, driver="GPKG", index=False)
output_catchments.to_file(
output_catchments_filename, driver="GPKG", index=False, engine='fiona'
)
output_flows.to_file(output_flows_filename, driver="GPKG", index=False, engine='fiona')
except ValueError:
# this is not an exception, but a custom exit code that can be trapped
print("There are no flowlines in the HUC after stream order filtering.")
Expand Down
4 changes: 3 additions & 1 deletion src/reachID_grid_to_vector_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ def convert_grid_cells_to_points(raster, index_option, output_points_filename=Fa
if output_points_filename is False:
return pointGDF
else:
pointGDF.to_file(output_points_filename, driver=getDriver(output_points_filename), index=False)
pointGDF.to_file(
output_points_filename, driver=getDriver(output_points_filename), index=False, engine='fiona'
)


if __name__ == '__main__':
Expand Down
8 changes: 6 additions & 2 deletions src/split_flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,11 +430,15 @@ def snap_and_trim_flow(snapped_point, flows):
print("There are no flowlines after stream order filtering.")
sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # Note: Will send a 61 back

split_flows_gdf.to_file(split_flows_filename, driver=getDriver(split_flows_filename), index=False)
split_flows_gdf.to_file(
split_flows_filename, driver=getDriver(split_flows_filename), index=False, engine='fiona'
)

if len(split_points_gdf) == 0:
raise Exception("No points exist.")
split_points_gdf.to_file(split_points_filename, driver=getDriver(split_points_filename), index=False)
split_points_gdf.to_file(
split_points_filename, driver=getDriver(split_points_filename), index=False, engine='fiona'
)


if __name__ == '__main__':
Expand Down
4 changes: 2 additions & 2 deletions src/src_adjust_spatial_obs.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def process_points(args):
branch_debug_pts_out_gpkg = os.path.join(
branch_dir, 'export_water_edge_df_' + branch_id + '.gpkg'
)
water_edge_df.to_file(branch_debug_pts_out_gpkg, driver='GPKG', index=False)
water_edge_df.to_file(branch_debug_pts_out_gpkg, driver='GPKG', index=False, engine='fiona')

# print('Processing points for HUC: ' + str(huc) + ' Branch: ' + str(branch_id))
## Get median HAND value for appropriate groups.
Expand Down Expand Up @@ -281,7 +281,7 @@ def ingest_points_layer(fim_directory, job_number, debug_outputs_option, log_fil
huc_debug_pts_out = os.path.join(fim_directory, huc, 'debug_water_edge_df_' + huc + '.csv')
water_edge_df.to_csv(huc_debug_pts_out)
huc_debug_pts_out_gpkg = os.path.join(fim_directory, huc, 'export_water_edge_df_' + huc + '.gpkg')
water_edge_df.to_file(huc_debug_pts_out_gpkg, driver='GPKG', index=False)
water_edge_df.to_file(huc_debug_pts_out_gpkg, driver='GPKG', index=False, engine='fiona')
# write parquet file using ".to_parquet() method"
parquet_filepath = os.path.join(fim_directory, huc, 'debug_water_edge_df_' + huc + '.parquet')
water_edge_df.to_parquet(parquet_filepath, index=False)
Expand Down
16 changes: 13 additions & 3 deletions src/src_roughness_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,11 @@ def update_rating_curve(

try:
output_catchments.to_file(
catchments_poly_path, driver="GPKG", index=False, overwrite=True
catchments_poly_path,
driver="GPKG",
index=False,
overwrite=True,
engine='fiona',
) # overwrite the previous layer

except Exception as e:
Expand All @@ -483,7 +487,11 @@ def update_rating_curve(
try:
# Attempt to write to the file again
output_catchments.to_file(
catchments_poly_path, driver="GPKG", index=False, overwrite=True
catchments_poly_path,
driver="GPKG",
index=False,
overwrite=True,
engine='fiona',
)
log_text += 'Successful second attempt to write output_catchments gpkg' + '\n'
except Exception as e:
Expand Down Expand Up @@ -515,7 +523,9 @@ def update_rating_curve(
"gw_catchments_src_adjust_" + str(branch_id) + ".gpkg",
)
output_catchments = input_catchments.merge(df_nmerge, how='left', on='HydroID')
output_catchments.to_file(output_catchments_fileName, driver="GPKG", index=False)
output_catchments.to_file(
output_catchments_fileName, driver="GPKG", index=False, engine='fiona'
)
output_catchments = None

## Merge the final ManningN dataframe to the original hydroTable
Expand Down
2 changes: 1 addition & 1 deletion src/stream_branches.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def write(self, fileName, layer=None, index=True, verbose=False):
driverDictionary = {".gpkg": "GPKG", ".geojson": "GeoJSON", ".shp": "ESRI Shapefile"}
driver = driverDictionary[splitext(fileName)[1]]

self.to_file(fileName, driver=driver, layer=layer, index=index)
self.to_file(fileName, driver=driver, layer=layer, index=index, engine='fiona')

def set_index(self, reach_id_attribute, drop=True):
branch_id_attribute = self.branch_id_attribute
Expand Down
4 changes: 2 additions & 2 deletions tools/eval_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,7 +891,7 @@ def eval_plots(
joined = gdf.merge(all_ahps_datasets, on='nws_lid')
# Project to VIZ projection and write to file
joined = joined.to_crs(VIZ_PROJECTION)
joined.to_file(Path(workspace) / 'fim_performance_points.shp')
joined.to_file(Path(workspace) / 'fim_performance_points.shp', engine='fiona')
else:
print(
'NWS/USGS MS datasets not analyzed, no spatial data created.\n'
Expand Down Expand Up @@ -949,7 +949,7 @@ def eval_plots(
# Project to VIZ projection
wbd_with_metrics = wbd_with_metrics.to_crs(VIZ_PROJECTION)
# Write out to file
wbd_with_metrics.to_file(Path(workspace) / 'fim_performance_polys.shp')
wbd_with_metrics.to_file(Path(workspace) / 'fim_performance_polys.shp', engine='fiona')
else:
print(
'BLE/IFC/RAS2FIM FR datasets not analyzed, no spatial data created.\n'
Expand Down
2 changes: 1 addition & 1 deletion tools/evaluate_continuity.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def evaluate_continuity(
fig.savefig(plot_file)

if stream_network_outfile is not None:
stream_network.to_file(stream_network_outfile, index=False, driver='GPKG')
stream_network.to_file(stream_network_outfile, index=False, driver='GPKG', engine='fiona')

return stream_network

Expand Down
4 changes: 2 additions & 2 deletions tools/generate_categorical_fim.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def update_mapping_status(output_mapping_dir, output_flows_dir, nws_sites_layer,
flows_df = flows_df.rename(columns={'nws_lid': 'ahps_lid'})

# Write out to file
flows_df.to_file(nws_sites_layer)
flows_df.to_file(nws_sites_layer, engine='fiona')
except Exception as e:
print(f"No LIDs, \n Exception: \n {repr(e)} \n")

Expand Down Expand Up @@ -860,7 +860,7 @@ def generate_stage_based_categorical_fim(
viz_out_gdf['acceptable_alt_meth_code_list'] = str(acceptable_alt_meth_code_list)
viz_out_gdf['acceptable_site_type_list'] = str(acceptable_site_type_list)

viz_out_gdf.to_file(nws_sites_layer, driver='GPKG')
viz_out_gdf.to_file(nws_sites_layer, driver='GPKG', engine='fiona')

return nws_sites_layer

Expand Down
2 changes: 1 addition & 1 deletion tools/generate_categorical_fim_flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ def generate_catfim_flows(
# )
nws_lid_layer = os.path.join(workspace, 'nws_lid_sites.gpkg').replace('flows', 'mapping')

viz_out_gdf.to_file(nws_lid_layer, driver='GPKG')
viz_out_gdf.to_file(nws_lid_layer, driver='GPKG', engine='fiona')

# time operation
all_end = datetime.now()
Expand Down
8 changes: 5 additions & 3 deletions tools/generate_categorical_fim_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,11 @@ def post_process_cat_fim_for_viz(
# Write/append aggregate diss_extent
print(f"Merging layer: {layer}")
if os.path.isfile(merged_layer):
diss_extent.to_file(merged_layer, driver=getDriver(merged_layer), index=False, mode='a')
diss_extent.to_file(
merged_layer, driver=getDriver(merged_layer), index=False, mode='a', engine='fiona'
)
else:
diss_extent.to_file(merged_layer, driver=getDriver(merged_layer), index=False)
diss_extent.to_file(merged_layer, driver=getDriver(merged_layer), index=False, engine='fiona')
del diss_extent

# shutil.rmtree(gpkg_dir) # TODO
Expand Down Expand Up @@ -321,7 +323,7 @@ def reformat_inundation_maps(

if not extent_poly_diss.empty:
extent_poly_diss.to_file(
diss_extent_filename, driver=getDriver(diss_extent_filename), index=False
diss_extent_filename, driver=getDriver(diss_extent_filename), index=False, engine='fiona'
)

except Exception:
Expand Down
2 changes: 1 addition & 1 deletion tools/generate_nws_lid.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def generate_nws_lid(workspace):
nws_lid_gdf.reset_index(drop=True)
Path(workspace).mkdir(parents=True, exist_ok=True)
nws_lid_gdf.dropna(subset=['nwm_feature_id'], inplace=True)
nws_lid_gdf.to_file(Path(workspace) / 'nws_lid.gpkg', driver='GPKG')
nws_lid_gdf.to_file(Path(workspace) / 'nws_lid.gpkg', driver='GPKG', engine='fiona')


if __name__ == '__main__':
Expand Down
4 changes: 3 additions & 1 deletion tools/make_boxes_from_bounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ def find_hucs_of_bounding_boxes(
bounding_boxes = bounding_boxes.to_crs(wbd_proj)

if bounding_boxes_outfile is not None:
bounding_boxes.to_file(bounding_boxes_outfile, driver=getDriver(bounding_boxes_outfile), index=False)
bounding_boxes.to_file(
bounding_boxes_outfile, driver=getDriver(bounding_boxes_outfile), index=False, engine='fiona'
)

wbdcol_name = 'HUC' + wbd_layer[-1]

Expand Down
2 changes: 1 addition & 1 deletion tools/mosaic_inundation.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def mosaic_final_inundation_extent_to_poly(inundation_raster, inundation_polygon
]

# Write polygon
extent_poly_diss.to_file(inundation_polygon, driver=driver)
extent_poly_diss.to_file(inundation_polygon, driver=driver, engine='fiona')


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion tools/rating_curve_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,7 +1082,7 @@ def create_static_gpkg(output_dir, output_gpkg, agg_recurr_stats_table, gages_gp
usgs_gages = usgs_gages.round(decimals=2)

# Write to file
usgs_gages.to_file(join(output_dir, output_gpkg), driver='GPKG', index=False)
usgs_gages.to_file(join(output_dir, output_gpkg), driver='GPKG', index=False, engine='fiona')

# Create figure
usgs_gages.replace(np.inf, np.nan, inplace=True) # replace inf with nan for plotting
Expand Down
2 changes: 1 addition & 1 deletion tools/test_case_by_hydro_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def assemble_hydro_alpha_for_single_huc(stats, huc8, mag, bench):
csv_path_dot = csv_path + ".csv"

print('writing_to_gpkg')
csv_output.to_file(csv, driver="GPKG")
csv_output.to_file(csv, driver="GPKG", engine='fiona')

# Add version information to csv_output dataframe
csv_output['version'] = version
Expand Down

0 comments on commit a1d1648

Please sign in to comment.