Skip to content

Commit

Permalink
v4.5.5.0 Update SRC optimization w/ ras2fim v2 inputs (#1247)
Browse files Browse the repository at this point in the history
  • Loading branch information
RyanSpies-NOAA authored Aug 16, 2024
1 parent d99e7a9 commit c20e897
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 58 deletions.
12 changes: 12 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v4.5.5.0 - 2024-08-16 - [PR#1247](https://github.com/NOAA-OWP/inundation-mapping/pull/1247)

Updated the gauge crosswalk and SRC adjustment routine to use the ras2fim v2 files. The v2 ras2fim file structure was changed to organize the data by huc8 - one gpkg and csv per huc8. Addresses #1091

### Changes
- `fim_post_processing.sh`: added new input variables for running the `src_adjust_ras2fim_rating.py`
- `src/bash_variables.env`: renamed and reassigned the ras2fim input variables: `ras2fim_input_dir`, `ras_rating_curve_csv_filename`, `ras_rating_curve_gpkg_filename`
- `src/run_unit_wb.sh`: Added logic to check if huc in process has ras2fim input data to process. If yes - copy the ras2fim cross section point gpkg to the huc run directory.
- `src/src_adjust_ras2fim_rating.py`: Updated code logic to use the huc-specific input files containing the ras2fim rating curve data (previous ras2fim input file contained all hucs in one csv)
- `src/utils/shared_functions.py`: Added a function to find HUC subdirectories with the same name between two parent folders

<br/><br/>

## v4.5.4.4 - 2024-08-02 - [PR#1238](https://github.com/NOAA-OWP/inundation-mapping/pull/1238)

Expand Down
3 changes: 2 additions & 1 deletion fim_post_processing.sh
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,8 @@ if [ "$src_adjust_ras2fim" = "True" ] && [ "$src_subdiv_toggle" = "True" ] && [
# Run SRC Optimization routine using ras2fim rating curve data (WSE and flow @ NWM recur flow values)
python3 $srcDir/src_adjust_ras2fim_rating.py \
-run_dir $outputDestDir \
-ras_rc $ras_rating_curve_csv \
-ras_input $ras2fim_input_dir \
-ras_rc $ras_rating_curve_csv_filename \
-nwm_recur $nwm_recur_file \
-j $jobLimit
Tcount
Expand Down
6 changes: 3 additions & 3 deletions src/bash_variables.env
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ export nwm_recur_file=${inputsDir}/rating_curve/nwm_recur_flo
export usgs_rating_curve_csv=${inputsDir}/usgs_gages/usgs_rating_curves.csv

# input file locations for ras2fim locations and rating curve data
export ras_rating_curve_csv=${inputsDir}/rating_curve/ras2fim_exports/reformat_ras_rating_curve_table_rel_101.csv
export ras_rating_curve_points_gpkg=${inputsDir}/rating_curve/ras2fim_exports/reformat_ras_rating_curve_points_rel_101.gpkg
export ras_rating_curve_gpkg_filename=reformat_ras_rating_curve_points_rel_101.gpkg
export ras2fim_input_dir=${inputsDir}/rating_curve/ras2fim_exports/v2_0
export ras_rating_curve_csv_filename=reformat_ras_rating_curve_table.csv
export ras_rating_curve_gpkg_filename=reformat_ras_rating_curve_points.gpkg

export fim_obs_pnt_data=${inputsDir}/rating_curve/water_edge_database/usgs_nws_benchmark_points_cleaned.gpkg

Expand Down
20 changes: 18 additions & 2 deletions src/run_unit_wb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,25 @@ cp -a $pre_clip_huc_dir/$hucNumber/. $tempHucDataDir
# For buffer_stream_branches.py
cp $huc_input_DEM_domain $tempHucDataDir
# For usgs_gage_unit_setup.py
cp $inputsDir/usgs_gages/usgs_gages.gpkg $tempHucDataDir
cp $ras_rating_curve_points_gpkg $tempHucDataDir
cp $inputsDir/ahps_sites/nws_lid.gpkg $tempHucDataDir
cp $inputsDir/usgs_gages/usgs_gages.gpkg $tempHucDataDir
# Check if the $hucNumber directory exists in the ras2fim $inputsDir
if [ -d "$ras2fim_input_dir/$hucNumber" ]; then
ras_rating_gpkg="$ras2fim_input_dir/$hucNumber/$ras_rating_curve_gpkg_filename"
ras_rating_csv="$ras2fim_input_dir/$hucNumber/$ras_rating_curve_csv_filename"
if [ -f "$ras_rating_gpkg" ]; then
cp "$ras_rating_gpkg" $tempHucDataDir
echo "Copied $ras_rating_gpkg to $tempHucDataDir"
else
echo "File $ras_rating_gpkg does not exist. Skipping copy."
fi
if [ -f "$ras_rating_csv" ]; then
cp "$ras_rating_csv" $tempHucDataDir
echo "Copied $ras_rating_csv to $tempHucDataDir"
else
echo "File $ras_rating_csv does not exist. Skipping copy."
fi
fi

## DERIVE LEVELPATH ##
echo -e $startDiv"Generating Level Paths for $hucNumber"
Expand Down
98 changes: 64 additions & 34 deletions src/src_adjust_ras2fim_rating.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import pandas as pd

from src_roughness_optimization import update_rating_curve
from utils.shared_functions import check_file_age, concat_huc_csv
from utils.shared_functions import check_file_age, concat_huc_csv, find_matching_subdirectories


'''
Expand Down Expand Up @@ -47,23 +47,26 @@
'''


def create_ras2fim_rating_database(ras_rc_filepath, ras_elev_df, nwm_recurr_filepath, log_dir):
def create_ras2fim_rating_database(huc_ras_input_file, ras_elev_df, nwm_recurr_filepath, log_dir):
start_time = dt.datetime.now()
print('Reading RAS2FIM rating curves from csv...')
log_text = 'Processing database for RAS2FIM flow/WSE at NWM flow recur intervals...\n'
col_filter = ["fid_xs", "flow", "wse"]
# Note that we are using flow_cfs not flow_cms (error in raw data)
col_filter = ["fid_xs", "flow_cfs", "wse_m"]
ras_rc_df = pd.read_csv(
ras_rc_filepath, dtype={'fid_xs': object}, usecols=col_filter, encoding="unicode_escape"
huc_ras_input_file, dtype={'fid_xs': object}, usecols=col_filter, encoding="unicode_escape"
) # , nrows=30000)
ras_rc_df.rename(columns={'fid_xs': 'location_id'}, inplace=True)
# ras_rc_df['location_id'] = ras_rc_df['feature_id'].astype(object)
run_time = dt.datetime.now() - start_time
print(f"Duration (read ras_rc_csv): {str(run_time).split('.')[0]}")

# convert WSE navd88 values to meters
ras_rc_df.rename(
columns={'wse': 'wse_navd88_m', 'flow': 'discharge_cms'}, inplace=True
) # assume ras2fim elevation data in feet
# rename WSE column
ras_rc_df.rename(columns={'wse_m': 'wse_navd88_m'}, inplace=True)

# Need to use the flow_cfs because there is an error in the raw flow_cms
ras_rc_df['discharge_cms'] = ras_rc_df['flow_cfs'] * 0.0283168
ras_rc_df = ras_rc_df.drop(columns=["flow_cfs"])

# read in the aggregate RAS elev table csv
start_time = dt.datetime.now()
Expand Down Expand Up @@ -196,10 +199,10 @@ def create_ras2fim_rating_database(ras_rc_filepath, ras_elev_df, nwm_recurr_file
return final_df


def branch_proc_list(ras_df, run_dir, debug_outputs_option, log_file):
def branch_proc_list(ras_df, huc_run_dir, debug_outputs_option, log_file):
procs_list = [] # Initialize list for mulitprocessing.

# loop through all unique level paths that have a USGS gage
# loop through all unique level paths that have a ras2fim data points
huc_branch_dict = ras_df.groupby('huc')['levpa_id'].apply(set).to_dict()

for huc in sorted(
Expand All @@ -210,7 +213,7 @@ def branch_proc_list(ras_df, run_dir, debug_outputs_option, log_file):
# Define paths to branch HAND data.
# Define paths to HAND raster, catchments raster, and synthetic rating curve JSON.
# Assumes outputs are for HUC8 (not HUC6)
branch_dir = os.path.join(run_dir, huc, 'branches', branch_id)
branch_dir = os.path.join(huc_run_dir, 'branches', branch_id)
hand_path = os.path.join(branch_dir, 'rem_zeroed_masked_' + branch_id + '.tif')
catchments_path = os.path.join(
branch_dir, 'gw_catchments_reaches_filtered_addedAttributes_' + branch_id + '.tif'
Expand Down Expand Up @@ -302,14 +305,10 @@ def branch_proc_list(ras_df, run_dir, debug_outputs_option, log_file):
# )


def run_prep(run_dir, ras_rc_filepath, nwm_recurr_filepath, debug_outputs_option, job_number):
def run_prep(run_dir, ras_input_dir, ras_rc_filepath, nwm_recurr_filepath, debug_outputs_option, job_number):
## Check input args are valid
assert os.path.isdir(run_dir), 'ERROR: could not find the input fim_dir location: ' + str(run_dir)

## Create an aggregate dataframe with all ras_elev_table.csv entries for hucs in fim_dir
print('Reading RAS2FIM point loc HAND elevation from ras_elev_table csv files...')
csv_name = 'ras_elev_table.csv' # file name to search for ras location data (in the huc/branch dirs)

available_cores = multiprocessing.cpu_count()
if job_number > available_cores:
job_number = available_cores - 1
Expand All @@ -332,25 +331,52 @@ def run_prep(run_dir, ras_rc_filepath, nwm_recurr_filepath, debug_outputs_option
log_file.write('START TIME: ' + str(begin_time) + '\n')
log_file.write('#########################################################\n\n')

ras_elev_df = concat_huc_csv(run_dir, csv_name)

if ras_elev_df is None:
warn_err = 'WARNING: ras_elev_df not created - check that ' + csv_name + ' files exist in fim_dir!'
print(warn_err)
log_file.write(warn_err)
hucs_with_data = find_matching_subdirectories(run_dir, ras_input_dir)
if len(hucs_with_data) == 0:
print('ALERT: Did not find any HUCs with ras2fim data to perform adjustments')
log_file.write('ALERT: Did not find any HUCs with ras2fim data to perform adjustments\n')
return

log_file.write('RAS2FIM data available and will perform SRC adjustments for hucs:\n')
log_file.write(str(hucs_with_data))
log_file.write('\n#########################################################\n\n')
for huc in hucs_with_data:
huc_run_dir = os.path.join(run_dir, huc)
huc_ras_input_file = os.path.join(huc_run_dir, ras_rc_filepath)
## Create an aggregate dataframe with all ras_elev_table.csv entries for hucs in fim_dir
print('Reading RAS2FIM point loc HAND elevation from ras_elev_table csv files...')
csv_elev = 'ras_elev_table.csv' # file name to search for ras location data (in the huc/branch dirs)
# ras_elev_df = concat_huc_csv(huc_run_dir, csv_elev)
ras_elev_df = pd.read_csv(
os.path.join(huc_run_dir, 'ras_elev_table.csv'),
dtype={'HUC8': object, 'location_id': object, 'feature_id': int, 'levpa_id': object},
)

elif ras_elev_df.empty:
warn_err = 'WARNING: ras_elev_df is empty - check that ' + csv_name + ' files exist in fim_dir!'
print(warn_err)
log_file.write(warn_err)
## Create an aggregate dataframe with all ras2fim rating curve csv files
# print('Reading RAS2FIM rating curves csv files from the input directory...')
# ras_rating_df = concat_huc_csv(ras_input_dir, ras_rc_filepath)

else:
print('This may take a few minutes...')
log_file.write("starting create RAS2FIM rating db")
ras_df = create_ras2fim_rating_database(ras_rc_filepath, ras_elev_df, nwm_recurr_filepath, log_dir)
if ras_elev_df is None:
warn_err = (
'WARNING: ras_elev_df not created - check that ' + csv_elev + ' files exist in fim_dir!'
)
print(warn_err)
log_file.write(warn_err)

elif ras_elev_df.empty:
warn_err = 'WARNING: ras_elev_df is empty - check that ' + csv_elev + ' files exist in fim_dir!'
print(warn_err)
log_file.write(warn_err)

else:
print('This may take a few minutes...')
log_file.write("starting create RAS2FIM rating db")
ras_df = create_ras2fim_rating_database(
huc_ras_input_file, ras_elev_df, nwm_recurr_filepath, log_dir
)

## Create huc proc_list for multiprocessing and execute the update_rating_curve function
branch_proc_list(ras_df, run_dir, debug_outputs_option, log_file)
## Create huc proc_list for multiprocessing and execute the update_rating_curve function
branch_proc_list(ras_df, huc_run_dir, debug_outputs_option, log_file)

## Record run time and close log file
log_file.write('#########################################################\n\n')
Expand All @@ -369,10 +395,13 @@ def run_prep(run_dir, ras_rc_filepath, nwm_recurr_filepath, debug_outputs_option
'(calculated WSE/flow).'
)
parser.add_argument('-run_dir', '--run-dir', help='Parent directory of FIM run.', required=True)
parser.add_argument(
'-ras_input', '--ras2fim-dir', help='Path to RAS2FIM rating curve input directory', required=True
)
parser.add_argument(
'-ras_rc',
'--ras2fim-ratings',
help='Path to RAS2FIM rating curve (reach avg) csv file',
help='CSV file name for RAS2FIM rating curve (reach avg)',
required=True,
)
parser.add_argument(
Expand All @@ -394,10 +423,11 @@ def run_prep(run_dir, ras_rc_filepath, nwm_recurr_filepath, debug_outputs_option
## Assign variables from arguments.
args = vars(parser.parse_args())
run_dir = args['run_dir']
ras_input_dir = args['ras2fim_dir']
ras_rc_filepath = args['ras2fim_ratings']
nwm_recurr_filepath = args['nwm_recur']
debug_outputs_option = args['extra_outputs']
job_number = int(args['job_number'])

## Prepare/check inputs, create log file, and spin up the proc list
run_prep(run_dir, ras_rc_filepath, nwm_recurr_filepath, debug_outputs_option, job_number)
run_prep(run_dir, ras_input_dir, ras_rc_filepath, nwm_recurr_filepath, debug_outputs_option, job_number)
39 changes: 21 additions & 18 deletions src/usgs_gage_unit_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,24 +35,27 @@ def load_gages(self, huc_CRS):
# Read RAS2FIM point locations file
# !!! Geopandas is not honoring the dtype arg with this read_file below (huc8 being read as int64).
# Need the raw data to store the 'huc8' attribute as an object to avoid issues with integers truncating the leading zero from some hucs
ras_locs = gpd.read_file(self.ras_locs_filename, dtype={'huc8': 'object'})
ras_locs = ras_locs[['feature_id', 'huc8', 'stream_stn', 'fid_xs', 'source', 'geometry']]
ras_locs['location_id'] = ras_locs['fid_xs']

# Convert ras locs crs to match usgs gage crs
ras_locs.to_crs(huc_CRS, inplace=True)
ras_locs = ras_locs.rename(columns={'huc8': 'HUC8'})

# Convert Multipoint geometry to Point geometry
ras_locs['geometry'] = ras_locs.representative_point()

# if ras_locs.huc8.dtype == 'int64':
# ras_locs = ras_locs[ras_locs.huc8 == int(self.huc8)]
# ras_locs['HUC8'] = str(self.huc8)
# ras_locs = ras_locs.drop('huc8', axis=1)
# elif ras_locs.huc8.dtype == 'int64':
# ras_locs = ras_locs.rename(columns={'huc8':'HUC8'})

if os.path.exists(self.ras_locs_filename):
ras_columns = ['feature_id', 'huc8', 'stream_stn', 'fid_xs', 'source', 'geometry']
ras_locs = gpd.read_file(self.ras_locs_filename, dtype={'huc8': 'object'}, usecols=ras_columns)
ras_locs = ras_locs[ras_columns]
ras_locs['location_id'] = ras_locs['fid_xs']

# Convert ras locs crs to match usgs gage crs
ras_locs.to_crs(huc_CRS, inplace=True)
ras_locs = ras_locs.rename(columns={'huc8': 'HUC8'})

# Convert Multipoint geometry to Point geometry
ras_locs['geometry'] = ras_locs.representative_point()

# if ras_locs.huc8.dtype == 'int64':
# ras_locs = ras_locs[ras_locs.huc8 == int(self.huc8)]
# ras_locs['HUC8'] = str(self.huc8)
# ras_locs = ras_locs.drop('huc8', axis=1)
# elif ras_locs.huc8.dtype == 'int64':
# ras_locs = ras_locs.rename(columns={'huc8':'HUC8'})
else:
ras_locs = pd.DataFrame(columns=['feature_id', 'stream_stn', 'fid_xs', 'source', 'geometry'])
# Concat USGS points and RAS2FIM points
gages_locs = pd.concat([usgs_gages, ras_locs], axis=0, ignore_index=True)
# gages_locs.to_crs(PREP_CRS, inplace=True)
Expand Down
24 changes: 24 additions & 0 deletions src/utils/shared_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,30 @@ def check_file_age(file):
return modified_date


########################################################################
# Function to find HUC subdirectories with the same name between two parent folders
########################################################################
def find_matching_subdirectories(parent_folder1, parent_folder2, name_length=8):
    """
    Return the names of subdirectories that exist in both parent folders.

    Only immediate child directories whose name is exactly `name_length`
    characters long are considered (default 8, matching HUC8 identifiers).

    Parameters
    ----------
    parent_folder1 : str
        Path to the first parent directory.
    parent_folder2 : str
        Path to the second parent directory.
    name_length : int, optional
        Required length of a subdirectory name to be considered (default 8).

    Returns
    -------
    list of str
        Subdirectory names present in both parents (order not guaranteed,
        since it is derived from a set intersection).
    """

    def _candidate_subdirs(parent):
        # Immediate children only; filter to directories with the expected name length
        return {
            d
            for d in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, d)) and len(d) == name_length
        }

    # Intersection of the two sets = names present in both parent folders
    return list(_candidate_subdirs(parent_folder1) & _candidate_subdirs(parent_folder2))


########################################################################
# Function to concatenate huc csv files to a single dataframe/csv
########################################################################
Expand Down

0 comments on commit c20e897

Please sign in to comment.