diff --git a/README.md b/README.md
index 7614a11..7d30767 100644
--- a/README.md
+++ b/README.md
@@ -143,7 +143,7 @@ Attributes from non-standardized datasets may need to be acquired for RaFTS mode
 Run [`flow.install.proc.attr.hydfab.R`](https://github.com/NOAA-OWP/formulation-selector/blob/main/pkg/proc.attr.hydfab/flow/flow.install.proc.attr.hydfab.R) to install the package. Note that a user may need to modify the section that creates the `fs_dir` for their custom path to this repo's directory.

 ## Usage - `proc.attr.hydfab`
-The following is an example script that runs the attribute grabber: [`fs_attrs_grab`](https://github.com/NOAA-OWP/formulation-selector/blob/main/pkg/proc.attr.hydfab/flow/fsds_attrs_grab.R).
+The following is an example script that runs the attribute grabber: [`fs_attrs_grab`](https://github.com/NOAA-OWP/formulation-selector/blob/main/pkg/proc.attr.hydfab/flow/fs_attrs_grab.R).

 This script grabs attribute data corresponding to locations of interest, and saves those attribute data inside a directory as multiple parquet files. The `proc.attr.hydfab::retrieve_attr_exst()` function may then efficiently query and retrieve desired data by variable name and comid from those parquet files.
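The query pattern that paragraph describes looks roughly like the following sketch (the comids, variable names, and directory are illustrative values drawn from elsewhere in this changeset, not required inputs; the signature matches the hunk header below):

    library(proc.attr.hydfab)
    # Query previously-downloaded attributes for two comids from the local parquet store
    attrs <- retrieve_attr_exst(comids = c("6716129", "7388043"),
                                vars = c("TOT_TWI", "TOT_PET"),
                                dir_db_attrs = "~/noaa/regionalization/data/input/attributes")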
diff --git a/pkg/proc.attr.hydfab/R/proc_attr_grabber.R b/pkg/proc.attr.hydfab/R/proc_attr_grabber.R
index 46536b8..cd3c7b9 100644
--- a/pkg/proc.attr.hydfab/R/proc_attr_grabber.R
+++ b/pkg/proc.attr.hydfab/R/proc_attr_grabber.R
@@ -105,16 +105,16 @@ retrieve_attr_exst <- function(comids, vars, dir_db_attrs, bucket_conn=NA){
 }

-proc_attr_std_hfsub_name <- function(comid,custom_name='', ext='gpkg'){
+proc_attr_std_hfsub_name <- function(comid,custom_name='', fileext='gpkg'){
   #' @title Standardize hydrofabric subsetter's local filename
   #' @description Internal function that ensures consistent filename
   #' @param comid the USGS common identifier, generated by nhdplusTools
   #' @param custom_name Desired custom name following 'hydrofab_'
-  #' @param ext file extension of the hydrofrabric data. Default 'gpkg'
+  #' @param fileext file extension of the hydrofabric data. Default 'gpkg'
   hfsub_fn <- base::gsub(pattern = paste0(custom_name,"__"),
                          replacement = "_",
-                         base::paste0('hydrofab_',custom_name,'_',comid,'.',ext))
+                         base::paste0('hydrofab_',custom_name,'_',comid,'.',fileext))
   return(hfsub_fn)
 }
@@ -185,7 +185,7 @@ proc_attr_usgs_nhd <- function(comid,usgs_vars){
 }

-proc_attr_hf <- function(comid, dir_db_hydfab,custom_name="{lyrs}_",ext = 'gpkg',
+proc_attr_hf <- function(comid, dir_db_hydfab,custom_name="{lyrs}_",fileext = 'gpkg',
                          lyrs=c('divides','network')[2],
                          hf_cat_sel=TRUE, overwrite=FALSE){
@@ -195,7 +195,7 @@ proc_attr_hf <- function(comid, dir_db_hydfab,custom_name="{lyrs}_",ext = 'gpkg'
   #' @param comid character class. The common identifier USGS location code for a surface water feature.
   #' @param dir_db_hydfab character class. Local directory path for storing hydrofabric data
   #' @param custom_name character class. A custom name to insert into hydrofabric file. Default \code{glue("{lyrs}_")}
-  #' @param ext character class. file extension of hydrofabric file. Default 'gpkg'
+  #' @param fileext character class. file extension of hydrofabric file. Default 'gpkg'
   #' @param lyrs character class. The layer name(s) of interest from hydrofabric. Default 'network'.
   #' @param hf_cat_sel boolean. TRUE for a total catchment characterization specific to a single comid, FALSE (or anything else) for all subcatchments
   #' @param overwrite boolean. Overwrite local data when pulling from hydrofabric s3 bucket? Default FALSE.
@@ -204,7 +204,7 @@ proc_attr_hf <- function(comid, dir_db_hydfab,custom_name="{lyrs}_",ext = 'gpkg'
   # Build the hydfab filepath
   name_file <- proc.attr.hydfab:::proc_attr_std_hfsub_name(comid=comid,
                                                            custom_name=glue::glue('{lyrs}_'),
-                                                           ext=ext)
+                                                           fileext=fileext)
   fp_cat <- base::file.path(dir_db_hydfab, name_file)
   if(!base::dir.exists(dir_db_hydfab)){
@@ -225,7 +225,7 @@ proc_attr_hf <- function(comid, dir_db_hydfab,custom_name="{lyrs}_",ext = 'gpkg'
   # Read the hydrofabric file gpkg for each layer
   hfab_ls <- list()
-  if (ext == 'gpkg') {
+  if (fileext == 'gpkg') {
     # Define layers
     layers <- sf::st_layers(dsn = fp_cat)
     for (lyr in layers$name){
@@ -469,7 +469,7 @@ proc_attr_gageids <- function(gage_ids,featureSource,featureID,Retr_Params,
     # Retrieve the variables corresponding to datasets of interest & update database
     loc_attrs <- try(proc.attr.hydfab::proc_attr_wrap(comid=comid,
                                                       Retr_Params=Retr_Params,
-                                                      lyrs='network',overwrite=FALSE))
+                                                      lyrs=lyrs,overwrite=FALSE))
     if("try-error" %in% class(loc_attrs)){
       message(glue::glue("Skipping gage_id {gage_id} corresponding to comid {comid}"))
     }
diff --git a/pkg/proc.attr.hydfab/flow/fs_attrs_grab.R b/pkg/proc.attr.hydfab/flow/fs_attrs_grab.R
index bfa71ba..e0d0f7c 100644
--- a/pkg/proc.attr.hydfab/flow/fs_attrs_grab.R
+++ b/pkg/proc.attr.hydfab/flow/fs_attrs_grab.R
@@ -12,6 +12,7 @@
 #' term 'gage_id' is used as a variable in glue syntax to create featureID
 #' @seealso [fs_proc] A python package that processes input data for the
 #' formulation-selector
+#' @usage Rscript fs_attrs_grab.R "/path/to/attribute_config.yaml"

 # Changelog / Contributions
 #   2024-07-24 Originally created, GL
@@ -21,61 +22,81 @@
 library(yaml)
 library(ncdf4)
 library(proc.attr.hydfab)
 library(glue)
+
 # TODO is AWS_NO_SIGN_REQUEST necessary??
 # Sys.setenv(AWS_NO_SIGN_REQUEST="YES")

-# TODO create config yaml.
-
-# TODO read in config yaml, must populate NA for items that are empty.
-
-# Define input directory:
-# TODO change this to reading the standardized metadata, not the generated data
+# Define command line argument
+cmd_args <- commandArgs(trailingOnly = TRUE)

-# raw_config <- yaml::read_yaml("/Users/guylitt/git/formulation-selector/scripts/eval_ingest/xssa/xssa_attr_config.yaml")
-raw_config <- yaml::read_yaml("/Users/guylitt/git/formulation-selector/scripts/eval_ingest/SI/SI_attr_config.yaml")
+if(base::length(cmd_args)!=1){
+  warning("Expected exactly one argument: Rscript fs_attrs_grab.R /path/to/attribute_config.yaml")
+}
+# Read in config file, e.g. "~/git/formulation-selector/scripts/eval_ingest/SI/SI_attr_config.yaml"
+path_attr_config <- cmd_args[1] # "~/git/formulation-selector/scripts/eval_ingest/xssa/xssa_attr_config.yaml"
+raw_config <- yaml::read_yaml(path_attr_config)
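# For orientation, the keys read below imply an attribute config shaped roughly
# like this (an illustrative sketch only; consult the example configs under
# scripts/eval_ingest/ for the authoritative schema):
#
#   formulation_metadata:
#     - datasets: ['juliemai-xSSA']
#   file_io:
#     - dir_base: '{home_dir}/noaa/regionalization/data'
#     - dir_std_base: '{dir_base}/input/user_data_std'
#     - dir_db_hydfab: '{dir_base}/input/hydrofabric'
#     - dir_db_attrs: '{dir_base}/input/attributes'
#   hydfab_config:
#     - s3_base: 's3://lynker-spatial/tabular-resources'
#     - s3_bucket: 'lynker-spatial'
#     - hf_cat_sel: 'total'
#     - ext: 'gpkg'
#   attr_select:
#     - s3_path_hydatl: '{s3_base}/hydroATLAS/hydroatlas_vars.parquet'
#     - usgs_vars: ['TOT_TWI', 'TOT_PET']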

-datasets <- ds <- raw_config$formulation_metadata[[grep("datasets",raw_config$formulation_metadata)]]$datasets#c("juliemai-xSSA",'all')[1] # A listing of datasets to grab attributes. Dataset names match what is inside dir_std_base. 'all' processes all datasets inside dir_std_base.
-#ds_nc_filenames <- c('juliemai-xSSA_Raven_blended.nc','*.nc')[1]
-
+# A listing of datasets to grab attributes. Dataset names match what is inside
+# dir_std_base. 'all' processes all datasets inside dir_std_base.
+datasets <- raw_config$formulation_metadata[[grep("datasets",
+                        raw_config$formulation_metadata)]]$datasets #c("juliemai-xSSA",'all')[1]

+# Define directory paths from the config file
 home_dir <- Sys.getenv("HOME")
-dir_base <- file.path(home_dir,'noaa','regionalization','data')
-
-dir_std_base <- file.path(dir_base,"input","user_data_std") # The location of standardized data generated by fs_proc python package
-dir_db_hydfab <- file.path(dir_base,'input','hydrofabric') # The local dir where hydrofabric data are stored to limit s3 connections
-dir_db_attrs <- file.path(dir_base,'input','attributes') # The parent dir where each comid's attribute parquet file is stored in the subdirectory 'comid/', and each dataset's aggregated parquet attributes are stored in the subdirectory '{dataset_name}/'
-
-s3_base <- "s3://lynker-spatial/tabular-resources" # s3 path containing hydrofabric-formatted attribute datasets
-s3_bucket <- 'lynker-spatial' # s3 bucket containing hydrofabric data
-
-s3_path_hydatl <- glue::glue('{s3_base}/hydroATLAS/hydroatlas_vars.parquet') # path to hydroatlas data formatted for hydrofabric
+dir_base <- glue::glue(base::unlist(raw_config$file_io)[['dir_base']]) # file.path(home_dir,'noaa','regionalization','data')
+dir_std_base <- glue::glue(base::unlist(raw_config$file_io)[['dir_std_base']]) # file.path(dir_base,"input","user_data_std") # The location of standardized data generated by the fs_proc python package
+dir_db_hydfab <- glue::glue(base::unlist(raw_config$file_io)[['dir_db_hydfab']]) # file.path(dir_base,'input','hydrofabric') # The local dir where hydrofabric data are stored to limit s3 connections
+dir_db_attrs <- glue::glue(base::unlist(raw_config$file_io)[['dir_db_attrs']]) # file.path(dir_base,'input','attributes') # The parent dir where each comid's attribute parquet file is stored in the subdirectory 'comid/', and each dataset's aggregated parquet attributes are stored in the subdirectory '{dataset_name}/'
+
+# Read s3 connection details
+s3_base <- base::unlist(raw_config$hydfab_config)[['s3_base']] # "s3://lynker-spatial/tabular-resources" # s3 path containing hydrofabric-formatted attribute datasets
+s3_bucket <- base::unlist(raw_config$hydfab_config)[['s3_bucket']] # 'lynker-spatial' # s3 bucket containing hydrofabric data
+
+# s3 path to hydroatlas data formatted for hydrofabric
+if ("s3_path_hydatl" %in% names(base::unlist(raw_config$attr_select))){
+  s3_path_hydatl <- glue::glue(base::unlist(raw_config$attr_select)[['s3_path_hydatl']]) # glue::glue('{s3_base}/hydroATLAS/hydroatlas_vars.parquet')
+} else {
+  s3_path_hydatl <- NULL
+}

 # Additional config options
-hf_cat_sel <- c("total","all")[1] # total: interested in the single location's aggregated catchment data; all: all subcatchments of interest
-ext <- 'gpkg'
-attr_sources <- c("hydroatlas","usgs") # "streamcat",
-# TODO communicate to user that these are standardized variable names
-ha_vars <- c('pet_mm_s01', 'cly_pc_sav', 'cly_pc_uav','cly_pc_sav','ari_ix_sav') # hydroatlas variables
-sc_vars <- c() # TODO look up variables. May need to select datasets first
-usgs_vars <- c('TOT_TWI','TOT_PRSNOW','TOT_POPDENS90','TOT_EWT','TOT_RECHG','TOT_PPT7100_ANN','TOT_AET','TOT_PET','TOT_SILTAVE','TOT_BASIN_AREA','TOT_BASIN_SLOPE','TOT_ELEV_MEAN','TOT_ELEV_MAX','TOT_Intensity','TOT_Wet','TOT_Dry') # list of variables retrievable using nhdplusTools::get_characteristics_metadata()
+hf_cat_sel <- base::unlist(raw_config$hydfab_config)[['hf_cat_sel']] # c("total","all")[1] # total: interested in the single location's aggregated catchment data; all: all subcatchments of interest
+ext <- base::unlist(raw_config$hydfab_config)[['ext']] # 'gpkg'
+
+#-----------------------------------------------------
+# Variable listings:
+names_attr_sel <- base::unlist(base::lapply(raw_config$attr_select,
+                                            function(x) base::names(x)))
+
+# Transform into single named list of lists rather than nested sublists
+idxs_vars <- base::grep("_vars", names_attr_sel)
+var_names <- names_attr_sel[idxs_vars]
+sub_attr_sel <- base::lapply(idxs_vars, function(i)
+  raw_config$attr_select[[i]][[1]])
+base::names(sub_attr_sel) <- var_names
+
+# Subset to only those non-null variables:
+sub_attr_sel <- sub_attr_sel[base::unlist(base::lapply(sub_attr_sel,
+                               function(x) base::any(!base::is.null(unlist(x)))))]
+var_names_sub <- names(sub_attr_sel)
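# A hypothetical worked example of the reshaping above (not executed by this
# script): with a parsed config where
#   raw_config$attr_select = list(list(s3_path_hydatl = "..."),
#                                 list(usgs_vars = c("TOT_TWI","TOT_PET")),
#                                 list(sc_vars = NULL))
# names_attr_sel is c("s3_path_hydatl","usgs_vars","sc_vars"); the "_vars" grep
# keeps the last two, and the non-null filter then drops the empty sc_vars,
# leaving sub_attr_sel = list(usgs_vars = c("TOT_TWI","TOT_PET")).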

 #-----------------------------------------------------
+message(glue::glue("Attribute dataset sources include the following:\n
+                   {paste0(var_names_sub,collapse='\n')}"))

-# TODO generate this listing structure based on what is provided in yaml config
-# & accounting for empty entries
+message(glue::glue("Attribute variables to be acquired include:\n
+                   {paste0(sub_attr_sel,collapse='\n')}"))

-Retr_Params <- list(paths = list(# Note that if a path is provided, ensure the
+Retr_Params <- base::list(paths = base::list(
+                     # Note that if a path is provided, ensure the
                      # name includes 'path'. Same for directory having variable name with 'dir'
                      dir_db_hydfab=dir_db_hydfab,
                      dir_db_attrs=dir_db_attrs,
                      s3_path_hydatl = s3_path_hydatl,
                      dir_std_base = dir_std_base),
-                    vars = list(usgs_vars = usgs_vars,
-                                ha_vars = ha_vars,
-                                sc_vars = sc_vars),
+                    vars = sub_attr_sel,
                     datasets = datasets
                     )

+# PROCESS ATTRIBUTES
 ls_comids <- proc.attr.hydfab:::grab_attrs_datasets_fs_wrap(Retr_Params,overwrite = TRUE)
diff --git a/pkg/proc.attr.hydfab/man/proc_attr_hf.Rd b/pkg/proc.attr.hydfab/man/proc_attr_hf.Rd
index f1f2e92..6264aff 100644
--- a/pkg/proc.attr.hydfab/man/proc_attr_hf.Rd
+++ b/pkg/proc.attr.hydfab/man/proc_attr_hf.Rd
@@ -8,7 +8,7 @@ proc_attr_hf(
   comid,
   dir_db_hydfab,
   custom_name = "{lyrs}_",
-  ext = "gpkg",
+  fileext = "gpkg",
   lyrs = c("divides", "network")[2],
   hf_cat_sel = TRUE,
   overwrite = FALSE
@@ -21,7 +21,7 @@ proc_attr_hf(
 \item{custom_name}{character class. A custom name to insert into hydrofabric file. Default \code{glue("{lyrs}_")}}

-\item{ext}{character class. file extension of hydrofabric file. Default 'gpkg'}
+\item{fileext}{character class. file extension of hydrofabric file. Default 'gpkg'}

 \item{lyrs}{character class. The layer name(s) of interest from hydrofabric. Default 'network'.}
diff --git a/pkg/proc.attr.hydfab/man/proc_attr_std_hfsub_name.Rd b/pkg/proc.attr.hydfab/man/proc_attr_std_hfsub_name.Rd
index 232d21e..ba64afa 100644
--- a/pkg/proc.attr.hydfab/man/proc_attr_std_hfsub_name.Rd
+++ b/pkg/proc.attr.hydfab/man/proc_attr_std_hfsub_name.Rd
@@ -4,14 +4,14 @@
 \alias{proc_attr_std_hfsub_name}
 \title{Standardize hydrofabric subsetter's local filename}
 \usage{
-proc_attr_std_hfsub_name(comid, custom_name = "", ext = "gpkg")
+proc_attr_std_hfsub_name(comid, custom_name = "", fileext = "gpkg")
 }
 \arguments{
 \item{comid}{the USGS common identifier, generated by nhdplusTools}

 \item{custom_name}{Desired custom name following 'hydrofab_'}

-\item{ext}{file extension of the hydrofrabric data. Default 'gpkg'}
+\item{fileext}{file extension of the hydrofabric data. Default 'gpkg'}
 }
 \description{
 Internal function that ensures consistent filename
diff --git a/pkg/proc.attr.hydfab/tests/testthat/test_proc_attr_grabber.R b/pkg/proc.attr.hydfab/tests/testthat/test_proc_attr_grabber.R
index 6618113..fe66c99 100644
--- a/pkg/proc.attr.hydfab/tests/testthat/test_proc_attr_grabber.R
+++ b/pkg/proc.attr.hydfab/tests/testthat/test_proc_attr_grabber.R
@@ -292,7 +292,7 @@ testthat::test_that("proc_attr_usgs_nhd", {
 testthat::test_that("proc_attr_hf not a comid",{
   testthat::expect_error(proc.attr.hydfab::proc_attr_hf(comid="13Notacomid14",
                                                         dir_db_hydfab,
-                                                        custom_name="{lyrs}_",ext = 'gpkg',
+                                                        custom_name="{lyrs}_",fileext = 'gpkg',
                                                         lyrs=c('divides','network')[2],
                                                         hf_cat_sel=TRUE, overwrite=FALSE))
 })
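# For contrast with the expected error tested above, a well-formed call now
# supplies the renamed `fileext` argument (a sketch only; the comid and
# directory are illustrative values reused from elsewhere in this changeset):
# proc.attr.hydfab::proc_attr_hf(comid = "6716129",
#                                dir_db_hydfab = "~/noaa/regionalization/data/input/hydrofabric",
#                                custom_name = "{lyrs}_", fileext = "gpkg",
#                                lyrs = "network", hf_cat_sel = TRUE, overwrite = FALSE)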
diff --git a/scripts/config/hfsubsetRv2.2_test.R b/scripts/config/hfsubsetRv2.2_test.R
new file mode 100644
index 0000000..58d09f0
--- /dev/null
+++ b/scripts/config/hfsubsetRv2.2_test.R
@@ -0,0 +1,322 @@
+library(hfsubsetR)
+library(dplyr)
+library(glue)
+library(DBI)
+library(tidyselect)
+
+#comid <- c("6716129","7388043")
+source <- "~/noaa/hydrofabric/v2.2/ls_conus.gpkg"
+hf_ver <- '2.2'
+#outfile <- glue::glue('~/noaa/hydrofabric/test/{comid}.gpkg')
+dir_save <- '~/noaa/hydrofabric/test/'
+hfab_vars <- "all"
+
+weight_attrs_by_area <- function(df, hfab_vars, area_col = 'areasqkm'){
+  #' @title Summarize a multi-divide attribute dataset into area-weighted average attribute values
+  #' @param df data.frame class. Attribute dataframe
+  #' @param hfab_vars character class. The column names of attributes inside \code{df}
+  #' @param area_col character class. The area column name inside \code{df}
+  #' @return single-row data.frame of area-averaged attribute values from \code{df}
+  #' @export
+  tot_area <- base::sum(df[[area_col]])
+  if(any(is.na(df))){ # Determine area-weighted mean, ignoring NA
+    ls_wt_area <- list()
+    for(hfvar in hfab_vars){
+      prod_var_area <- df[[hfvar]]*df[[area_col]]
+      if(any(is.na(prod_var_area))){
+        idxs_na <- which(is.na(prod_var_area))
+        sub_tot_area <- sum(df[[area_col]][-idxs_na]) # remove NA areas from consideration
+        ls_wt_area[[hfvar]] <- sum(prod_var_area,na.rm = TRUE)/sub_tot_area
+      } else {
+        ls_wt_area[[hfvar]] <- sum(prod_var_area)/tot_area
+      }
+    }
+  } else {
+    ls_wt_area <- base::lapply(hfab_vars, function(a) try(base::sum(df[[a]]*df[[area_col]])/tot_area))
+  }
+
+  names(ls_wt_area) <- hfab_vars
+  return(as.data.frame(ls_wt_area))
+}
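# A quick toy check of weight_attrs_by_area() (hypothetical numbers, not
# hydrofabric output): two divides of 1 and 3 km^2 with attribute values 2 and
# 6 give (2*1 + 6*3)/(1+3) = 5; if the 6 were NA instead, that divide's area is
# dropped and the weighted mean becomes 2*1/1 = 2.
# df_toy <- data.frame(areasqkm = c(1, 3), x = c(2, 6))
# weight_attrs_by_area(df_toy, hfab_vars = "x")  # returns data.frame(x = 5)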
+
+proc_attr_hfab <- function(comid, dir_save="~/", hfab_vars="all",
+                           attr_name = c("model-attributes","divide-attributes")[2],
+                           div_name = "divides",
+                           source="s3://lynker-spatial/hydrofabric", hf_ver = '2.2',
+                           id_col = "divide_id", type = 'nextgen',
+                           domain='conus'){
+  #' @title Process hydrofabric attributes
+  #' @description Connects to the hydrofabric via s3 or local connection and
+  #' acquires attributes of interest
+  #' @details If the retrieved attribute data are empty, generates a warning.
+  #' @param comid atomic
+  #' @param dir_save The temporary directory location for saving the subset hydrofabric.
+  #' @param hfab_vars character vector. The attributes of interest to acquire
+  #' from hydrofabric. Default "all".
+  #' Refer to the formulation-selector github wiki for options.
+  #' @param attr_name The list name for attribute data as retrieved from
+  #' \code{hfsubsetR::get_subset()}. Default 'divide-attributes' corresponds to
+  #' hydrofabric v2.2. Previous versions used 'model-attributes'
+  #' @param div_name The list name for the divides data as retrieved from
+  #' \code{hfsubsetR::get_subset()}. Default 'divides'.
+  #' @param source A local hydrofabric geopackage path, or the s3 hydrofabric location
+  #' @param hf_ver Hydrofabric version. Default "2.2"
+  #' @param id_col The identifier column for each divide inside the divides and
+  #' divide-attributes layers. Default 'divide_id'
+  #' @param type hydrofabric type, as used in \code{hfsubsetR::get_subset()}, default "nextgen"
+  #' @param domain hydrofabric domain, as used in \code{hfsubsetR::get_subset()}, default "conus"
+  #' @export
+
+  lyrs <- c(attr_name, div_name)
+  # TODO add local source check. Should this happen here or elsewhere?
+
+  if (base::length(comid) > 1){
+    stop("hfsubsetR only processes one comid at a time as of v0.0.9.
+         If multi-comid processing becomes available, the
+         weighted-area average calc needs to account for multiple
+         comids")
+  }
+
+  if ((base::as.numeric(hf_ver)<2.2) && !('model-attributes' %in% lyrs)){
+    warning("The geopackage layer 'model-attributes' was used prior to
+            hydrofabric version 2.2 and may need to be defined in the `lyrs`
+            argument of proc_attr_hfab() for use in hfsubsetR::get_subset().")
+  }
+  # ----------- READ HYDROFABRIC ATTRIBUTE DATA ------------ #
+  # Read in the divide and divide-attributes layers
+  if (tools::file_ext(source) == 'gpkg'){ # In case a local version of hydrofab exists
+    outfile <- file.path(dir_save,glue::glue("hfab_attr_temp.gpkg"))
+    rslt <- hfsubsetR::get_subset(
+      gpkg = source,
+      lyrs = lyrs,
+      comid = comid[1],
+      outfile = outfile, overwrite=TRUE)
+
+    # Create a list of layers from the temporary location
+    attr_div <- base::lapply(lyrs, function(l) sf::st_read(outfile, layer = l, quiet = TRUE))
+    names(attr_div) <- lyrs
+
+  } else { # Otherwise try the AWS connection
+    attr_div <- hfsubsetR::get_subset(comid = comid,
+                                      lyrs = lyrs,
+                                      source = source,
+                                      hf_version = hf_ver,
+                                      type = type,
+                                      domain = domain)
+  }
+
+  # ----------- SUBSET TO VARIABLES OF INTEREST ------------ #
+  if (base::identical(hfab_vars, 'all')){ # identical() avoids a length>1 comparison in the condition
+    hfab_vars <- names(attr_div[[attr_name]])
+    hfab_vars <- hfab_vars[-grep("vpuid", hfab_vars)]
+    hfab_vars <- hfab_vars[-grep("divide_id", hfab_vars)]
+  }
+  attr_df <- attr_div[[attr_name]] %>%
+    dplyr::select(dplyr::all_of(c("divide_id",hfab_vars))) %>%
+    dplyr::select(-tidyselect::matches("vpuid"))
+
+  if(nrow(attr_df) == 0){
+    warning(glue::glue("COMID {comid} has no data in the hydrofabric. Skipping."))
+  } else {
+    # Combine model attributes with divides
+    attrs_area <- dplyr::left_join(x=attr_df, attr_div[[div_name]], by="divide_id")
+
+    if('dist_4.twi' %in% names(attrs_area)){
+      # Expand dist_4.twi into multiple cols, by converting json format to df
+      ls_dist_4_twi <- list()
+      for(i in 1:nrow(attrs_area)){
+        df_twi <- jsonlite::fromJSON(attrs_area[["dist_4.twi"]][[i]])
+        ls_dist_4_twi[[i]] <- data.table::as.data.table(base::t(base::unlist(df_twi)))
+      }
+      df_twi_all <- data.table::rbindlist(ls_dist_4_twi,fill = TRUE) %>% base::as.data.frame()
+      df_twi_sub <- df_twi_all[,c("v1","v2","v3","v4")]
+      new_twi_vars <- c("twi_25pctl", "twi_50pctl","twi_75pctl","twi_100pctl")
+      names(df_twi_sub) <- new_twi_vars
+
+      attrs_area <- base::cbind(attrs_area,df_twi_sub) %>%
+        dplyr::select(-dplyr::all_of("dist_4.twi"))
+
+      hfab_vars <- hfab_vars[-base::grep("dist_4.twi", hfab_vars)]
+      hfab_vars <- c(hfab_vars,new_twi_vars)
+    }
+
+    if (base::length(base::unique(attr_div$divides$divide_id)) !=
+        base::length(base::unique(attr_div[[attr_name]]$divide_id))){
+
+      tot_area <- base::sum(attr_div[[div_name]]$areasqkm)
+      attr_area <- base::sum(attrs_area$areasqkm)
+
+      warning(glue::glue("COMID {comid} basin area totals {round(tot_area,2)}km^2,
+                         but {attr_name} only covers {round(attr_area,2)}km^2"))
+    }
+    # ----------- AREAL WEIGHTING OF ATTRIBUTES ------------ #
+    df_wt <- weight_attrs_by_area(df=attrs_area,hfab_vars=hfab_vars)
+    df_wt$comid <- comid
+
+    df_wt <- df_wt %>% dplyr::relocate(comid)
+    return(df_wt)
+  }
+}
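# Illustration of the dist_4.twi expansion inside proc_attr_hfab() above. The
# exact JSON schema comes from the hydrofabric divide-attributes layer; the
# shape sketched here (a 4-element array with a "v" field) is an assumption for
# demonstration only:
# json_twi <- '[{"v": 4.2, "frequency": 0.25}, {"v": 6.8, "frequency": 0.25},
#               {"v": 9.1, "frequency": 0.25}, {"v": 12.3, "frequency": 0.25}]'
# df_twi <- jsonlite::fromJSON(json_twi)  # 4-row data.frame with columns v, frequency
# base::unlist(df_twi)                    # named vector: v1..v4, frequency1..frequency4
# data.table::as.data.table(base::t(base::unlist(df_twi)))  # one row; v1..v4 become the twi_*pctl columns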
{comid}")) + rslt <- try(proc_attr_hfab(comid,dir_save=dir_save, hfab_vars="all", + attr_name ="divide-attributes", + div_name = "divides", + source=source, hf_ver = '2.2', + id_col = "divide_id", type = 'nextgen', + domain='conus')) + + if("try-error" %in% rslt){ # sometimes trying again seems to fix things + Sys.sleep(5) + rslt <- try(proc_attr_hfab(comid,dir_save=dir_save, hfab_vars="all", + attr_name ="divide-attributes", + div_name = "divides", + source=source, hf_ver = '2.2', + id_col = "divide_id", type = 'nextgen', + domain='conus')) + + } + + if(!"try-error" %in% rslt){ + attr_dat[[comid]] <- rslt + } +} + + + +# Review all attribute data +check_empty_func <- function(x) { + if (length(x) > 1) { + return(x) + } +} +sub_ls_attrs <- lapply(attr_dat, check_empty_func) + +dt_all_attrs <- data.table::rbindlist(sub_ls_attrs) + +summary(dt_all_attrs) + +# TODO review all areas: +areas_all_ls <- list() +for(comid in comids){ + subset_dat <- hfsubsetR::get_subset(comid = comid, + lyrs = 'divides', + gpkg =source, + hf_version = hf_ver, + type = type, + domain =domain) + areas_all_ls[[comid]] <- sum(subset_dat$divides$areasqkm) + +} +library(ggplot2) +area_data <- unlist(unname(areas_all_ls)) %>% as.data.frame() +ggplot2::ggplot(area_data) + + ggplot2::geom_histogram() + +###### LOOKUP COMID LOCATION +library(nhdplusTools) + +rslt <- nhdplusTools::get_nhdplus(comid="23864616",realization = 'outlet') +rslt$geometry +# Paste the coordinates into here: https://apps.nationalmap.gov/viewer/ + # TODO add divide id and attr areal coverages to final attribute dataset + + +# +# +# +# # Note that there are additional divide ids inside the divides layer, meaning the model-attributes layer is missing some divide ids +# print(glue::glue("Total divide ids inside divides layer: {length(unique(attr_div$divides$divide_id))}")) +# print(glue::glue("Total divide ids inside model-attributes layer: {length(unique(attr_div$`model-attributes`$divide_id))}")) +# +# # Total area accounted for in each layer: +# print(glue::glue("Total catchment area inside the divides layer: {sum(attr_div$divides$areasqkm)} km^2")) +# +# print(glue::glue("Total catchment area inside the model-attributes layer: {sum(attrs_area$areasqkm)} km^2")) +# +# ###### +# +# # https://noaa-owp.github.io/hydrofabric/articles/05-subsetting.html +# library(dplyr) +# library(arrow) +# library(hydrofabric) +# proc_attr_hfab <- function(comid,usgs_vars){ +# +# +# } +# (g = open_dataset('s3://lynker-spatial/hydrofabric/v2.2/conus_hl') |> +# select("vpuid", 'hl_reference', "hl_link") %>% +# filter(hl_link == '06752260') %>% +# collect()) +# +# dat <- arrow::read_parquet("/Users/guylitt/Downloads/nextgen_04.parquet") +# +# dim(dat) +# +# (hydLoc <- hfsubsetR::findOrigin(network_path, comid=comid)) +# +# attr_data <- arrow::open_dataset("s3://lynker-spatial/hydrofabric/v2.1.1/nextgen/conus_model-attributes") |> +# filter(vpuid == hydLoc$vpuid) |> +# # filter(divide_id==hydLoc$id) |> +# collect() +# +# +# s3 <- "s3://lynker-spatial/hydrofabric" +# version <- 'v2.1.1' +# type <- "nextgen" +# domain <- "conus" +# network_path <- glue("{s3}/{version}/{type}/conus_network") +# net <- open_dataset(network_path) +# +# glimpse(filter(net,hl_uri == "HUC12-010100100101")) +# +# subset_enhanced = get_subset(comid = comid, +# lyrs = c("model-attributes","divides") +# #lyrs = c("forcing-weights", "model-attributes","divides","nexus"), +# source =s3, +# hf_version = '2.1.1', +# type = "nextgen", +# domain = "conus") +# 
+
+# # Note that there are additional divide ids inside the divides layer, meaning the model-attributes layer is missing some divide ids
+# print(glue::glue("Total divide ids inside divides layer: {length(unique(attr_div$divides$divide_id))}"))
+# print(glue::glue("Total divide ids inside model-attributes layer: {length(unique(attr_div$`model-attributes`$divide_id))}"))
+#
+# # Total area accounted for in each layer:
+# print(glue::glue("Total catchment area inside the divides layer: {sum(attr_div$divides$areasqkm)} km^2"))
+# print(glue::glue("Total catchment area inside the model-attributes layer: {sum(attrs_area$areasqkm)} km^2"))
+#
+# ######
+#
+# # https://noaa-owp.github.io/hydrofabric/articles/05-subsetting.html
+# library(dplyr)
+# library(arrow)
+# library(hydrofabric)
+# proc_attr_hfab <- function(comid,usgs_vars){
+#
+# }
+# (g = open_dataset('s3://lynker-spatial/hydrofabric/v2.2/conus_hl') |>
+#    select("vpuid", 'hl_reference', "hl_link") %>%
+#    filter(hl_link == '06752260') %>%
+#    collect())
+#
+# dat <- arrow::read_parquet("/Users/guylitt/Downloads/nextgen_04.parquet")
+#
+# dim(dat)
+#
+# (hydLoc <- hfsubsetR::findOrigin(network_path, comid=comid))
+#
+# attr_data <- arrow::open_dataset("s3://lynker-spatial/hydrofabric/v2.1.1/nextgen/conus_model-attributes") |>
+#   filter(vpuid == hydLoc$vpuid) |>
+#   # filter(divide_id==hydLoc$id) |>
+#   collect()
+#
+# s3 <- "s3://lynker-spatial/hydrofabric"
+# version <- 'v2.1.1'
+# type <- "nextgen"
+# domain <- "conus"
+# network_path <- glue("{s3}/{version}/{type}/conus_network")
+# net <- open_dataset(network_path)
+#
+# glimpse(filter(net,hl_uri == "HUC12-010100100101"))
+#
+# subset_enhanced = get_subset(comid = comid,
+#                              lyrs = c("model-attributes","divides"),
+#                              #lyrs = c("forcing-weights", "model-attributes","divides","nexus"),
+#                              source = s3,
+#                              hf_version = '2.1.1',
+#                              type = "nextgen",
+#                              domain = "conus")
+# length(unique(subset_enhanced$`model-attributes`$divide_id))
+# length(unique(subset_enhanced$divides$divide_id))
+#
+# intersect(subset_enhanced$`model-attributes`$divide_id,subset_enhanced$divides$divide_id)
+#
+# attrs_area <- dplyr::left_join(x=subset_enhanced$`model-attributes`, subset_enhanced$divides, by="divide_id")
+# # TODO why does the total area differ for comid "6716129"? Some divide_ids do not have attributes calculated (~10% of total catchment area)
+# (totl_actl_area <- sum(subset_enhanced$divides$areasqkm))
+# tot_area <- sum(attrs_area$areasqkm)
+# gwCoefWt <- sum(attrs_area$gw_Coeff*attrs_area$areasqkm)/tot_area
+# sum(attrs_area$impervious_mean*attrs_area$areasqkm)/tot_area
+#
+# merge(subset_enhanced)
+#
+# subset_enhanced$divides %>% filter(divide_id %in% subset_enhanced$`model-attributes`$divide_id)
+#
+# length(subset_enhanced$`forcing-weights`$)
+# subset_enhanced$divides
+
+# Map an NLDI feature to its comid. The gage below ('06752260') appears in the
+# exploratory code above; any featureSource/featureID pair recognized by the
+# NLDI should work here.
+library(nhdplusTools)
+nldi_feat <- list(featureSource = "nwissite", featureID = "USGS-06752260")
+nhdplusTools::discover_nhdplus_id(nldi_feature = nldi_feat)