Merge pull request #217 from NREL/rhorsey/sampling_v2_plotting_patch

Rhorsey/sampling v2 plotting patch
NREL · Sep 23, 2024 · 2a6b506 · 2a6b506
2 parents b4671de + f7faead
commit 2a6b506
Show file tree

Hide file tree

Showing 9 changed files with 298 additions and 157 deletions.
diff --git a/postprocessing/compare_comstock_to_cbecs.py.template b/postprocessing/compare_comstock_to_cbecs.py.template
@@ -32,12 +32,6 @@ def main():
         stock_estimation_version='2024R2',  # Only updated when a new stock estimate is published
         truth_data_version='v01'  # Typically don't change this
     )
-
-    # Scale ComStock run to CBECS 2018 AND remove non-ComStock buildings from CBECS
-    comstock.add_weights_aportioned_by_stock_estimate(apportionment=stock_estimate)
-    comstock.create_national_aggregation()
-    comstock.create_geospatially_resolved_aggregations(comstock.STATE_ID, pretty_geo_col_name='state_id')
-    comstock.create_geospatially_resolved_aggregations(comstock.COUNTY_ID, pretty_geo_col_name='county_id')
 
     # CBECS
     cbecs = cspp.CBECS(
@@ -46,19 +40,26 @@ def main():
         color_hex='#009E73',  # Color used to represent CBECS in plots
         reload_from_csv=False  # True if CSV already made and want faster reload times
         )
-    
-    # TODO Update past here including ensuring we can still apply CBECS weights on top of previous weights.
-
+
+    # Scale ComStock runs to the 'truth data' from StockE V3 estimates using bucket-based apportionment
+    comstock.add_weights_aportioned_by_stock_estimate(apportionment=stock_estimate)
     # Scale ComStock run to CBECS 2018 AND remove non-ComStock buildings from CBECS
     comstock.add_national_scaling_weights(cbecs, remove_non_comstock_bldg_types_from_cbecs=True)
-    comstock.calculate_weighted_columnal_values()
-    comstock.export_to_csv_wide()
+    # TODO This needs to be rewritten with safe column names, lazyframe usage, etc. 
+    #comstock.calculate_weighted_columnal_values()
+
+    # Uncomment whichever to write results to disk:
+    comstock.create_national_aggregation()
+    # comstock.create_geospatially_resolved_aggregations(comstock.STATE_ID, pretty_geo_col_name='state_id')
+    # comstock.create_geospatially_resolved_aggregations(comstock.COUNTY_ID, pretty_geo_col_name='county_id')
 
     # Make a comparison by passing in a list of CBECS and ComStock runs to compare
     # upgrade_id can be 'All' or the upgrade number
+    comstock.create_plotting_lazyframe()
     comp = cspp.ComStockToCBECSComparison(cbecs_list=[cbecs], comstock_list=[comstock], upgrade_id='All',make_comparison_plots=True)
 
     comp.export_to_csv_wide()
+
 
 # Code to execute the script
 if __name__ == "__main__":

diff --git a/postprocessing/compare_runs.py.template b/postprocessing/compare_runs.py.template
@@ -12,14 +12,14 @@ logger = logging.getLogger(__name__)
 def main():
     # First ComStock run
     comstock_a = cspp.ComStock(
-        s3_base_dir='eulp/comstock_fy22',  # If run not on S3, download results_up**.parquet manually
-        comstock_run_name='com_v15_cooking',  # Name of the run on S3
-        comstock_run_version='v15',  # Use whatever you want to see in plot and folder names
+        s3_base_dir='eulp/euss_com',  # If run not on S3, download results_up**.parquet manually
+        comstock_run_name='sampling_lighting_11079_1',  # Name of the run on S3
+        comstock_run_version='sampling_lighting_11079_1',  # Use whatever you want to see in plot and folder names
         comstock_year=2018,  # Typically don't change this
         athena_table_name=None,  # Typically don't change this
         truth_data_version='v01',  # Typically don't change this
         buildstock_csv_name='buildstock.csv',  # Download buildstock.csv manually
-        acceptable_failure_percentage=0.05,  # Can increase this when testing and high failure are OK
+        acceptable_failure_percentage=0.25,  # Can increase this when testing and high failure are OK
         drop_failed_runs=True,  # False if you want to evaluate which runs failed in raw output data
         color_hex='#0072B2',  # Color used to represent this run in plots
         skip_missing_columns=True,  # False if you want to ensure you have all data specified for export
@@ -29,21 +29,27 @@ def main():
 
     # Second ComStock run
     comstock_b = cspp.ComStock(
-        s3_base_dir='eulp/comstock_fy22',  # If run not on S3, download results_up**.parquet manually
-        comstock_run_name='com_v16_windows_lighting',  # Name of the run on S3
-        comstock_run_version='v16',  # Use whatever you want to see in plot and folder names
+        s3_base_dir='eulp/euss_com',  # If run not on S3, download results_up**.parquet manually
+        comstock_run_name='cycle_4_sampling_test_rand_985932_20240321',  # Name of the run on S3
+        comstock_run_version='new_sampling_test',  # Use whatever you want to see in plot and folder names
         comstock_year=2018,  # Typically don't change this
-        athena_table_name=None,  # Typically don't change this
+        athena_table_name='rand_985932_20240321',  # Typically same as comstock_run_name or None
         truth_data_version='v01',  # Typically don't change this
-        buildstock_csv_name='buildstock.csv',  # Download buildstock.csv manually
-        acceptable_failure_percentage=0.05,  # Can increase this when testing and high failure are OK
+        buildstock_csv_name='rand_985932_sampling_buildstock.csv', # Download buildstock.csv manually
+        acceptable_failure_percentage=0.9,  # Can increase this when testing and high failure are OK
         drop_failed_runs=True,  # False if you want to evaluate which runs failed in raw output data
         color_hex='#56B4E9',  # Color used to represent this run in plots
         skip_missing_columns=True,  # False if you want to ensure you have all data specified for export
         reload_from_csv=False,  # True if CSV already made and want faster reload times
         include_upgrades=False  # False if not looking at upgrades
         )
 
+    # Stock Estimation for Apportionment:
+    stock_estimate = cspp.Apportion(
+        stock_estimation_version='2024R2',  # Only updated when a new stock estimate is published
+        truth_data_version='v01'  # Typically don't change this
+    )
+
     # CBECS
     cbecs = cspp.CBECS(
         cbecs_year=2018,  # 2012 and 2018 currently available
@@ -52,19 +58,19 @@ def main():
         reload_from_csv=False  # True if CSV already made and want faster reload times
         )
 
-    # Scale both ComStock runs to CBECS 2018 AND remove non-ComStock buildings from CBECS
+    # First scale ComStock runs to the 'truth data' from StockE V3 estimates using bucket-based apportionment
+    # Then scale both ComStock runs to CBECS 2018 AND remove non-ComStock buildings from CBECS
     # This is how weights in the models are set to represent national energy consumption
+    comstock_a.add_weights_aportioned_by_stock_estimate(apportionment=stock_estimate)
     comstock_a.add_national_scaling_weights(cbecs, remove_non_comstock_bldg_types_from_cbecs=True)
+    comstock_b.add_weights_aportioned_by_stock_estimate(apportionment=stock_estimate)
     comstock_b.add_national_scaling_weights(cbecs, remove_non_comstock_bldg_types_from_cbecs=True)
 
-    # Uncomment this to correct gas consumption for a ComStock run to match CBECS
-    # Don't typically want to do this
-    # comstock_a.correct_comstock_gas_to_match_cbecs(cbecs)
-
     # Export CBECS and ComStock data to wide and long formats for Tableau and to skip processing later
     cbecs.export_to_csv_wide()  # May comment this out if CSV output isn't needed
-    comstock_a.export_to_csv_wide()  # May comment this out if CSV output isn't needed
-    comstock_b.export_to_csv_wide()  # May comment this out if CSV output isn't needed
+    # comstock_a.create_national_aggregation()  # May comment this out if CSV output isn't needed
+    # comstock_b.create_national_aggregation()  # May comment this out if CSV output isn't needed
+    # TODO This (long CSV export) is not yet re-implemented
     # comstock_a.export_to_csv_long()  # Long format useful for stacking end uses and fuels
     # comstock_b.export_to_csv_long()  # Long format useful for stacking end uses and fuels
 
@@ -73,7 +79,7 @@ def main():
         cbecs_list=[cbecs],
         comstock_list = [comstock_a, comstock_b],
         make_comparison_plots=True
-        )
+    )
 
     # Export the comparison data to wide format for Tableau
     comparison.export_to_csv_wide()

diff --git a/postprocessing/compare_upgrades.py.template b/postprocessing/compare_upgrades.py.template
@@ -1,63 +1,73 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import logging
-
-import comstockpostproc as cspp
-
-
-logging.basicConfig(level='INFO')  # Use DEBUG, INFO, or WARNING
-logger = logging.getLogger(__name__)
-
-def main():
-    # ComStock run
-    comstock = cspp.ComStock(
-        s3_base_dir='eulp/euss_com',  # If run not on S3, download results_up**.parquet manually
-        comstock_run_name='hprtu_stdperf_fan_test_10k',  # Name of the run on S3
-        comstock_run_version='hprtu_stdperf_fan_test_10k',  # Use whatever you want to see in plot and folder names
-        comstock_year=2018,  # Typically don't change this
-        athena_table_name=None,  # Typically don't change this
-        truth_data_version='v01',  # Typically don't change this
-        buildstock_csv_name='buildstock.csv',  # Download buildstock.csv manually
-        acceptable_failure_percentage=0.025,  # Can increase this when testing and high failure are OK
-        drop_failed_runs=True,  # False if you want to evaluate which runs failed in raw output data
-        color_hex='#0072B2',  # Color used to represent this run in plots
-        skip_missing_columns=True,  # False if you want to ensure you have all data specified for export
-        reload_from_csv=False, # True if CSV already made and want faster reload times
-        include_upgrades=True,  # False if not looking at upgrades
-        upgrade_ids_to_skip=[], # Use [1, 3] etc. to exclude certain upgrades
-        make_timeseries_plots=True,
-        states={
-                #'MN': 'Minnesota',  # specify state to use for timeseries plots in dictionary format. State ID must correspond correctly.
-                'MA':'Massachusetts',
-                'OR': 'Oregon',
-                'LA': 'Louisiana',
-                #'AZ': 'Arizona',
-                #'TN': 'Tennessee'
-                },
-        upgrade_ids_for_comparison={} # Use {'<Name you want for comparison run folder>':[0,1,2]}; add as many upgrade IDs as needed, but plots look strange over 5
-        )
-
-    # CBECS
-    cbecs = cspp.CBECS(
-        cbecs_year=2018,  # 2012 and 2018 currently available
-        truth_data_version='v01',  # Typically don't change this
-        color_hex='#009E73',  # Color used to represent CBECS in plots
-        reload_from_csv=False  # True if CSV already made and want faster reload times
-        )
-
-    # Scale ComStock run to CBECS 2018 AND remove non-ComStock buildings from CBECS
-    # This is how weights in the models are set to represent national energy consumption
-    comstock.add_national_scaling_weights(cbecs, remove_non_comstock_bldg_types_from_cbecs=True)
-
-    # Export CBECS and ComStock data to wide and long formats for Tableau and to skip processing later
-    cbecs.export_to_csv_wide()  # May comment this out after run once
-    comstock.export_to_csv_wide()  # May comment this out after run once
-    # comstock.export_to_csv_long()  # Long format useful for stacking end uses and fuels
-
-    # Create measure run comparisons; only use if run has measures
-    comparison = cspp.ComStockMeasureComparison(comstock, states=comstock.states, make_comparison_plots = comstock.make_comparison_plots, make_timeseries_plots = comstock.make_timeseries_plots)
-
-# Code to execute the script
-if __name__=="__main__":
-    main()
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import logging
+
+import comstockpostproc as cspp
+
+
+logging.basicConfig(level='INFO')  # Use DEBUG, INFO, or WARNING
+logger = logging.getLogger(__name__)
+
+def main():
+    # ComStock run
+    comstock = cspp.ComStock(
+        s3_base_dir='eulp/euss_com',  # If run not on S3, download results_up**.parquet manually
+        comstock_run_name='sampling_lighting_11079_1',  # Name of the run on S3
+        comstock_run_version='sampling_lighting_11079_1',  # Use whatever you want to see in plot and folder names
+        comstock_year=2018,  # Typically don't change this
+        athena_table_name=None,  # Typically don't change this
+        truth_data_version='v01',  # Typically don't change this
+        buildstock_csv_name='buildstock.csv',  # Download buildstock.csv manually
+        acceptable_failure_percentage=0.25,  # Can increase this when testing and high failure are OK
+        drop_failed_runs=True,  # False if you want to evaluate which runs failed in raw output data
+        color_hex='#0072B2',  # Color used to represent this run in plots
+        skip_missing_columns=True,  # False if you want to ensure you have all data specified for export
+        reload_from_csv=False, # True if CSV already made and want faster reload times
+        include_upgrades=True,  # False if not looking at upgrades
+        upgrade_ids_to_skip=[], # Use [1, 3] etc. to exclude certain upgrades
+        make_timeseries_plots=False,
+        states={
+                #'MN': 'Minnesota',  # specify state to use for timeseries plots in dictionary format. State ID must correspond correctly.
+                'MA':'Massachusetts',
+                #'OR': 'Oregon',
+                #'LA': 'Louisiana',
+                #'AZ': 'Arizona',
+                #'TN': 'Tennessee'
+                },
+        upgrade_ids_for_comparison={} # Use {'<Name you want for comparison run folder>':[0,1,2]}; add as many upgrade IDs as needed, but plots look strange over 5
+        )
+
+    # Stock Estimation for Apportionment:
+    stock_estimate = cspp.Apportion(
+        stock_estimation_version='2024R2',  # Only updated when a new stock estimate is published
+        truth_data_version='v01'  # Typically don't change this
+    )
+
+    # CBECS
+    cbecs = cspp.CBECS(
+        cbecs_year=2018,  # 2012 and 2018 currently available
+        truth_data_version='v01',  # Typically don't change this
+        color_hex='#009E73',  # Color used to represent CBECS in plots
+        reload_from_csv=False  # True if CSV already made and want faster reload times
+        )
+
+    # Scale ComStock runs to the 'truth data' from StockE V3 estimates using bucket-based apportionment
+    comstock.add_weights_aportioned_by_stock_estimate(apportionment=stock_estimate)
+    # Scale ComStock run to CBECS 2018 AND remove non-ComStock buildings from CBECS
+    comstock.add_national_scaling_weights(cbecs, remove_non_comstock_bldg_types_from_cbecs=True)
+
+    # Export CBECS and ComStock data to wide and long formats for Tableau and to skip processing later
+    # cbecs.export_to_csv_wide()  # May comment this out after run once
+    # comstock.create_national_aggregation()
+    # comstock.create_geospatially_resolved_aggregations(comstock.STATE_ID, pretty_geo_col_name='state_id')
+    # comstock.create_geospatially_resolved_aggregations(comstock.COUNTY_ID, pretty_geo_col_name='county_id')
+    # TODO Long-format CSV export is definitely not working as expected anymore...
+    # comstock.export_to_csv_long()  # Long format useful for stacking end uses and fuels
+
+    # Create measure run comparisons; only use if run has measures
+    comparison = cspp.ComStockMeasureComparison(comstock, states=comstock.states, make_comparison_plots = comstock.make_comparison_plots, make_timeseries_plots = comstock.make_timeseries_plots)
+
+# Code to execute the script
+if __name__=="__main__":
+    main()
diff --git a/postprocessing/comstockpostproc/cbecs.py b/postprocessing/comstockpostproc/cbecs.py
@@ -483,4 +483,9 @@ def export_to_csv_wide(self):
 
         file_name = f'CBECS wide.csv'
         file_path = os.path.join(self.output_dir, file_name)
-        self.data.to_csv(file_path, index=False)
+        try:
+            self.data.sink_csv(file_path)
+        except pl.exceptions.InvalidOperationError:
+            logger.warn('Warning - sink_csv not supported for metadata write in current polars version')
+            logger.warn('Falling back to .collect.write_csv')
+            self.data.collect().write_csv(file_path)