Skip to content

Commit

Permalink
Added MultiCloudsModel for a multi-step model with cloud type prediction and cloud property prediction. Added tests for this new composite model. Pickles added for cloud type and cloud prop models.
Browse files Browse the repository at this point in the history
  • Loading branch information
bnb32 committed Oct 16, 2024
1 parent 716bb1c commit aeff5f4
Show file tree
Hide file tree
Showing 69 changed files with 2,168 additions and 459 deletions.
18 changes: 14 additions & 4 deletions mlclouds/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,20 @@

MLCLOUDSDIR = os.path.dirname(os.path.realpath(__file__))
TESTDATADIR = os.path.join(os.path.dirname(MLCLOUDSDIR), 'tests', 'data')
PROD_DIR = os.path.join(MLCLOUDSDIR, 'model/production_model')
CONFIG_FPATH = os.path.join(PROD_DIR, 'config.json')
MODEL_FPATH = os.path.join(PROD_DIR, 'outputs/mlclouds_model.pkl')

LEG_DIR = os.path.join(MLCLOUDSDIR, 'model/legacy_model')
CTYPE_DIR = os.path.join(MLCLOUDSDIR, 'model/cloud_type')
CTYPE_CONFIG_FPATH = os.path.join(CTYPE_DIR, 'config.json')
CTYPE_MODEL_FPATH = os.path.join(CTYPE_DIR, 'outputs/mlclouds_model.pkl')

CPROP_DIR = os.path.join(MLCLOUDSDIR, 'model/cloud_properties')
CPROP_CONFIG_FPATH = os.path.join(CPROP_DIR, 'config.json')
CPROP_MODEL_FPATH = os.path.join(CPROP_DIR, 'outputs/mlclouds_model.pkl')

MODEL_FPATH = {
'cloud_type_model_path': CTYPE_MODEL_FPATH,
'cloud_prop_model_path': CPROP_MODEL_FPATH,
}

LEG_DIR = os.path.join(MLCLOUDSDIR, 'model/legacy')
LEG_CONFIG_FPATH = os.path.join(LEG_DIR, 'config.json')
LEG_MODEL_FPATH = os.path.join(LEG_DIR, 'outputs/mlclouds_model.pkl')
32 changes: 17 additions & 15 deletions mlclouds/data_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,11 @@ def _load_data(self, nsrdb_files=None):
var_names = copy.deepcopy(self._config['features'])
var_names += self._config['y_labels']

# cloud_type is needed for data cleaning, even if not included in
# training features
if 'cloud_type' not in var_names:
var_names.append('cloud_type')
# cloud_type, cld_opd_dcomp, cld_reff_dcomp are needed for data
# cleaning even if not included in training features
for f in ('cloud_type', 'cld_opd_dcomp', 'cld_reff_dcomp'):
if f not in var_names:
var_names.append(f)

logger.debug('Loading vars {}'.format(var_names))

Expand Down Expand Up @@ -400,17 +401,17 @@ def _prep_data(self, kwargs=TRAINING_PREP_KWARGS):
)
)

# Inspecting features would go here

# Final cleaning
drop_list = ['gid', 'time_index', 'cloud_type']
drop_list = [
'gid',
'time_index',
'cloud_type',
'cld_opd_dcomp',
'cld_reff_dcomp',
]
if self._config.get('one_hot_categories', None) is None:
drop_list.append('flag')

for name in drop_list:
if name in self.df_train:
self.df_train = self.df_train.drop(name, axis=1)

logger.debug('**Shape: df_train={}'.format(self.df_train.shape))
features = self.df_train.columns.values.tolist()

Expand Down Expand Up @@ -605,10 +606,11 @@ def _load_data(self, test_set_mask):
var_names = copy.deepcopy(self.features)
var_names += self.y_labels

# cloud_type is needed for data cleaning, even if not included in
# training features
if 'cloud_type' not in var_names:
var_names.append('cloud_type')
# cloud_type, cld_opd_dcomp, cld_reff_dcomp are needed for data
# cleaning even if not included in training features
for f in ('cloud_type', 'cld_opd_dcomp', 'cld_reff_dcomp'):
if f not in var_names:
var_names.append(f)

logger.debug('Loading vars {}'.format(var_names))

Expand Down
6 changes: 3 additions & 3 deletions mlclouds/grid_searcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
'loss_weights_b': [0.5, 0.5],
'p_kwargs': {'loss_terms': ['mae_ghi', 'mae_dni', 'mbe_ghi', 'mbe_dni']},
'p_fun': 'p_fun_all_sky',
'clean_training_data_kwargs': {
'training_prep_kwargs': {
'filter_clear': False,
'nan_option': 'interp',
},
Expand Down Expand Up @@ -173,8 +173,8 @@ def __init__(
"p_kwargs": {"loss_terms": ["mae_ghi", "mae_dni", "mbe_ghi",
"mbe_dni"]},
"p_fun": "p_fun_all_sky",
"clean_training_data_kwargs": {"filter_clear": False,
"nan_option": "interp"},
"training_prep_kwargs": {"filter_clear": False,
"nan_option": "interp"},
"one_hot_categories": {"flag": ["clear", "ice_cloud",
"water_cloud", "bad_cloud"]}
}
Expand Down
83 changes: 83 additions & 0 deletions mlclouds/model/cloud_properties/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
{
"epochs_a": 100,
"epochs_b": 90,
"features": [
"solar_zenith_angle",
"refl_0_65um_nom",
"temp_3_75um_nom",
"temp_11_0um_nom",
"air_temperature",
"cld_press_acha",
"dew_point",
"cloud_type",
"relative_humidity",
"total_precipitable_water",
"surface_albedo"
],
"hidden_layers": [
{
"activation": "relu",
"dropout": 0.1,
"units": 256
},
{
"activation": "relu",
"dropout": 0.1,
"units": 256
},
{
"activation": "relu",
"dropout": 0.1,
"units": 256
},
{
"activation": "relu",
"dropout": 0.1,
"units": 256
},
{
"activation": "relu",
"dropout": 0.1,
"units": 256
}
],
"learning_rate": 0.001,
"loss_weights_a": [
1,
0
],
"loss_weights_b": [
0.5,
0.5
],
"metric": "relative_mae",
"n_batch": 64,
"one_hot_categories": {
"flag": [
"clear",
"ice_cloud",
"water_cloud",
"bad_cloud"
]
},
"p_fun": "p_fun_all_sky",
"p_kwargs": {
"loss_terms": [
"mae_ghi"
]
},
"phygnn_seed": 0,
"surfrad_window_minutes": 15,
"y_labels": [
"cld_opd_dcomp",
"cld_reff_dcomp"
],
"training_prep_kwargs": {
"filter_daylight": true,
"filter_clear": false,
"filter_sky_class": true,
"add_cloud_flag": true,
"sza_lim": 89,
"nan_option": "interp"
}
}
85 changes: 85 additions & 0 deletions mlclouds/model/cloud_properties/outputs/mlclouds_model.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"feature_names": [
"solar_zenith_angle",
"refl_0_65um_nom",
"temp_3_75um_nom",
"temp_11_0um_nom",
"air_temperature",
"cld_press_acha",
"dew_point",
"relative_humidity",
"total_precipitable_water",
"surface_albedo",
"clear",
"ice_cloud",
"water_cloud",
"bad_cloud"
],
"label_names": [
"cld_opd_dcomp",
"cld_reff_dcomp"
],
"norm_params": {
"air_temperature": {
"mean": 15.700041770935059,
"stdev": 11.880159378051758
},
"cld_press_acha": {
"mean": 216.6448974609375,
"stdev": 248.54319763183594
},
"dew_point": {
"mean": 4.1942572593688965,
"stdev": 10.864990234375
},
"refl_0_65um_nom": {
"mean": 40.5810546875,
"stdev": 25.966941833496094
},
"relative_humidity": {
"mean": 53.88063049316406,
"stdev": 24.798677444458008
},
"solar_zenith_angle": {
"mean": 57.1184196472168,
"stdev": 18.005184173583984
},
"surface_albedo": {
"mean": 0.25820812582969666,
"stdev": 0.19068337976932526
},
"temp_11_0um_nom": {
"mean": 269.0400390625,
"stdev": 25.51395606994629
},
"temp_3_75um_nom": {
"mean": 283.1265563964844,
"stdev": 19.781322479248047
},
"total_precipitable_water": {
"mean": 1.819767713546753,
"stdev": 1.280860185623169
}
},
"normalize": [
true,
false
],
"one_hot_categories": {
"flag": [
"clear",
"ice_cloud",
"water_cloud",
"bad_cloud"
]
},
"version_record": {
"nrel-rex": "0.2.90",
"numpy": "1.24.3",
"pandas": "2.2.2",
"phygnn": "0.0.29",
"python": "3.11.9 (main, Apr 19 2024, 16:48:06) [GCC 11.2.0]",
"sklearn": "1.5.1",
"tensorflow": "2.15.1"
}
}
Binary file not shown.
Loading

0 comments on commit aeff5f4

Please sign in to comment.