chore(experiments): introduce CartPole and Oscillator exp configs

This commit incorporates the experimental configurations utilized in my Master's thesis research, specifically for the CartPole and Oscillator environments.
rickstaa · Feb 14, 2024 · 2275f03 · 2275f03
1 parent 8d93610
commit 2275f03
Show file tree

Hide file tree

Showing 17 changed files with 697 additions and 0 deletions.
diff --git a/...staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed234.yml b/...staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed234.yml
@@ -0,0 +1,41 @@
+alg_name: 'lac'
+exp_name: 'han2020_reproduction_lac_cartpole_cost_alpha3_tune_exp'
+env_name: 'stable_gym:CartPoleCost-v1'
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: [256, 256] is chosen to stay consistent with the article.
+    critic: [64, 64, 16]
+  activation:
+    actor: 'nn.ReLU'
+    critic: 'nn.ReLU'
+  output_activation:
+    actor: 'nn.ReLU'
+opt_type: 'minimize'
+max_ep_len: 250
+epochs: 489
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: '0.1 0.2 0.4 0.5 0.7 0.8 0.9 1.0'  # NOTE: alpha3 is being tuned (commented out here: 0.3 0.6).
+labda: 0.99  # NOTE: Lowered from 1.0 to 0.99 for stability.
+# gamma: 0.995  # NOTE: Left disabled; not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: true
+lr_a: '1e-4'
+lr_c: '3e-4'
+lr_a_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_c_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_decay_type: 'linear'
+lr_decay_ref: 'step'
+batch_size: 256
+replay_size: 'int(1e6)'
+horizon_length: 5
+seed: 234  # NOTE: 5 seeds are used for tuning.
+save_freq: 10
+use_wandb: true
+wandb_group: 'han2020_reproduction'
+device: 'gpu:1'
diff --git a/...taa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed3658.yml b/...taa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed3658.yml
@@ -0,0 +1,41 @@
+alg_name: 'lac'
+exp_name: 'han2020_reproduction_lac_cartpole_cost_alpha3_tune_exp'
+env_name: 'stable_gym:CartPoleCost-v1'
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: [256, 256] is chosen to stay consistent with the article.
+    critic: [64, 64, 16]
+  activation:
+    actor: 'nn.ReLU'
+    critic: 'nn.ReLU'
+  output_activation:
+    actor: 'nn.ReLU'
+opt_type: 'minimize'
+max_ep_len: 250
+epochs: 489
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: '0.1 0.2 0.4 0.6 0.7 0.8 0.9 1.0'  # NOTE: alpha3 is being tuned (commented out here: 0.3 0.5).
+labda: 0.99  # NOTE: Lowered from 1.0 to 0.99 for stability.
+# gamma: 0.995  # NOTE: Left disabled; not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: true
+lr_a: '1e-4'
+lr_c: '3e-4'
+lr_a_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_c_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_decay_type: 'linear'
+lr_decay_ref: 'step'
+batch_size: 256
+replay_size: 'int(1e6)'
+horizon_length: 5
+seed: 3658  # NOTE: 5 seeds are used for tuning (commented out here: 234).
+save_freq: 10
+use_wandb: true
+wandb_group: 'han2020_reproduction'
+device: 'gpu:1'
diff --git a/...aa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed48104.yml b/...aa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed48104.yml
@@ -0,0 +1,41 @@
+alg_name: 'lac'
+exp_name: 'han2020_reproduction_lac_cartpole_cost_alpha3_tune_exp'
+env_name: 'stable_gym:CartPoleCost-v1'
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: [256, 256] is chosen to stay consistent with the article.
+    critic: [64, 64, 16]
+  activation:
+    actor: 'nn.ReLU'
+    critic: 'nn.ReLU'
+  output_activation:
+    actor: 'nn.ReLU'
+opt_type: 'minimize'
+max_ep_len: 250
+epochs: 489
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: '0.1 0.2 0.3 0.4 0.5 0.6 0.8 0.9'  # NOTE: alpha3 is being tuned (commented out here: 0.7 1.0).
+labda: 0.99  # NOTE: Lowered from 1.0 to 0.99 for stability.
+# gamma: 0.995  # NOTE: Left disabled; not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: true
+lr_a: '1e-4'
+lr_c: '3e-4'
+lr_a_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_c_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_decay_type: 'linear'
+lr_decay_ref: 'step'
+batch_size: 256
+replay_size: 'int(1e6)'
+horizon_length: 5
+seed: 48104  # NOTE: 5 seeds are used for tuning (commented out here: 567 3658 234).
+save_freq: 10
+use_wandb: true
+wandb_group: 'han2020_reproduction'
+device: 'gpu:1'
diff --git a/...staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed567.yml b/...staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed567.yml
@@ -0,0 +1,41 @@
+alg_name: 'lac'
+exp_name: 'han2020_reproduction_lac_cartpole_cost_alpha3_tune_exp'
+env_name: 'stable_gym:CartPoleCost-v1'
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: [256, 256] is chosen to stay consistent with the article.
+    critic: [64, 64, 16]
+  activation:
+    actor: 'nn.ReLU'
+    critic: 'nn.ReLU'
+  output_activation:
+    actor: 'nn.ReLU'
+opt_type: 'minimize'
+max_ep_len: 250
+epochs: 489
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: '0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9'  # NOTE: alpha3 is being tuned (commented out here: 0.5 1.0).
+labda: 0.99  # NOTE: Lowered from 1.0 to 0.99 for stability.
+# gamma: 0.995  # NOTE: Left disabled; not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: true
+lr_a: '1e-4'
+lr_c: '3e-4'
+lr_a_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_c_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_decay_type: 'linear'
+lr_decay_ref: 'step'
+batch_size: 256
+replay_size: 'int(1e6)'
+horizon_length: 5
+seed: 567  # NOTE: 5 seeds are used for tuning (commented out here: 3658 234).
+save_freq: 10
+use_wandb: true
+wandb_group: 'han2020_reproduction'
+device: 'gpu:1'
diff --git a/...aa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed78456.yml b/...aa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed78456.yml
@@ -0,0 +1,41 @@
+alg_name: 'lac'
+exp_name: 'han2020_reproduction_lac_cartpole_cost_alpha3_tune_exp'
+env_name: 'stable_gym:CartPoleCost-v1'
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: [256, 256] is chosen to stay consistent with the article.
+    critic: [64, 64, 16]
+  activation:
+    actor: 'nn.ReLU'
+    critic: 'nn.ReLU'
+  output_activation:
+    actor: 'nn.ReLU'
+opt_type: 'minimize'
+max_ep_len: 250
+epochs: 489
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: '0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9'  # NOTE: alpha3 is being tuned (commented out here: 0.5 1.0).
+labda: 0.99  # NOTE: Lowered from 1.0 to 0.99 for stability.
+# gamma: 0.995  # NOTE: Left disabled; not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: true
+lr_a: '1e-4'
+lr_c: '3e-4'
+lr_a_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_c_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_decay_type: 'linear'
+lr_decay_ref: 'step'
+batch_size: 256
+replay_size: 'int(1e6)'
+horizon_length: 5
+seed: 78456  # NOTE: 5 seeds are used for tuning (commented out here: 48104 567 3658 234).
+save_freq: 10
+use_wandb: true
+wandb_group: 'han2020_reproduction'
+device: 'gpu:1'
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment.yml
@@ -0,0 +1,41 @@
+alg_name: 'lac'
+exp_name: 'han2020_reproduction_lac_oscillator_alpha3_tune_exp'
+env_name: 'stable_gym:Oscillator-v1'
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: [256, 256] is chosen to stay consistent with the article.
+    critic: [256, 256, 16]
+  activation:
+    actor: 'nn.ReLU'
+    critic: 'nn.ReLU'
+  output_activation:
+    actor: 'nn.ReLU'
+opt_type: 'minimize'
+max_ep_len: 400
+epochs: 49
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: '0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0'  # NOTE: alpha3 is the parameter being tuned.
+labda: 0.99  # NOTE: Lowered from 1.0 to 0.99 for stability.
+# gamma: 0.995  # NOTE: Left disabled; not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: true
+lr_a: '1e-4'
+lr_c: '3e-4'
+lr_a_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_c_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_decay_type: 'linear'
+lr_decay_ref: 'step'
+batch_size: 256
+replay_size: 'int(1e6)'
+horizon_length: 5
+seed: '78456 48104 567 3658 234'  # NOTE: 5 seeds are used for tuning.
+save_freq: 10
+use_wandb: true
+wandb_group: 'han2020_reproduction'
+device: 'gpu:1'
diff --git a/...ts/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed234.yml b/...ts/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed234.yml
@@ -0,0 +1,41 @@
+alg_name: 'lac'
+exp_name: 'han2020_reproduction_lac_oscillator_alpha3_tune_exp'
+env_name: 'stable_gym:Oscillator-v1'
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: [256, 256] is chosen to stay consistent with the article.
+    critic: [256, 256, 16]
+  activation:
+    actor: 'nn.ReLU'
+    critic: 'nn.ReLU'
+  output_activation:
+    actor: 'nn.ReLU'
+opt_type: 'minimize'
+max_ep_len: 400
+epochs: 49
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: '0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0'  # NOTE: alpha3 is the parameter being tuned.
+labda: 0.99  # NOTE: Lowered from 1.0 to 0.99 for stability.
+# gamma: 0.995  # NOTE: Left disabled; not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: true
+lr_a: '1e-4'
+lr_c: '3e-4'
+lr_a_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_c_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_decay_type: 'linear'
+lr_decay_ref: 'step'
+batch_size: 256
+replay_size: 'int(1e6)'
+horizon_length: 5
+seed: 234  # NOTE: 5 seeds are used for tuning.
+save_freq: 10
+use_wandb: true
+wandb_group: 'han2020_reproduction'
+device: 'gpu:1'
diff --git a/...s/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed3658.yml b/...s/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed3658.yml
@@ -0,0 +1,41 @@
+alg_name: 'lac'
+exp_name: 'han2020_reproduction_lac_oscillator_alpha3_tune_exp'
+env_name: 'stable_gym:Oscillator-v1'
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: [256, 256] is chosen to stay consistent with the article.
+    critic: [256, 256, 16]
+  activation:
+    actor: 'nn.ReLU'
+    critic: 'nn.ReLU'
+  output_activation:
+    actor: 'nn.ReLU'
+opt_type: 'minimize'
+max_ep_len: 400
+epochs: 49
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: '0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0'  # NOTE: alpha3 is the parameter being tuned.
+labda: 0.99  # NOTE: Lowered from 1.0 to 0.99 for stability.
+# gamma: 0.995  # NOTE: Left disabled; not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: true
+lr_a: '1e-4'
+lr_c: '3e-4'
+lr_a_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_c_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_decay_type: 'linear'
+lr_decay_ref: 'step'
+batch_size: 256
+replay_size: 'int(1e6)'
+horizon_length: 5
+seed: 3658  # NOTE: 5 seeds are used for tuning (commented out here: 234).
+save_freq: 10
+use_wandb: true
+wandb_group: 'han2020_reproduction'
+device: 'gpu:1'
diff --git a/.../staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed48104.yml b/.../staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed48104.yml
@@ -0,0 +1,41 @@
+alg_name: 'lac'
+exp_name: 'han2020_reproduction_lac_oscillator_alpha3_tune_exp'
+env_name: 'stable_gym:Oscillator-v1'
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: [256, 256] is chosen to stay consistent with the article.
+    critic: [256, 256, 16]
+  activation:
+    actor: 'nn.ReLU'
+    critic: 'nn.ReLU'
+  output_activation:
+    actor: 'nn.ReLU'
+opt_type: 'minimize'
+max_ep_len: 400
+epochs: 49
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: '0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0'  # NOTE: alpha3 is the parameter being tuned.
+labda: 0.99  # NOTE: Lowered from 1.0 to 0.99 for stability.
+# gamma: 0.995  # NOTE: Left disabled; not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: true
+lr_a: '1e-4'
+lr_c: '3e-4'
+lr_a_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_c_final: '1e-10'  # NOTE: Absent from Han et al. 2020, but aligned with the original code.
+lr_decay_type: 'linear'
+lr_decay_ref: 'step'
+batch_size: 256
+replay_size: 'int(1e6)'
+horizon_length: 5
+seed: 48104  # NOTE: 5 seeds are used for tuning (commented out here: 567 3658 234).
+save_freq: 10
+use_wandb: true
+wandb_group: 'han2020_reproduction'
+device: 'gpu:1'