
refactor(experiments): update rstaa2024 experimental configs
This commit updates the experimental configs of my Master's thesis to
account for the fact that Han et al. 2020 used a constant lambda
learning rate in their codebase instead of a linearly decaying one.
rickstaa committed Feb 21, 2024
1 parent 712e94b commit e8012bc
Showing 24 changed files with 216 additions and 48 deletions.
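
The change is identical in every config below: the alpha3 sweep is extended from 1.0 to 1.5, the single lr_decay_type key is split into per-parameter lr_a/lr_c/lr_alpha/lr_labda decay types, and the Lagrange multiplier (labda) learning rate is held constant while the other rates decay linearly to the new lr_*_final values. A minimal sketch of the two schedule types, assuming decay is referenced to the training step (lr_decay_ref: "step") and that linear decay interpolates down to the final value; the function name and interpolation details are illustrative, not the repository's exact implementation:

def scheduled_lr(lr_init, lr_final, step, total_steps, decay_type):
    """Learning rate at `step`, with the decay referenced to steps."""
    if decay_type == "constant":
        return lr_init  # e.g. lr_labda stays at 3e-4, as in Han et al.'s code
    if decay_type == "linear":
        frac = min(step / total_steps, 1.0)
        return lr_init + frac * (lr_final - lr_init)  # hits lr_final at the end
    raise ValueError(f"unknown decay type: {decay_type!r}")

# Halfway through training the actor rate has roughly halved, while the
# Lagrange multiplier rate is unchanged:
print(scheduled_lr(1e-4, 1e-10, 50_000, 100_000, "linear"))   # ~5e-05
print(scheduled_lr(3e-4, 3e-4, 50_000, 100_000, "constant"))  # 3e-4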

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
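
In a PyTorch-style training loop, the split above amounts to one scheduler per optimizer, with no scheduler attached to the Lagrange multiplier. A hedged sketch under that assumption (the network and parameter names are placeholders, not the repository's actual code):

import torch

policy = torch.nn.Linear(8, 2)                    # placeholder actor network
log_labda = torch.nn.Parameter(torch.zeros(()))   # Lagrange multiplier

opt_a = torch.optim.Adam(policy.parameters(), lr=1e-4)  # lr_a
opt_labda = torch.optim.Adam([log_labda], lr=3e-4)      # lr_labda

total_steps, lr_a_final = 100_000, 1e-10
# Actor: the multiplier decays linearly from 1.0 to lr_a_final / lr_a, so the
# effective rate goes from 1e-4 to 1e-10 (lr_a_decay_type: "linear").
sched_a = torch.optim.lr_scheduler.LambdaLR(
    opt_a,
    lambda step: 1.0 + min(step / total_steps, 1.0) * (lr_a_final / 1e-4 - 1.0),
)
# labda: no scheduler, so its rate stays at 3e-4 for the whole run
# (lr_labda_decay_type: "constant"), matching Han et al.'s original code.

for _ in range(3):   # inside the training loop: step both optimizers,
    opt_a.step()     # then advance only the actor's schedule
    opt_labda.step()
    sched_a.step()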

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"

@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
 labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995 # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True
 lr_a: "1e-4"
 lr_c: "3e-4"
 lr_alpha: "1e-4"
 lr_labda: "3e-4"
-lr_decay_type: "linear"
+lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"