diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment.yml
index 7ddcfce3..a5ff74a4 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed234.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed234.yml
index d0ec9f0d..5cf3aecc 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed234.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed234.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed3658.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed3658.yml
index 11c80e99..d68c2530 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed3658.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed3658.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed48104.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed48104.yml
index c5a41641..6587085c 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed48104.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed48104.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed567.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed567.yml
index 7aaf5ae9..368ad3f5 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed567.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed567.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed78456.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed78456.yml
index 84f0bd3e..2cdc80f1 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed78456.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed78456.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment.yml
index a61fba51..906ed580 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed234.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed234.yml
index 1bb80297..92f561ca 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed234.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed234.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed3658.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed3658.yml
index ef9ffeda..08178d53 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed3658.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed3658.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed48104.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed48104.yml
index 5b7e9149..d92eddfc 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed48104.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed48104.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed567.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed567.yml
index 8d3c8eba..7a59c3e7 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed567.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed567.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed78456.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed78456.yml
index 085b8891..7790206f 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed78456.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed78456.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment.yml
index 4efa010f..13bfba8b 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed234.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed234.yml
index cb7996f0..cf4af847 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed234.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed234.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed3658.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed3658.yml
index 05c07f77..46daa5f5 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed3658.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed3658.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed48104.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed48104.yml
index dd3facd9..5d1a5e65 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed48104.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed48104.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed567.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed567.yml
index c1ca970a..891c45c6 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed567.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed567.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed78456.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed78456.yml
index 0520c2dd..286fd571 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed78456.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed78456.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment.yml
index 338e1a4a..5ec3f599 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed234.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed234.yml
index 58ebe180..38222b49 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed234.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed234.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed3658.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed3658.yml
index 27e94292..0c300598 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed3658.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed3658.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed48104.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed48104.yml
index 16a2bf80..5821af08 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed48104.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed48104.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"  # NOTE(review): Presumably leaves lr_labda_final unused; confirm.
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed567.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed567.yml
index aa391863..95a8bbad 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed567.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed567.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"  # NOTE(review): Presumably leaves lr_labda_final unused; confirm.
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"
diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed78456.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed78456.yml
index 6bbd56bd..cf3ab847 100644
--- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed78456.yml
+++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed78456.yml
@@ -20,16 +20,23 @@ update_after: 1000
 steps_per_update: 80
 num_test_episodes: 10
 alpha: 2.0
-alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
 labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
 # gamma: 0.995   # NOTE: Not used for finite horizon tasks.
 polyak: 0.995
 adaptive_temperature: True 
 lr_a: "1e-4"
 lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
 lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
 lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
-lr_decay_type: "linear"
+lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"  # NOTE(review): Presumably leaves lr_labda_final unused; confirm.
 lr_decay_ref: "step"
 batch_size: 256
 replay_size: "int(1e6)"