diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment.yml index 7ddcfce3..a5ff74a4 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed234.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed234.yml index d0ec9f0d..5cf3aecc 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed234.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed234.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed3658.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed3658.yml index 11c80e99..d68c2530 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed3658.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed3658.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed48104.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed48104.yml index c5a41641..6587085c 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed48104.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed48104.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed567.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed567.yml index 7aaf5ae9..368ad3f5 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed567.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed567.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed78456.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed78456.yml index 84f0bd3e..2cdc80f1 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed78456.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_cartpole_cost_alpha3_tune_experiment_seed78456.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment.yml index a61fba51..906ed580 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed234.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed234.yml index 1bb80297..92f561ca 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed234.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed234.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed3658.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed3658.yml index ef9ffeda..08178d53 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed3658.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed3658.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed48104.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed48104.yml index 5b7e9149..d92eddfc 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed48104.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed48104.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed567.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed567.yml index 8d3c8eba..7a59c3e7 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed567.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed567.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed78456.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed78456.yml index 085b8891..7790206f 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed78456.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_fetch_reach_alpha3_tune_experiment_seed78456.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment.yml index 4efa010f..13bfba8b 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed234.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed234.yml index cb7996f0..cf4af847 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed234.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed234.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed3658.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed3658.yml index 05c07f77..46daa5f5 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed3658.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed3658.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed48104.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed48104.yml index dd3facd9..5d1a5e65 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed48104.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed48104.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed567.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed567.yml index c1ca970a..891c45c6 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed567.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed567.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed78456.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed78456.yml index 0520c2dd..286fd571 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed78456.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_alpha3_tune_experiment_seed78456.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment.yml index 338e1a4a..5ec3f599 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed234.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed234.yml index 58ebe180..38222b49 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed234.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed234.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed3658.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed3658.yml index 27e94292..0c300598 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed3658.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed3658.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed48104.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed48104.yml index 16a2bf80..5821af08 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed48104.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed48104.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed567.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed567.yml index aa391863..95a8bbad 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed567.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed567.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)" diff --git a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed78456.yml b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed78456.yml index 6bbd56bd..cf3ab847 100644 --- a/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed78456.yml +++ b/experiments/staa_et_al_2024/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed78456.yml @@ -20,16 +20,23 @@ update_after: 1000 steps_per_update: 80 num_test_episodes: 10 alpha: 2.0 -alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0 # NOTE: Tuning alpha3. +alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3. labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability. # gamma: 0.995 # NOTE: Not used for finite horizon tasks. polyak: 0.995 adaptive_temperature: True lr_a: "1e-4" lr_c: "3e-4" +lr_alpha: "1e-4" +lr_labda: "3e-4" lr_a_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. lr_c_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. -lr_decay_type: "linear" +lr_alpha_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_labda_final: "1e-10" # NOTE: Not in Han et al. 2020, but aligned with original code. +lr_a_decay_type: "linear" +lr_c_decay_type: "linear" +lr_alpha_decay_type: "linear" +lr_labda_decay_type: "constant" lr_decay_ref: "step" batch_size: 256 replay_size: "int(1e6)"