Skip to content

Commit

Permalink
chore(experiments): introduce CartPole and Oscillator exp configs
Browse files Browse the repository at this point in the history
This commit incorporates the experimental configurations utilized
in my Master's thesis research, specifically for the CartPole and
Oscillator environments.
  • Loading branch information
rickstaa committed Feb 14, 2024
1 parent 8d93610 commit 2275f03
Show file tree
Hide file tree
Showing 17 changed files with 697 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# LAC alpha3-tuning experiment on the CartPoleCost-v1 environment
# (part of the "han2020_reproduction" W&B group).
alg_name: lac
exp_name: han2020_reproduction_lac_cartpole_cost_alpha3_tune_exp
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The "actor"/"critic" keys must stay nested
# under their parent key; at the top level they collide as duplicate keys.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [64, 64, 16]
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
# Space-separated plain scalar (a YAML string); values after the first '#'
# are commented out and therefore not part of this run.
# NOTE(review): presumably expanded into one variant per value - confirm.
alpha3: 0.1 0.2 0.4 0.5 0.7 0.8 0.9 1.0  # 0.3 0.6 # NOTE: Tuning alpha3.
labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
seed: 234  # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# LAC alpha3-tuning experiment on the CartPoleCost-v1 environment
# (part of the "han2020_reproduction" W&B group).
alg_name: lac
exp_name: han2020_reproduction_lac_cartpole_cost_alpha3_tune_exp
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The "actor"/"critic" keys must stay nested
# under their parent key; at the top level they collide as duplicate keys.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [64, 64, 16]
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
# Space-separated plain scalar (a YAML string); values after the first '#'
# are commented out and therefore not part of this run.
# NOTE(review): presumably expanded into one variant per value - confirm.
alpha3: 0.1 0.2 0.4 0.6 0.7 0.8 0.9 1.0  # 0.3 0.5 # NOTE: Tuning alpha3.
labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
# Only the first value is active; the seeds after '#' are commented out.
seed: 3658  # 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# LAC alpha3-tuning experiment on the CartPoleCost-v1 environment
# (part of the "han2020_reproduction" W&B group).
alg_name: lac
exp_name: han2020_reproduction_lac_cartpole_cost_alpha3_tune_exp
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The "actor"/"critic" keys must stay nested
# under their parent key; at the top level they collide as duplicate keys.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [64, 64, 16]
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
# Space-separated plain scalar (a YAML string); values after the first '#'
# are commented out and therefore not part of this run.
# NOTE(review): presumably expanded into one variant per value - confirm.
alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.8 0.9  # 0.7 1.0 # NOTE: Tuning alpha3.
labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
# Only the first value is active; the seeds after '#' are commented out.
seed: 48104  # 567 3658 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# LAC alpha3-tuning experiment on the CartPoleCost-v1 environment
# (part of the "han2020_reproduction" W&B group).
alg_name: lac
exp_name: han2020_reproduction_lac_cartpole_cost_alpha3_tune_exp
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The "actor"/"critic" keys must stay nested
# under their parent key; at the top level they collide as duplicate keys.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [64, 64, 16]
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
# Space-separated plain scalar (a YAML string); values after the first '#'
# are commented out and therefore not part of this run.
# NOTE(review): presumably expanded into one variant per value - confirm.
alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9  # 0.5 1.0 # NOTE: Tuning alpha3.
labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
# Only the first value is active; the seeds after '#' are commented out.
seed: 567  # 3658 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# LAC alpha3-tuning experiment on the CartPoleCost-v1 environment
# (part of the "han2020_reproduction" W&B group).
alg_name: lac
exp_name: han2020_reproduction_lac_cartpole_cost_alpha3_tune_exp
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The "actor"/"critic" keys must stay nested
# under their parent key; at the top level they collide as duplicate keys.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [64, 64, 16]
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
# Space-separated plain scalar (a YAML string); values after the first '#'
# are commented out and therefore not part of this run.
# NOTE(review): presumably expanded into one variant per value - confirm.
alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9  # 0.5 1.0 # NOTE: Tuning alpha3.
labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
# Only the first value is active; the seeds after '#' are commented out.
seed: 78456  # 48104 567 3658 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# LAC alpha3-tuning experiment on the Oscillator-v1 environment
# (part of the "han2020_reproduction" W&B group).
alg_name: lac
exp_name: han2020_reproduction_lac_oscillator_alpha3_tune_exp
env_name: "stable_gym:Oscillator-v1"
# Actor/critic network settings. The "actor"/"critic" keys must stay nested
# under their parent key; at the top level they collide as duplicate keys.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256, 16]
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
# Space-separated plain scalar (a YAML string) listing all ten alpha3 values.
# NOTE(review): presumably expanded into one variant per value - confirm.
alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
# All five seeds are active here (space-separated plain scalar, a YAML string).
seed: 78456 48104 567 3658 234  # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# LAC alpha3-tuning experiment on the Oscillator-v1 environment
# (part of the "han2020_reproduction" W&B group).
alg_name: lac
exp_name: han2020_reproduction_lac_oscillator_alpha3_tune_exp
env_name: "stable_gym:Oscillator-v1"
# Actor/critic network settings. The "actor"/"critic" keys must stay nested
# under their parent key; at the top level they collide as duplicate keys.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256, 16]
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
# Space-separated plain scalar (a YAML string) listing all ten alpha3 values.
# NOTE(review): presumably expanded into one variant per value - confirm.
alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
seed: 234  # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# LAC alpha3-tuning experiment on the Oscillator-v1 environment
# (part of the "han2020_reproduction" W&B group).
alg_name: lac
exp_name: han2020_reproduction_lac_oscillator_alpha3_tune_exp
env_name: "stable_gym:Oscillator-v1"
# Actor/critic network settings. The "actor"/"critic" keys must stay nested
# under their parent key; at the top level they collide as duplicate keys.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256, 16]
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
# Space-separated plain scalar (a YAML string) listing all ten alpha3 values.
# NOTE(review): presumably expanded into one variant per value - confirm.
alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
# Only the first value is active; the seeds after '#' are commented out.
seed: 3658  # 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# LAC alpha3-tuning experiment on the Oscillator-v1 environment
# (part of the "han2020_reproduction" W&B group).
alg_name: lac
exp_name: han2020_reproduction_lac_oscillator_alpha3_tune_exp
env_name: "stable_gym:Oscillator-v1"
# Actor/critic network settings. The "actor"/"critic" keys must stay nested
# under their parent key; at the top level they collide as duplicate keys.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256, 16]
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
# Space-separated plain scalar (a YAML string) listing all ten alpha3 values.
# NOTE(review): presumably expanded into one variant per value - confirm.
alpha3: 0.1 0.2 0.3 0.4 0.6 0.7 0.8 0.9 0.5 1.0  # NOTE: Tuning alpha3.
labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
# Only the first value is active; the seeds after '#' are commented out.
seed: 48104  # 567 3658 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction"
device: "gpu:1"
Loading

0 comments on commit 2275f03

Please sign in to comment.