Newer
Older
# Environment identifier (presumably resolved by the training code; the
# consumer is not shown in this file).
env_id: 'overcooked'

Christoph Kowalski
committed
# Policy and model names as plain strings. "MlpPolicy" matches a
# stable_baselines3 policy alias; presumably it is passed as the model's
# `policy` argument (confirm against the consumer).
policy: 'MlpPolicy'
model_name: 'PPO'
# Partially-applied constructor for the model class.
# `_partial_` and `_target_` must be nested under `model_type`: written at
# column 0 they load as independent top-level keys and `model_type` itself
# becomes null.
# NOTE(review): these look like Hydra `instantiate` keys; confirm the loader.
model_type:
  _partial_: true
  _target_: stable_baselines3.PPO

Christoph Kowalski
committed
# Partially-applied loader, presumably used to restore a saved model for
# inference (judging by the key name and the `PPO.load` target).
# `_partial_` and `_target_` must be nested under `model_type_inference`:
# written at column 0 they load as independent top-level keys and
# `model_type_inference` itself becomes null.
model_type_inference:
  _partial_: true
  _target_: stable_baselines3.PPO.load
# Total number of timesteps (3 million). Written without digit separators:
# only YAML 1.1 parsers read `3_000_000` as an integer, YAML 1.2 parsers
# load it as a string.
# Hendric suggests something closer to 300_000_000 steps.
total_timesteps: 3000000

Christoph Kowalski
committed
# Number of environments run in parallel (per the key name; the consumer is
# not shown in this file).
number_envs_parallel: 16
# 3.0e-4 == 0.0003
learning_rate: 3.0e-4
n_steps: 2048

Christoph Kowalski
committed
# The keys in this stanza share their names with keyword arguments of
# stable_baselines3.PPO; presumably they are forwarded to that constructor
# (the consuming code is not visible here, so confirm).
# Unset options are written as `null`: YAML has no `None` literal, so a bare
# `None` loads as the string "None" instead of a null value.
batch_size: 16
n_epochs: 10
gamma: 0.99
gae_lambda: 0.95
clip_range: 0.2
clip_range_vf: null
normalize_advantage: true
ent_coef: 0.0
vf_coef: 0.5
max_grad_norm: 0.5
use_sde: false
sde_sample_freq: -1
rollout_buffer_class: null
rollout_buffer_kwargs: null
target_kl: null
stats_window_size: 100

Christoph Kowalski
committed
# Log directory template, written as a plain quoted string: YAML does not
# evaluate Python f-strings, so an `f"..."` wrapper would end up verbatim
# (prefix and quotes included) in the loaded value.
# NOTE(review): `{0}` is kept as a literal placeholder, presumably filled in
# via str.format by the caller; confirm.
tensorboard_log: 'logs/reinforcement_learning/runs/{0}'
# `null` leaves these options unset. A bare `None` is not a YAML null; it
# loads as the string "None".
policy_kwargs: null
verbose: 0
seed: null

Christoph Kowalski
committed
# Device selector string and model-setup flag. Both key names match
# stable_baselines3.PPO keyword arguments (presumably forwarded there;
# confirm against the consumer).
device: 'auto'
_init_setup_model: true