# PPO.yaml
model_type: "PPO"
total_timesteps: 3_000_000  # Hendric says more like 300_000_000 steps
number_envs_parallel: 64
learning_rate: 0.0003
n_steps: 2048
batch_size: 64
n_epochs: 10
gamma: 0.99
gae_lambda: 0.95
clip_range: 0.2
clip_range_vf: null
normalize_advantage: True
ent_coef: 0.0
vf_coef: 0.5
max_grad_norm: 0.5
use_sde: False
sde_sample_freq: -1
rollout_buffer_class: null
rollout_buffer_kwargs: null
target_kl: null
stats_window_size: 100
    
# "{0}" is a placeholder for a run id, to be filled in by the loading code.
tensorboard_log: "logs/reinforcement_learning/runs/{0}"
    
policy_kwargs: null
verbose: 0
seed: null
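
The parameter names above map one-to-one onto the constructor of stable-baselines3's PPO, so the file is presumably loaded and unpacked into it. Below is a minimal sketch of such a loading step, assuming PyYAML and a recent stable-baselines3 release (rollout_buffer_class / rollout_buffer_kwargs require version 2.x); the config path, the CartPole-v1 environment ID, and the make_vec_env call are illustrative assumptions, not part of the snippet.

# Minimal loading sketch (illustrative, not from the original snippet).
import yaml
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

with open("PPO.yaml") as f:
    cfg = yaml.safe_load(f)

# Split off the entries that are not PPO constructor arguments.
cfg.pop("model_type")                          # "PPO" -- selects the algorithm
total_timesteps = cfg.pop("total_timesteps")   # 3_000_000
n_envs = cfg.pop("number_envs_parallel")       # 64

# Fill the run-id placeholder in the TensorBoard log path (run id 0 here).
cfg["tensorboard_log"] = cfg["tensorboard_log"].format(0)

env = make_vec_env("CartPole-v1", n_envs=n_envs)  # hypothetical environment ID
model = PPO("MlpPolicy", env, **cfg)
model.learn(total_timesteps=total_timesteps)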