Skip to content
Snippets Groups Projects
PPO.yaml 776 B
Newer Older
  • Learn to ignore specific revisions
  # Hyperparameters for a Stable-Baselines3 PPO training run.
  # NOTE(review): `_target_` / `_partial_` look like Hydra `instantiate`
  # keys - confirm against the code that loads this file.
  model_name: "PPO"

  # Presumably the callable used to construct a fresh model.
  model_type:
    _partial_: true
    _target_: stable_baselines3.PPO

  # Presumably the callable used to restore a saved model for inference.
  model_type_inference:
    _partial_: true
    _target_: stable_baselines3.PPO.load

  # Hendric suggests roughly 300,000,000 steps.
  # Written without `_` separators: only YAML 1.1 loaders read
  # `300_000_000` as an integer; YAML 1.2 loaders read it as a string.
  total_timesteps: 300000000
  number_envs_parallel: 32
  learning_rate: 0.0002

  gae_lambda: 0.95
  clip_range: 0.2
  # `null`, not `None`: a bare `None` is loaded as the string "None".
  clip_range_vf: null
  normalize_advantage: true

  vf_coef: 0.5
  max_grad_norm: 0.5
  use_sde: false
  sde_sample_freq: -1
  rollout_buffer_class: null
  rollout_buffer_kwargs: null
  target_kl: null
  stats_window_size: 100

  # Plain string; the Python `f"..."` prefix has no meaning in YAML and
  # would become part of the path.
  # NOTE(review): `{0}` is presumably filled in by the consumer (e.g. via
  # `str.format`) - confirm.
  tensorboard_log: "logs/reinforcement_learning/runs/{0}"

  policy_kwargs: null
  verbose: 0
  seed: null