diff --git a/overcooked_simulator/game_content/environment_config_rl.yaml b/overcooked_simulator/game_content/environment_config_rl.yaml index 391481efe7e4190b561520e0bd8c1bca137c4ed5..1b32b457a53a361932ac4bcd419f9ccf29fbfe8e 100644 --- a/overcooked_simulator/game_content/environment_config_rl.yaml +++ b/overcooked_simulator/game_content/environment_config_rl.yaml @@ -6,7 +6,7 @@ plates: # range of seconds until the dirty plate arrives. game: - time_limit_seconds: 300 + time_limit_seconds: 400 meals: all: true @@ -67,18 +67,8 @@ orders: b: 20 sample_on_serving: false # Sample the delay for the next order only after a meal was served. - score_calc_gen_func: !!python/name:overcooked_simulator.order.simple_score_calc_gen_func '' - score_calc_gen_kwargs: - # the kwargs for the score_calc_gen_func - other: 0 - scores: [ ] - expired_penalty_func: !!python/name:overcooked_simulator.order.simple_expired_penalty '' - expired_penalty_kwargs: - default: 0 - serving_not_ordered_meals: !!python/name:overcooked_simulator.order.serving_not_ordered_meals_with_five_score '' - # a func that calcs a store for not ordered but served meals. Input: meal - penalty_for_trash: !!python/name:overcooked_simulator.order.penalty_for_each_item '' - # a func that calcs the penalty for items that the player puts into the trashcan. 
+ serving_not_ordered_meals: true + # Whether meals that were not ordered may still be served (i.e. dropped on the serving window). player_config: radius: 0.4 @@ -103,20 +93,15 @@ extra_setup_functions: hooks: [ completed_order ] callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks '' callback_class_kwargs: - static_score: 20 - score_on_specific_kwarg: meal_name - score_map: - Burger: 15 - OnionSoup: 10 - Salad: 5 - TomatoSoup: 10 - not_ordered_meals: + static_score: 100 + + serve_not_ordered_meals: func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class '' kwargs: hooks: [ serve_not_ordered_meal ] callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks '' callback_class_kwargs: - static_score: 2 + static_score: 100 trashcan_usages: func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class '' kwargs: @@ -124,13 +109,21 @@ extra_setup_functions: callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks '' callback_class_kwargs: static_score: -5 - expired_orders: + item_cut: + func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class '' + kwargs: + hooks: [ cutting_board_100 ] + callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks '' + callback_class_kwargs: + static_score: 10 + stepped: func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class '' kwargs: - hooks: [ order_expired ] + hooks: [ post_step ] callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks '' callback_class_kwargs: - static_score: -10 + static_score: -1 + # json_states: # func: !!python/name:overcooked_simulator.recording.class_recording_with_hooks '' # kwargs: diff --git a/overcooked_simulator/game_content/layouts/rl.layout b/overcooked_simulator/game_content/layouts/rl.layout index 624af90b4e174a801bfd027e0c9c864450489eef..4b91262e0e78525486821ca6c18edd99097138d8 100644 --- a/overcooked_simulator/game_content/layouts/rl.layout +++ 
b/overcooked_simulator/game_content/layouts/rl.layout @@ -1,5 +1,4 @@ -#X### -T___# -#___# -U___P -##W## +#X## +T__# +U__P +##W# diff --git a/overcooked_simulator/gym_env.py b/overcooked_simulator/gym_env.py index 2e3f55d7c8cf504d108824beda2e02ac18313310..c5de18a97c01e7d5b21999c12ff7652f31545a05 100644 --- a/overcooked_simulator/gym_env.py +++ b/overcooked_simulator/gym_env.py @@ -196,17 +196,8 @@ class EnvGymWrapper(Env): observation = self.get_vector_state() - reward = -1 - if ( - self.env.score > self.prev_score - and self.env.score != 0 - ): - self.prev_score = self.env.score - reward = 100 - elif self.env.score < self.prev_score: - self.prev_score = 0 - reward = -1 - + reward = self.env.score - self.prev_score + self.prev_score = self.env.score terminated = self.env.game_ended truncated = self.env.game_ended info = {} @@ -283,7 +274,7 @@ def main(): # # save_code=True, # optional # ) - env = make_vec_env(EnvGymWrapper, n_envs=64) + env = make_vec_env(EnvGymWrapper, n_envs=8) # env = EnvGymWrapper() model_classes = [A2C, DQN, PPO] @@ -317,7 +308,7 @@ def main(): check_env(env) obs, info = env.reset() while True: - time.sleep(1 / 30) + time.sleep(1 / 10) action, _states = model.predict(obs, deterministic=False) obs, reward, terminated, truncated, info = env.step(int(action)) env.render() diff --git a/overcooked_simulator/overcooked_environment.py b/overcooked_simulator/overcooked_environment.py index 601d876994d7fc4494ed1a93dd13a4dac8122986..54de3fb157e31f41d397185c379f8b93213f3203 100644 --- a/overcooked_simulator/overcooked_environment.py +++ b/overcooked_simulator/overcooked_environment.py @@ -48,7 +48,7 @@ from overcooked_simulator.hooks import ( ACTION_ON_NOT_REACHABLE_COUNTER, ACTION_PUT, ACTION_INTERACT_START, - ITEM_INFO_CONFIG, + ITEM_INFO_CONFIG, POST_STEP, ) from overcooked_simulator.order import ( OrderManager, @@ -756,7 +756,7 @@ class Environment: self.order_manager.progress(passed_time=passed_time, now=self.env_time) for effect_manager in 
self.effect_manager.values(): effect_manager.progress(passed_time=passed_time, now=self.env_time) - # self.hook(POST_STEP, passed_time=passed_time) + self.hook(POST_STEP, passed_time=passed_time) def get_state(self): """Get the current state of the game environment. The state here is accessible by the current python objects. diff --git a/overcooked_simulator/rl_agent_checkpoints/overcooked_PPO.zip b/overcooked_simulator/rl_agent_checkpoints/overcooked_PPO.zip new file mode 100644 index 0000000000000000000000000000000000000000..b79af5605fa8773501888d028bca62b6845cb2d4 Binary files /dev/null and b/overcooked_simulator/rl_agent_checkpoints/overcooked_PPO.zip differ