diff --git a/cooperative_cuisine/orders.py b/cooperative_cuisine/orders.py
index 8ee060e8a4488028f70bca0e6a15468c7c5c95a2..968a042a739e234f574ddd764e4d5cad1a88fdd9 100644
--- a/cooperative_cuisine/orders.py
+++ b/cooperative_cuisine/orders.py
@@ -651,6 +651,7 @@ class DeterministicOrderGeneration(OrderGeneration):
             self.current_queue[0].start -= diff_to_next
         self.next_order_time = self.current_queue[0].start
         orders.extend(self.get_orders(passed_time, now, [], []))
+        log.info(f"Created orders: {orders}.")
         return orders
 
     def parse_timed_orders(self) -> list[ParsedTimedOrder]:
diff --git a/cooperative_cuisine/reinforcement_learning/config/additional_configs/additional_config_base.yaml b/cooperative_cuisine/reinforcement_learning/config/additional_configs/additional_config_base.yaml
index 56ca8938c0d1c4e40cc3f3bb0a7be96978ad3531..4ba0f49b5cb740b6bb6f1ba3d36fa1518950ef6b 100644
--- a/cooperative_cuisine/reinforcement_learning/config/additional_configs/additional_config_base.yaml
+++ b/cooperative_cuisine/reinforcement_learning/config/additional_configs/additional_config_base.yaml
@@ -1,7 +1,6 @@
-order_generator: "random_orders.yaml"
 # Here the filename of the converter should be given. The converter class needs to be called StateConverter and implement the abstract StateToObservationConverter class
 state_converter:
-  _target_: "cooperative_cuisine.reinforcement_learning.obs_converter.base_converter_onehot.BaseStateConverterOnehot"
+  _target_: "cooperative_cuisine.reinforcement_learning.obs_converter.base_converter.BaseStateConverter"
 log_path: "logs/reinforcement_learning"
 checkpoint_path: "rl_agent_checkpoints"
 render_mode: "rgb_array"
diff --git a/cooperative_cuisine/reinforcement_learning/config/environment/environment_config_rl.yaml b/cooperative_cuisine/reinforcement_learning/config/environment/environment_config_rl.yaml
index 5151d980a30826f68925fc320b03e6275a5077d1..8e0a61603966d332d68f7d1d9d5c43b74e2f066d 100644
--- a/cooperative_cuisine/reinforcement_learning/config/environment/environment_config_rl.yaml
+++ b/cooperative_cuisine/reinforcement_learning/config/environment/environment_config_rl.yaml
@@ -11,7 +11,7 @@ game:
   undo_dispenser_pickup: true
   validate_recipes: false
 
-
+layout_name: rl_small.layout
 
 layout_chars:
   _: Free
@@ -51,6 +51,9 @@ layout_chars:
 
 orders:
+  order_generator:
+    _target_: "cooperative_cuisine.orders.RandomOrderGeneration"
+    _partial_: true
   meals:
     all: false
     # if all: false -> only orders for these meals are generated
@@ -97,38 +100,59 @@ effect_manager: { }
 #      spreading_duration: [ 5, 10 ]
 #      fire_burns_ingredients_and_meals: true
 
-
 hook_callbacks:
   # # --------------- Scoring ---------------
   orders:
     hooks: [ completed_order ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
       static_score: 0.95
 
   serve_not_ordered_meals:
     hooks: [ serve_not_ordered_meal ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
       static_score: 0.95
   trashcan_usages:
     hooks: [ trashcan_usage ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
    callback_class_kwargs:
       static_score: -0.2
   item_cut:
     hooks: [ cutting_board_100 ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
       static_score: 0.1
   stepped:
     hooks: [ post_step ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
       static_score: -0.01
   combine:
     hooks: [ drop_off_on_cooking_equipment ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
       static_score: 0.01
   start_interact:
     hooks: [ player_start_interaction ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
       static_score: 0.01
+
 # json_states:
 #   hooks: [ json_state ]
 #   record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
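For reference, the `_target_`/`_partial_` blocks added above are plain Hydra instantiation specs. A minimal sketch of what they resolve to at runtime, assuming stock `hydra.utils.instantiate` semantics (the `static_score` value below is just an illustrative kwarg from this config):

```python
from functools import partial

from hydra.utils import instantiate
from omegaconf import OmegaConf

# Mirrors a single hook_callbacks entry from the config above.
spec = OmegaConf.create(
    {"_target_": "cooperative_cuisine.scores.ScoreViaHooks", "_partial_": True}
)

# With _partial_: true, instantiate() does not build the object yet; it
# returns a functools.partial that still expects the remaining kwargs.
factory = instantiate(spec)
assert isinstance(factory, partial)
# The environment later completes the call, roughly factory(static_score=0.95, ...).
```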
diff --git a/cooperative_cuisine/reinforcement_learning/config/environment/environment_config_rl_deterministic_order_generation.yaml b/cooperative_cuisine/reinforcement_learning/config/environment/environment_config_rl_deterministic_order_generation.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..de3613cad6be89eb479c032f95c406ab4a75fbb3
--- /dev/null
+++ b/cooperative_cuisine/reinforcement_learning/config/environment/environment_config_rl_deterministic_order_generation.yaml
@@ -0,0 +1,166 @@
+
+plates:
+  clean_plates: 2
+  dirty_plates: 0
+  plate_delay: [ 2, 4 ]
+  return_dirty: False
+  # range of seconds until the dirty plate arrives.
+
+game:
+  time_limit_seconds: 300
+  undo_dispenser_pickup: true
+  validate_recipes: false
+
+layout_name: rl_small.layout
+
+layout_chars:
+  _: Free
+  hash: Counter # #
+  A: Agent
+  pipe: Extinguisher
+  P: PlateDispenser
+  C: CuttingBoard
+  X: Trashcan
+  $: ServingWindow
+  S: Sink
+  +: SinkAddon
+  at: Plate # @ just a clean plate on a counter
+  U: Pot # with Stove
+  Q: Pan # with Stove
+  O: Peel # with Oven
+  F: Basket # with DeepFryer
+  T: Tomato
+  N: Onion # oNioN
+  L: Lettuce
+  K: Potato # Kartoffel
+  I: Fish # fIIIsh
+  D: Dough
+  E: Cheese # chEEEse
+  G: Sausage # sausaGe
+  B: Bun
+  M: Meat
+  question: Counter # ? mushroom
+  ↓: Counter
+  ^: Counter
+  right: Counter
+  left: Counter
+  wave: Free # ~ Water
+  minus: Free # - Ice
+  dquote: Counter # " wall/truck
+  p: Counter # second plate return ??
+
+orders:
+  order_generator:
+    _target_: "cooperative_cuisine.orders.DeterministicOrderGeneration"
+    _partial_: true
+  meals:
+    all: false
+    # if all: false -> only orders for these meals are generated
+    # TODO: what if this list is empty?
+    list:
+      - TomatoSoup
+      - OnionSoup
+      #- Salad
+      # - FriedFish
+  # the class that receives the kwargs. Should be a child class of OrderGeneration in orders.py
+  order_gen_kwargs:
+    # structure: [meal_name, start, duration] (start and duration as seconds or timedeltas https://github.com/wroberts/pytimeparse)
+    timed_orders:
+      - [ TomatoSoup, 0:00, 0:10 ]
+      - [ OnionSoup, 0:00, 0:10 ]
+      - [ TomatoSoup, 0:10, 0:10 ]
+      - [ TomatoSoup, 0:15, 0:06 ]
+    never_no_order: False
+    never_no_order_update_all_remaining: False
+    serving_not_ordered_meals: null
+
+player_config:
+  radius: 0.4
+  speed_units_per_seconds: 1
+  interaction_range: 1.6
+  restricted_view: False
+  view_angle: 95
+
+effect_manager: { }
+#  FireManager:
+#    class: !!python/name:cooperative_cuisine.effects.FireEffectManager ''
+#    kwargs:
+#      spreading_duration: [ 5, 10 ]
+#      fire_burns_ingredients_and_meals: true
+
+hook_callbacks:
+  # # --------------- Scoring ---------------
+  orders:
+    hooks: [ completed_order ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: 0.95
+
+  serve_not_ordered_meals:
+    hooks: [ serve_not_ordered_meal ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: 0.95
+  trashcan_usages:
+    hooks: [ trashcan_usage ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: -0.2
+  item_cut:
+    hooks: [ cutting_board_100 ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: 0.1
+  stepped:
+    hooks: [ post_step ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: -0.01
+  combine:
+    hooks: [ drop_off_on_cooking_equipment ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: 0.01
+  start_interact:
+    hooks: [ player_start_interaction ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: 0.01
+
+# json_states:
+#   hooks: [ json_state ]
+#   record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
+#   record_class_kwargs:
+#     record_path: USER_LOG_DIR/ENV_NAME/json_states.jsonl
+# actions:
+#   hooks: [ pre_perform_action ]
+#   record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
+#   record_class_kwargs:
+#     record_path: USER_LOG_DIR/ENV_NAME/LOG_RECORD_NAME.jsonl
+# random_env_events:
+#   hooks: [ order_duration_sample, plate_out_of_kitchen_time ]
+#   record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
+#   record_class_kwargs:
+#     record_path: USER_LOG_DIR/ENV_NAME/LOG_RECORD_NAME.jsonl
+#     add_hook_ref: true
+# env_configs:
+#   hooks: [ env_initialized, item_info_config ]
+#   record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
+#   record_class_kwargs:
+#     record_path: USER_LOG_DIR/ENV_NAME/LOG_RECORD_NAME.jsonl
+#     add_hook_ref: true
+
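The `timed_orders` entries above follow the `[meal_name, start, duration]` convention, with times given as seconds or pytimeparse-style timedeltas. A rough sketch of normalizing such entries to seconds — an illustration only, not the actual `parse_timed_orders` implementation in orders.py:

```python
from pytimeparse.timeparse import timeparse

timed_orders = [
    ("TomatoSoup", "0:00", "0:10"),
    ("OnionSoup", "0:00", "0:10"),
    ("TomatoSoup", "0:10", "0:10"),
    ("TomatoSoup", "0:15", "0:06"),
]

for meal, start, duration in timed_orders:
    # timeparse("0:10") -> 10 (seconds); plain numbers could be passed through.
    start_s, duration_s = timeparse(start), timeparse(duration)
    print(f"{meal}: active from {start_s}s until {start_s + duration_s}s")
```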
diff --git a/cooperative_cuisine/reinforcement_learning/config/environment/environment_config_rl_small_rewards.yaml b/cooperative_cuisine/reinforcement_learning/config/environment/environment_config_rl_small_rewards.yaml
index c7f0fbf30e7717294ef1a6d169918b5f241b5bbf..6b7d3943e34a18213336fd118af00b3a7e4038f2 100644
--- a/cooperative_cuisine/reinforcement_learning/config/environment/environment_config_rl_small_rewards.yaml
+++ b/cooperative_cuisine/reinforcement_learning/config/environment/environment_config_rl_small_rewards.yaml
@@ -11,6 +11,7 @@ game:
   undo_dispenser_pickup: true
   validate_recipes: false
 
+layout_name: rl_small.layout
 
 layout_chars:
@@ -51,6 +52,9 @@ layout_chars:
 
 orders:
+  order_generator:
+    _target_: "cooperative_cuisine.orders.RandomOrderGeneration"
+    _partial_: true
   meals:
     all: true
     # if all: false -> only orders for these meals are generated
@@ -97,38 +101,58 @@ effect_manager: { }
 #      spreading_duration: [ 5, 10 ]
 #      fire_burns_ingredients_and_meals: true
 
-
 hook_callbacks:
   # # --------------- Scoring ---------------
   orders:
     hooks: [ completed_order ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
       static_score: 0.1
 
   serve_not_ordered_meals:
     hooks: [ serve_not_ordered_meal ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
       static_score: 0.1
   trashcan_usages:
     hooks: [ trashcan_usage ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
       static_score: -0.2
   item_cut:
     hooks: [ cutting_board_100 ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
-      static_score: 0.0
+      static_score: 0
   stepped:
     hooks: [ post_step ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
-      static_score: -0.0
+      static_score: 0
   combine:
     hooks: [ drop_off_on_cooking_equipment ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
-      static_score: 0.0
+      static_score: 0
   start_interact:
     hooks: [ player_start_interaction ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
     callback_class_kwargs:
-      static_score: 0.0
+      static_score: 0
 # json_states:
 #   hooks: [ json_state ]
 #   record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
diff --git a/cooperative_cuisine/reinforcement_learning/config/environment/order_config.yaml b/cooperative_cuisine/reinforcement_learning/config/environment/order_config.yaml
deleted file mode 100644
index 9cc3de7dbd523c5f814d87d86cb6ac51e807b999..0000000000000000000000000000000000000000
--- a/cooperative_cuisine/reinforcement_learning/config/environment/order_config.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-orders:
-  meals:
-    all: true
-    # if all: false -> only orders for these meals are generated
-    # TODO: what if this list is empty?
-    list:
-      - TomatoSoup
-      - OnionSoup
-      - Salad
-  #order_gen_class: !!python/name:cooperative_cuisine.orders.RandomOrderGeneration ''
-  # the class to that receives the kwargs. Should be a child class of OrderGeneration in orders.py
-  order_gen_kwargs:
-    order_duration_random_func:
-      # how long should the orders be alive
-      # 'random' library call with getattr, kwargs are passed to the function
-      func: uniform
-      kwargs:
-        a: 40
-        b: 60
-    max_orders: 6
-    # maximum number of active orders at the same time
-    num_start_meals: 2
-    # number of orders generated at the start of the environment
-    sample_on_dur_random_func:
-      # 'random' library call with getattr, kwargs are passed to the function
-      func: uniform
-      kwargs:
-        a: 10
-        b: 20
-    sample_on_serving: false
-    # Sample the delay for the next order only after a meal was served.
-    serving_not_ordered_meals: true
-    # can meals that are not ordered be served / dropped on the serving window
\ No newline at end of file
diff --git a/cooperative_cuisine/reinforcement_learning/config/environment/overcooked-ai_environment_config.yaml b/cooperative_cuisine/reinforcement_learning/config/environment/overcooked-ai_environment_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54d96c1acb9510195e9651ca7730b46eb7267272
--- /dev/null
+++ b/cooperative_cuisine/reinforcement_learning/config/environment/overcooked-ai_environment_config.yaml
@@ -0,0 +1,179 @@
+
+plates:
+  clean_plates: 1
+  dirty_plates: 0
+  plate_delay: [ 0, 0 ]
+  return_dirty: False
+  # range of seconds until the dirty plate arrives.
+
+game:
+  time_limit_seconds: 300
+  undo_dispenser_pickup: true
+  validate_recipes: false
+
+layout_name: centre_pots.layout
+
+
+layout_chars:
+  _: Free
+  hash: Counter # #
+  A: Agent
+  pipe: Extinguisher
+  P: PlateDispenser
+  C: CuttingBoard
+  X: Trashcan
+  $: ServingWindow
+  S: Sink
+  +: SinkAddon
+  at: Plate # @ just a clean plate on a counter
+  U: Pot # with Stove
+  Q: Pan # with Stove
+  O: Peel # with Oven
+  F: Basket # with DeepFryer
+  T: Tomato
+  N: Onion # oNioN
+  L: Lettuce
+  K: Potato # Kartoffel
+  I: Fish # fIIIsh
+  D: Dough
+  E: Cheese # chEEEse
+  G: Sausage # sausaGe
+  B: Bun
+  M: Meat
+  question: Counter # ? mushroom
+  ↓: Counter
+  ^: Counter
+  right: Counter
+  left: Counter
+  wave: Free # ~ Water
+  minus: Free # - Ice
+  dquote: Counter # " wall/truck
+  p: Counter # second plate return ??
+
+
+orders:
+  order_generator:
+    _target_: "cooperative_cuisine.orders.RandomOrderGeneration"
+    _partial_: true
+  meals:
+    all: false
+    # if all: false -> only orders for these meals are generated
+    # TODO: what if this list is empty?
+    list:
+      - TomatoSoup
+      - OnionSoup
+      - Salad
+  # the class that receives the kwargs. Should be a child class of OrderGeneration in orders.py
+  order_gen_kwargs:
+    order_duration_random_func:
+      # how long should the orders be alive
+      # 'random' library call with getattr, kwargs are passed to the function
+      func: uniform
+      kwargs:
+        a: 40
+        b: 60
+    max_orders: 6
+    # maximum number of active orders at the same time
+    num_start_meals: 2
+    # number of orders generated at the start of the environment
+    sample_on_dur_random_func:
+      # 'random' library call with getattr, kwargs are passed to the function
+      func: uniform
+      kwargs:
+        a: 10
+        b: 20
+    sample_on_serving: false
+    # Sample the delay for the next order only after a meal was served.
+    serving_not_ordered_meals: true
+    # can meals that are not ordered be served / dropped on the serving window
+
+player_config:
+  radius: 0.1
+  speed_units_per_seconds: 1
+  interaction_range: 1
+  restricted_view: False
+  view_angle: 60
+
+effect_manager: { }
+#  FireManager:
+#    class: !!python/name:cooperative_cuisine.effects.FireEffectManager ''
+#    kwargs:
+#      spreading_duration: [ 5, 10 ]
+#      fire_burns_ingredients_and_meals: true
+
+
+hook_callbacks:
+  # # --------------- Scoring ---------------
+  orders:
+    hooks: [ completed_order ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: 5
+
+  serve_not_ordered_meals:
+    hooks: [ serve_not_ordered_meal ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: 3
+  trashcan_usages:
+    hooks: [ trashcan_usage ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: 0
+  item_cut:
+    hooks: [ cutting_board_100 ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: 0
+  stepped:
+    hooks: [ post_step ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: 0
+  combine:
+    hooks: [ drop_off_on_cooking_equipment ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: 0
+  start_interact:
+    hooks: [ player_start_interaction ]
+    callback_class:
+      _target_: "cooperative_cuisine.scores.ScoreViaHooks"
+      _partial_: true
+    callback_class_kwargs:
+      static_score: 0
+# json_states:
+#   hooks: [ json_state ]
+#   record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
+#   record_class_kwargs:
+#     record_path: USER_LOG_DIR/ENV_NAME/json_states.jsonl
+# actions:
+#   hooks: [ pre_perform_action ]
+#   record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
+#   record_class_kwargs:
+#     record_path: USER_LOG_DIR/ENV_NAME/LOG_RECORD_NAME.jsonl
+# random_env_events:
+#   hooks: [ order_duration_sample, plate_out_of_kitchen_time ]
+#   record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
+#   record_class_kwargs:
+#     record_path: USER_LOG_DIR/ENV_NAME/LOG_RECORD_NAME.jsonl
+#     add_hook_ref: true
+# env_configs:
+#   hooks: [ env_initialized, item_info_config ]
+#   record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
+#   record_class_kwargs:
+#     record_path: USER_LOG_DIR/ENV_NAME/LOG_RECORD_NAME.jsonl
+#     add_hook_ref: true
+
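The `order_duration_random_func` and `sample_on_dur_random_func` blocks above describe a call into Python's `random` module, resolved via `getattr` as the comments state. Sketched out with the values from this config:

```python
import random

spec = {"func": "uniform", "kwargs": {"a": 40, "b": 60}}

# Equivalent to random.uniform(a=40, b=60): a float drawn from [40, 60],
# used here as the number of seconds an order stays alive.
duration = getattr(random, spec["func"])(**spec["kwargs"])
print(f"order alive for {duration:.1f}s")
```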
diff --git a/cooperative_cuisine/reinforcement_learning/config/item_info/item_info_overcooked-ai.yaml b/cooperative_cuisine/reinforcement_learning/config/item_info/item_info_overcooked-ai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..62d1fab6cdf3dd40c297d1b75d9bb1d764117cf6
--- /dev/null
+++ b/cooperative_cuisine/reinforcement_learning/config/item_info/item_info_overcooked-ai.yaml
@@ -0,0 +1,219 @@
+CuttingBoard:
+  type: Equipment
+
+Sink:
+  type: Equipment
+
+Stove:
+  type: Equipment
+
+DeepFryer:
+  type: Equipment
+
+Oven:
+  type: Equipment
+
+Pot:
+  type: Equipment
+  equipment: Stove
+
+Pan:
+  type: Equipment
+  equipment: Stove
+
+Basket:
+  type: Equipment
+  equipment: DeepFryer
+
+Peel:
+  type: Equipment
+  equipment: Oven
+
+DirtyPlate:
+  type: Equipment
+
+Plate:
+  type: Equipment
+  needs: [ DirtyPlate ]
+  seconds: 2.0
+  equipment: Sink
+
+# --------------------------------------------------------------------------------
+
+Tomato:
+  type: Ingredient
+
+Lettuce:
+  type: Ingredient
+
+Onion:
+  type: Ingredient
+
+Meat:
+  type: Ingredient
+
+Bun:
+  type: Ingredient
+
+Potato:
+  type: Ingredient
+
+Fish:
+  type: Ingredient
+
+Dough:
+  type: Ingredient
+
+Cheese:
+  type: Ingredient
+
+Sausage:
+  type: Ingredient
+
+# Chopped things
+ChoppedTomato:
+  type: Ingredient
+  needs: [ Tomato ]
+  seconds: 4.0
+  equipment: CuttingBoard
+
+ChoppedLettuce:
+  type: Ingredient
+  needs: [ Lettuce ]
+  seconds: 3.0
+  equipment: CuttingBoard
+
+ChoppedOnion:
+  type: Ingredient
+  needs: [ Onion ]
+  seconds: 4.0
+  equipment: CuttingBoard
+
+RawPatty:
+  type: Ingredient
+  needs: [ Meat ]
+  seconds: 4.0
+  equipment: CuttingBoard
+
+RawChips:
+  type: Ingredient
+  needs: [ Potato ]
+  seconds: 4.0
+  equipment: CuttingBoard
+
+ChoppedFish:
+  type: Ingredient
+  needs: [ Fish ]
+  seconds: 4.0
+  equipment: CuttingBoard
+
+PizzaBase:
+  type: Ingredient
+  needs: [ Dough ]
+  seconds: 4.0
+  equipment: CuttingBoard
+
+GratedCheese:
+  type: Ingredient
+  needs: [ Cheese ]
+  seconds: 4.0
+  equipment: CuttingBoard
+
+ChoppedSausage:
+  type: Ingredient
+  needs: [ Sausage ]
+  seconds: 4.0
+  equipment: CuttingBoard
+
+CookedPatty:
+  type: Ingredient
+  seconds: 5.0
+  needs: [ RawPatty ]
+  equipment: Pan
+
+# --------------------------------------------------------------------------------
+
+Chips:
+  type: Meal
+  seconds: 5.0
+  needs: [ RawChips ]
+  equipment: Basket
+
+FriedFish:
+  type: Meal
+  seconds: 5.0
+  needs: [ ChoppedFish ]
+  equipment: Basket
+
+Burger:
+  type: Meal
+  needs: [ Bun, ChoppedLettuce, ChoppedTomato, CookedPatty ]
+  equipment: ~
+
+Salad:
+  type: Meal
+  needs: [ ChoppedLettuce, ChoppedTomato ]
+  equipment: ~
+
+TomatoSoup:
+  type: Meal
+  needs: [ Tomato, Tomato, Tomato ]
+  seconds: 1
+  equipment: Pot
+
+OnionSoup:
+  type: Meal
+  needs: [ Onion, Onion, Onion ]
+  seconds: 1
+  equipment: Pot
+
+FishAndChips:
+  type: Meal
+  needs: [ FriedFish, Chips ]
+  equipment: ~
+
+Pizza:
+  type: Meal
+  needs: [ PizzaBase, ChoppedTomato, GratedCheese, ChoppedSausage ]
+  seconds: 7.0
+  equipment: Peel
+
+# --------------------------------------------------------------------------------
+
+BurntCookedPatty:
+  type: Waste
+  seconds: 10.0
+  needs: [ CookedPatty ]
+  equipment: Pan
+
+BurntChips:
+  type: Waste
+  seconds: 10.0
+  needs: [ Chips ]
+  equipment: Basket
+
+BurntFriedFish:
+  type: Waste
+  seconds: 10.0
+  needs: [ FriedFish ]
+  equipment: Basket
+
+BurntTomatoSoup:
+  type: Waste
+  needs: [ TomatoSoup ]
+  seconds: 20.0
+  equipment: Pot
+
+BurntOnionSoup:
+  type: Waste
+  needs: [ OnionSoup ]
+  seconds: 20.0
+  equipment: Pot
+
+BurntPizza:
+  type: Waste
+  needs: [ Pizza ]
+  seconds: 10.0
+  equipment: Peel
+
+
diff --git a/cooperative_cuisine/reinforcement_learning/config/random_orders.yaml b/cooperative_cuisine/reinforcement_learning/config/random_orders.yaml
deleted file mode 100644
index e8a93ff75c0fe47a9909028c8905a2c8511e2fce..0000000000000000000000000000000000000000
--- a/cooperative_cuisine/reinforcement_learning/config/random_orders.yaml
+++ /dev/null
@@ -1,2 +0,0 @@
- order_gen_class: !!python/name:cooperative_cuisine.orders.RandomOrderGeneration ''
- callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
\ No newline at end of file
diff --git a/cooperative_cuisine/reinforcement_learning/config/rl_config.yaml b/cooperative_cuisine/reinforcement_learning/config/rl_config.yaml
index 4fd36579d767cad57a3807caefa21f7e70c9866a..af22536dd8f119d89f0ab4c187b6bed30d191769 100644
--- a/cooperative_cuisine/reinforcement_learning/config/rl_config.yaml
+++ b/cooperative_cuisine/reinforcement_learning/config/rl_config.yaml
@@ -1,5 +1,5 @@
 defaults:
-  - environment: environment_config_rl
-  - item_info: item_info_rl
+  - environment: overcooked-ai_environment_config
+  - item_info: item_info_overcooked-ai
   - model: PPO
   - additional_configs: additional_config_base
\ No newline at end of file
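One way to inspect what these defaults compose to, or to switch a group back (e.g. to the plain RL environment), is Hydra's compose API. A sketch, assuming it runs with the reinforcement_learning directory as the working directory:

```python
from hydra import compose, initialize
from omegaconf import OmegaConf

with initialize(version_base="1.3", config_path="config"):
    cfg = compose(
        config_name="rl_config",
        overrides=["environment=environment_config_rl"],  # optional group override
    )
    # The composed config now contains the chosen environment, item_info,
    # model and additional_configs groups.
    print(OmegaConf.to_yaml(cfg.environment.orders.order_generator))
```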
diff --git a/cooperative_cuisine/reinforcement_learning/gym_env.py b/cooperative_cuisine/reinforcement_learning/gym_env.py
index 5bb014bae397042d17750f1a9196d0a1d9020067..211fbb63c42ee9a5957342a500a28e8fe13af091 100644
--- a/cooperative_cuisine/reinforcement_learning/gym_env.py
+++ b/cooperative_cuisine/reinforcement_learning/gym_env.py
@@ -12,6 +12,7 @@ import yaml
 from gymnasium import spaces, Env
 from hydra.utils import instantiate
 from omegaconf import OmegaConf
+from layouts.convert_overcooked_ai_layouts import convert_overcooked_ai_layouts
 
 from cooperative_cuisine import ROOT_DIR
 from cooperative_cuisine.action import ActionType, InterActionData, Action
@@ -34,6 +35,13 @@ class SimpleActionSpace(Enum):
 
 
 def get_env_action(player_id, simple_action, duration):
+    """
+    Creates a concrete action.
+    :param player_id: id of the player
+    :param simple_action: an action in the form of a SimpleActionSpace
+    :param duration: for how long an action should be conducted
+    :return: a concrete action
+    """
     match simple_action:
         case SimpleActionSpace.Up:
             return Action(
@@ -80,14 +88,15 @@ def get_env_action(player_id, simple_action, duration):
             )
 
 
-layout_path: Path = ROOT_DIR / "reinforcement_learning" / "rl_small.layout"
-with open(layout_path, "r") as file:
-    layout = file.read()
 with open(ROOT_DIR / "pygame_2d_vis" / "visualization.yaml", "r") as file:
     visualization_config = yaml.safe_load(file)
 
 
 def shuffle_counters(env):
+    """
+    Shuffles the counters of an environment.
+    :param env: the environment object
+    """
     sample_counter = []
     other_counters = []
     for counter in env.counters:
@@ -104,11 +113,10 @@ def shuffle_counters(env):
 
 
 class StateToObservationConverter:
-    '''
-
-
+    """
+    Abstract definition of a class that gets an environment and outputs a state representation for RL.
+    """
 
-    '''
     @abstractmethod
     def setup(self, env):
         ...
@@ -126,6 +134,10 @@ class EnvGymWrapper(Env):
     metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 10}
 
     def __init__(self, config):
+        """
+        Initializes all necessary variables.
+        :param config: the RL and environment configuration from hydra
+        """
         super().__init__()
         self.gridsize = 40
         self.randomize_counter_placement = False
@@ -133,18 +145,21 @@ class EnvGymWrapper(Env):
         self.full_vector_state = True
         config_env = OmegaConf.to_container(config.environment, resolve=True)
         config_item_info = OmegaConf.to_container(config.item_info, resolve=True)
-        order_generator = config.additional_configs.order_generator
-        custom_config_path = ROOT_DIR / "reinforcement_learning" / "config" / order_generator
-        with open(custom_config_path, "r") as file:
-            custom_classes = yaml.load(file, Loader=yaml.Loader)
-        for key, value in config_env['hook_callbacks'].items():
-            value['callback_class'] = custom_classes['callback_class']
-        config_env["orders"]["order_gen_class"] = custom_classes['order_gen_class']
+        for val in config_env['hook_callbacks']:
+            config_env['hook_callbacks'][val]["callback_class"] = instantiate(config_env['hook_callbacks'][val]["callback_class"])
+        config_env["orders"]["order_gen_class"] = instantiate(config_env["orders"]["order_generator"])
         self.config_env = config_env
         self.config_item_info = config_item_info
+        layout_file = config_env["layout_name"]
+        layout_path: Path = ROOT_DIR / "reinforcement_learning" / "layouts" / layout_file
+
+        # convert the overcooked-AI layout on the fly if it has not been converted yet
+        if not Path(layout_path).is_file():
+            convert_overcooked_ai_layouts(ROOT_DIR, layout_file)
+        with open(layout_path, "r") as file:
+            self.layout = file.read()
         self.env: Environment = Environment(
             env_config=deepcopy(config_env),
-            layout_config=layout,
+            layout_config=self.layout,
             item_info=deepcopy(config_item_info),
             as_files=False,
             yaml_already_loaded=True
@@ -191,6 +206,10 @@ class EnvGymWrapper(Env):
         self.prev_score = 0
 
     def step(self, action):
+        """
+        Takes one step in the environment and returns the observation, reward,
+        whether the episode terminated or was truncated, and additional info.
+        """
         # this is simply a work-around to enable no action which is necessary for the play_gym.py
         if action == 8:
             observation = self.get_observation()
@@ -225,9 +244,12 @@ class EnvGymWrapper(Env):
         return observation, reward, terminated, truncated, info
 
     def reset(self, seed=None, options=None):
+        """
+        Resets the environment according to the configs.
+        """
         self.env: Environment = Environment(
             env_config=deepcopy(self.config_env),
-            layout_config=layout,
+            layout_config=self.layout,
             item_info=deepcopy(self.config_item_info),
             as_files=False,
             yaml_already_loaded=True
@@ -255,6 +277,9 @@ class EnvGymWrapper(Env):
         return obs
 
     def render(self):
+        """
+        Creates a visual representation of the environment.
+        """
         observation = self.get_env_img(self.gridsize)
         img = observation.astype(np.uint8)
         img = img.transpose((1, 2, 0))
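Since the wrapper subclasses gymnasium's `Env`, the new config-driven layout loading can be smoke-tested with a random rollout. A sketch, assuming a composed config `cfg` as in the compose example above and the standard gymnasium `reset`/`step` signatures:

```python
from cooperative_cuisine.reinforcement_learning.gym_env import EnvGymWrapper

def random_rollout(cfg, steps: int = 50) -> float:
    """Drive the wrapped environment with random actions and sum the rewards."""
    env = EnvGymWrapper(cfg)  # loads (or converts) the layout named in the config
    obs, info = env.reset()
    total_reward = 0.0
    for _ in range(steps):
        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
        total_reward += reward
        if terminated or truncated:
            obs, info = env.reset()
    return total_reward
```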
"reinforcement_learning" / "layouts" / "overcooked_ai_layouts" / filename + savepath = Path(file_root) / "reinforcement_learning" / "layouts" / filename + with open(loadpath, "r") as f: + layoutfile = f.read() + f.close() + layout = eval(layoutfile) + lines = layout["grid"].split("\n") + additional_info = [] + for key in layout: + if key != "grid": + additional_info.append( + '; {}: {}'.format(key, str(layout[key]).replace("'", "").replace("None", "null"))) + + with open(savepath, "w+") as f: + for line in lines: + line = line.lstrip() + for char in line: + f.write(convertion_dict[char]) + f.write("\n") + for info in additional_info: + f.write(info) + f.write("\n") + f.close() + + +if __name__ == "__main__": + convert_overcookd_ai_layouts("", "cramped_corridor.layout") diff --git a/cooperative_cuisine/reinforcement_learning/layouts/overcooked_ai_layouts/1-cramped-room.layout b/cooperative_cuisine/reinforcement_learning/layouts/overcooked_ai_layouts/1-cramped-room.layout new file mode 100644 index 0000000000000000000000000000000000000000..49655d1ce3c4217a1fd2385cd5b33918fd395137 --- /dev/null +++ b/cooperative_cuisine/reinforcement_learning/layouts/overcooked_ai_layouts/1-cramped-room.layout @@ -0,0 +1,4 @@ +##U## +NA_AN +#___# +#P#$# \ No newline at end of file diff --git a/cooperative_cuisine/reinforcement_learning/layouts/overcooked_ai_layouts/centre_pots.layout b/cooperative_cuisine/reinforcement_learning/layouts/overcooked_ai_layouts/centre_pots.layout new file mode 100644 index 0000000000000000000000000000000000000000..3f6eb555ced5181ea57f9abdac7057e5b5c193a6 --- /dev/null +++ b/cooperative_cuisine/reinforcement_learning/layouts/overcooked_ai_layouts/centre_pots.layout @@ -0,0 +1,12 @@ +{ + "grid": """XXXOSSX + X 1 X + X P P X + X 2 X + XDDOXXX""", + "start_bonus_orders": [], + "start_all_orders" : [ + { "ingredients" : ["onion", "onion", "onion"]} + ], + "rew_shaping_params": None +} diff --git a/cooperative_cuisine/reinforcement_learning/layouts/overcooked_ai_layouts/cramped_corridor.layout b/cooperative_cuisine/reinforcement_learning/layouts/overcooked_ai_layouts/cramped_corridor.layout new file mode 100644 index 0000000000000000000000000000000000000000..8674189f517524afafb5edf834d2d40730fc90f7 --- /dev/null +++ b/cooperative_cuisine/reinforcement_learning/layouts/overcooked_ai_layouts/cramped_corridor.layout @@ -0,0 +1,23 @@ +{ + "grid": """XTPXXXXSX + D 2X + XXXX XXXX + D 1X + XOPXXXXSX""", + "start_bonus_orders": [ + { "ingredients" : ["tomato", "onion"]}, + { "ingredients" : ["tomato"]}, + { "ingredients" : ["tomato", "tomato"]} + ], + "start_all_orders" : [ + { "ingredients" : ["tomato"]}, + { "ingredients" : ["onion", "onion", "onion"]}, + { "ingredients" : ["tomato", "tomato", "tomato"]}, + { "ingredients" : ["tomato", "onion"] }, + { "ingredients" : ["tomato", "tomato"]} + ], + "onion_value" : 21, + "tomato_value" : 13, + "onion_time" : 15, + "tomato_time" : 7 +} \ No newline at end of file diff --git a/cooperative_cuisine/reinforcement_learning/rl.layout b/cooperative_cuisine/reinforcement_learning/layouts/rl.layout similarity index 100% rename from cooperative_cuisine/reinforcement_learning/rl.layout rename to cooperative_cuisine/reinforcement_learning/layouts/rl.layout diff --git a/cooperative_cuisine/reinforcement_learning/rl_small.layout b/cooperative_cuisine/reinforcement_learning/layouts/rl_small.layout similarity index 100% rename from cooperative_cuisine/reinforcement_learning/rl_small.layout rename to 
diff --git a/cooperative_cuisine/reinforcement_learning/obs_converter/base_converter.py b/cooperative_cuisine/reinforcement_learning/obs_converter/base_converter.py
index 703a7d7a27a4cede1ee3b488fdd75a7ddc5808c3..da96510f05cc9bd3c49a60e1c584a58edf6421bd 100644
--- a/cooperative_cuisine/reinforcement_learning/obs_converter/base_converter.py
+++ b/cooperative_cuisine/reinforcement_learning/obs_converter/base_converter.py
@@ -8,13 +8,19 @@ from cooperative_cuisine.reinforcement_learning.gym_env import StateToObservatio
 
 
 class BaseStateConverter(StateToObservationConverter):
+    """
+    Converts an environment state to an encoding where each counter/item has a unique value.
+    """
+
     def __init__(self):
         self.onehot = False
+        self.grid_height: int | None = None
+        self.grid_width: int | None = None
         self.counter_list = [
             "Empty",
             "Counter",
             "PlateDispenser",
             "TomatoDispenser",
+            "OnionDispenser",
             "ServingWindow",
             "PlateReturn",
             "Trashcan",
@@ -26,23 +32,34 @@ class BaseStateConverter(StateToObservationConverter):
         self.item_list = [
             "None",
             "Pot",
-            "PotOne",
-            "PotTwo",
-            "PotThree",
-            "PotDone",
+            "PotOne_Tomato",
+            "PotTwo_Tomato",
+            "PotThree_Tomato",
+            "PotDone_Tomato",
+            "PotOne_Onion",
+            "PotTwo_Onion",
+            "PotThree_Onion",
+            "PotDone_Onion",
             "Tomato",
+            "Onion",
             "ChoppedTomato",
             "Plate",
             "PlateTomatoSoup",
+            "PlateOnionSoup",
             "PlateSalad",
             "Lettuce",
             "PlateChoppedTomato",
             "PlateChoppedLettuce",
             "ChoppedLettuce",
+            "ChoppedOnion",
         ]
         self.player = "0"
 
     def setup(self, env):
+        """
+        Sets the grid width and height from the given environment.
+        """
         self.grid_width, self.grid_height = int(env.kitchen_width), int(
             env.kitchen_height)
 
@@ -115,18 +132,31 @@
             if item.name == "Pot":
                 if len(item.content_list) > 0:
                     if item.content_list[0].name == "TomatoSoup":
-                        item_name = "PotDone"
+                        item_name = "PotDone_Tomato"
+                    if item.content_list[0].name == "OnionSoup":
+                        item_name = "PotDone_Onion"
                     elif len(item.content_list) == 1:
-                        item_name = "PotOne"
+                        if item.content_list[0].name == "Tomato":
+                            item_name = "PotOne_Tomato"
+                        if item.content_list[0].name == "Onion":
+                            item_name = "PotOne_Onion"
                     elif len(item.content_list) == 2:
-                        item_name = "PotTwo"
+                        if item.content_list[0].name == "Tomato":
+                            item_name = "PotTwo_Tomato"
+                        if item.content_list[0].name == "Onion":
+                            item_name = "PotTwo_Onion"
                     elif len(item.content_list) == 3:
-                        item_name = "PotThree"
+                        if item.content_list[0].name == "Tomato":
+                            item_name = "PotThree_Tomato"
+                        if item.content_list[0].name == "Onion":
+                            item_name = "PotThree_Onion"
         if "Plate" in item.name:
             content_list = [i.name for i in item.content_list]
             match content_list:
                 case ["TomatoSoup"]:
                     item_name = "PlateTomatoSoup"
+                case ["OnionSoup"]:
+                    item_name = "PlateOnionSoup"
                 case ["ChoppedTomato"]:
                     item_name = "PlateChoppedTomato"
                 case ["ChoppedLettuce"]:
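The one-hot converter below differs from this integer encoding only in how entries are vectorized. The underlying idea, as a standalone sketch (not the converters' actual `vectorize_item` code):

```python
import numpy as np

item_list = ["None", "Pot", "PotOne_Tomato", "Tomato", "Onion"]  # abbreviated

def encode_integer(name: str) -> int:
    # BaseStateConverter-style: each known name maps to its list index.
    return item_list.index(name)

def encode_onehot(name: str) -> np.ndarray:
    # BaseStateConverterOnehot-style: a unit vector with a 1 at that index.
    vec = np.zeros(len(item_list), dtype=np.float32)
    vec[item_list.index(name)] = 1.0
    return vec

print(encode_integer("Onion"), encode_onehot("Onion"))
```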
diff --git a/cooperative_cuisine/reinforcement_learning/obs_converter/base_converter_onehot.py b/cooperative_cuisine/reinforcement_learning/obs_converter/base_converter_onehot.py
index d3a7d877db6cc4786c948fc522b156dba19eb00b..d4735a768f6472c4ea3781d072a67d91e0064614 100644
--- a/cooperative_cuisine/reinforcement_learning/obs_converter/base_converter_onehot.py
+++ b/cooperative_cuisine/reinforcement_learning/obs_converter/base_converter_onehot.py
@@ -8,15 +8,23 @@ from cooperative_cuisine.reinforcement_learning.gym_env import StateToObservatio
 
 
 class BaseStateConverterOnehot(StateToObservationConverter):
+    """
+    Converts an environment state to a one-hot encoding.
+    """
+
     def __init__(self):
+        """
+        Constructor setting basic variables as attributes.
+        """
         self.onehot = True
-        self.grid_height = None
-        self.grid_width = None
+        self.grid_height: int | None = None
+        self.grid_width: int | None = None
         self.counter_list = [
             "Empty",
             "Counter",
             "PlateDispenser",
             "TomatoDispenser",
+            "OnionDispenser",
             "ServingWindow",
             "PlateReturn",
             "Trashcan",
@@ -28,27 +36,42 @@ class BaseStateConverterOnehot(StateToObservationConverter):
         self.item_list = [
             "None",
             "Pot",
-            "PotOne",
-            "PotTwo",
-            "PotThree",
-            "PotDone",
+            "PotOne_Tomato",
+            "PotTwo_Tomato",
+            "PotThree_Tomato",
+            "PotDone_Tomato",
+            "PotOne_Onion",
+            "PotTwo_Onion",
+            "PotThree_Onion",
+            "PotDone_Onion",
             "Tomato",
+            "Onion",
             "ChoppedTomato",
             "Plate",
             "PlateTomatoSoup",
+            "PlateOnionSoup",
             "PlateSalad",
             "Lettuce",
             "PlateChoppedTomato",
             "PlateChoppedLettuce",
             "ChoppedLettuce",
+            "ChoppedOnion",
         ]
         self.player = "0"
 
     def setup(self, env):
+        """
+        Sets the grid width and height according to the given environment.
+        :param env: the environment to read the kitchen size from
+        """
         self.grid_width, self.grid_height = int(env.kitchen_width), int(
             env.kitchen_height)
 
     def convert_state_to_observation(self, env) -> np.ndarray:
+        """
+        Converts the environment state into a one-hot observation vector.
+        :param env: the environment to encode
+        """
         grid_base_array = np.zeros(
             (
                 self.grid_width,
@@ -92,7 +115,7 @@
         player_item_one_hot = self.vectorize_item(
             env.players[self.player].holding, self.item_list
         )
-
+        # simply concat all entities to one large vector
         final = np.concatenate(
             (
                 counters.flatten(),
@@ -116,22 +139,36 @@
         else:
             item_name = item.name
 
+        # the pot names encode both content type and cooking progress; a cleaner implementation should be found here
         if isinstance(item, CookingEquipment):
             if item.name == "Pot":
                 if len(item.content_list) > 0:
                     if item.content_list[0].name == "TomatoSoup":
-                        item_name = "PotDone"
+                        item_name = "PotDone_Tomato"
+                    if item.content_list[0].name == "OnionSoup":
+                        item_name = "PotDone_Onion"
                     elif len(item.content_list) == 1:
-                        item_name = "PotOne"
+                        if item.content_list[0].name == "Tomato":
+                            item_name = "PotOne_Tomato"
+                        if item.content_list[0].name == "Onion":
+                            item_name = "PotOne_Onion"
                     elif len(item.content_list) == 2:
-                        item_name = "PotTwo"
+                        if item.content_list[0].name == "Tomato":
+                            item_name = "PotTwo_Tomato"
+                        if item.content_list[0].name == "Onion":
+                            item_name = "PotTwo_Onion"
                    elif len(item.content_list) == 3:
-                        item_name = "PotThree"
+                        if item.content_list[0].name == "Tomato":
+                            item_name = "PotThree_Tomato"
+                        if item.content_list[0].name == "Onion":
+                            item_name = "PotThree_Onion"
         if "Plate" in item.name:
             content_list = [i.name for i in item.content_list]
             match content_list:
                 case ["TomatoSoup"]:
                     item_name = "PlateTomatoSoup"
+                case ["OnionSoup"]:
+                    item_name = "PlateOnionSoup"
                 case ["ChoppedTomato"]:
                     item_name = "PlateChoppedTomato"
                 case ["ChoppedLettuce"]:
diff --git a/cooperative_cuisine/reinforcement_learning/overcooked_ai.md b/cooperative_cuisine/reinforcement_learning/overcooked_ai.md
new file mode 100644
index 0000000000000000000000000000000000000000..7e2ae81eec713b7b5d424fa40664f08bf38ebf4d
--- /dev/null
+++ b/cooperative_cuisine/reinforcement_learning/overcooked_ai.md
@@ -0,0 +1,13 @@
+# Overcooked-AI and Cooperative Cuisine
+
+## Use the overcooked-AI levels and configs in cooperative cuisine
+All layouts from overcooked-AI can be used within cooperative cuisine. Dedicated configs are defined and can be loaded via hydra.
+Choose overcooked-ai_environment_config.yaml as the environment config; under layout_name, any layout from overcooked-AI can be specified.
+Additionally, the item_info config must be item_info_overcooked-ai.yaml.
+With these configs, the layouts and rewards from overcooked-AI are used.
+
+## How is the connection between Overcooked-AI and cooperative cuisine defined?
+Cooperative Cuisine is highly modular because hydra is used as the config manager.
+The parameters used for overcooked-AI therefore live in a dedicated config file.
+The layout format differs, so a mapping is defined that converts an overcooked-AI layout into a cooperative cuisine layout.
+The layout file has to be present in cooperative_cuisine/reinforcement_learning/layouts/overcooked_ai_layouts.
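For illustration, converting one of the layouts shipped above (mirroring the script's `__main__` example; this assumes the module is importable via the package path, and `ROOT_DIR` is the same package root that gym_env.py passes in):

```python
from cooperative_cuisine import ROOT_DIR
from cooperative_cuisine.reinforcement_learning.layouts.convert_overcooked_ai_layouts import (
    convert_overcooked_ai_layouts,
)

# Reads layouts/overcooked_ai_layouts/cramped_corridor.layout (overcooked-AI
# dict format) and writes layouts/cramped_corridor.layout in cooperative
# cuisine format; non-grid keys such as onion_value are appended as
# "; key: value" lines.
convert_overcooked_ai_layouts(ROOT_DIR, "cramped_corridor.layout")
```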
+ """ + additional_configs: dict[str, Any] = OmegaConf.to_container(cfg.additional_configs, resolve=True) + rl_logs: Path = Path(additional_configs["log_path"]) rl_logs.mkdir(exist_ok=True) - rl_agent_checkpoints = rl_logs / Path(additional_configs["checkpoint_path"]) + rl_agent_checkpoints: Path = rl_logs / Path(additional_configs["checkpoint_path"]) rl_agent_checkpoints.mkdir(exist_ok=True) - config = OmegaConf.to_container(cfg.model, resolve=True) - debug = additional_configs["debug_mode"] + config: dict[str, Any] = OmegaConf.to_container(cfg.model, resolve=True) + debug: bool = additional_configs["debug_mode"] vec_env = additional_configs["vec_env"] number_envs_parallel = config["number_envs_parallel"] model_class = instantiate(cfg.model.model_type)