From 321f04b2aa3dc7335ace1e2de7f51eea343d3972 Mon Sep 17 00:00:00 2001 From: fheinrich <fheinrich@techfak.uni-bielefeld.de> Date: Thu, 15 Feb 2024 12:32:05 +0100 Subject: [PATCH] Overhaul of vector state. --- .../game_content/environment_config.yaml | 4 +- .../game_content/layouts/rl_small.layout | 4 + overcooked_simulator/gui_2d_vis/drawing.py | 10 +- .../gui_2d_vis/visualization.yaml | 8 +- .../gui_2d_vis/visualization_rl.yaml | 427 ---------------- .../environment_config_rl.yaml | 22 +- .../full_vectorization.py | 244 ++++++++++ .../reinforcement_learning/gym_env.py | 457 +++++------------- .../reinforcement_learning/item_info_rl.yaml | 2 +- .../reinforcement_learning/rl.layout | 4 +- .../reinforcement_learning/rl_small.layout | 8 +- 11 files changed, 417 insertions(+), 773 deletions(-) create mode 100644 overcooked_simulator/game_content/layouts/rl_small.layout delete mode 100644 overcooked_simulator/gui_2d_vis/visualization_rl.yaml create mode 100644 overcooked_simulator/reinforcement_learning/full_vectorization.py diff --git a/overcooked_simulator/game_content/environment_config.yaml b/overcooked_simulator/game_content/environment_config.yaml index a0c14fcd..4491e6fe 100644 --- a/overcooked_simulator/game_content/environment_config.yaml +++ b/overcooked_simulator/game_content/environment_config.yaml @@ -1,6 +1,6 @@ plates: - clean_plates: 1 - dirty_plates: 2 + clean_plates: 2 + dirty_plates: 0 plate_delay: [ 5, 10 ] # range of seconds until the dirty plate arrives. diff --git a/overcooked_simulator/game_content/layouts/rl_small.layout b/overcooked_simulator/game_content/layouts/rl_small.layout new file mode 100644 index 00000000..c3e66a2c --- /dev/null +++ b/overcooked_simulator/game_content/layouts/rl_small.layout @@ -0,0 +1,4 @@ +#X## +T__P +U__# +#C$# diff --git a/overcooked_simulator/gui_2d_vis/drawing.py b/overcooked_simulator/gui_2d_vis/drawing.py index 05475472..f0b27674 100644 --- a/overcooked_simulator/gui_2d_vis/drawing.py +++ b/overcooked_simulator/gui_2d_vis/drawing.py @@ -543,7 +543,7 @@ class Visualizer: ) elif "content_list" in item and item["content_list"]: triangle_offsets = create_polygon( - len(item["content_list"]), np.array([0.10]) + len(item["content_list"]), np.array([0, 10]) ) scale = 1 if len(item["content_list"]) == 1 else 0.6 for idx, o in enumerate(item["content_list"]): @@ -856,9 +856,13 @@ class Visualizer: flags = pygame.HIDDEN if not self.observation_screen: - self.observation_screen = pygame.display.set_mode((width, height), flags=flags) + self.observation_screen = pygame.display.set_mode( + (width, height), flags=flags + ) - self.draw_gamescreen(self.observation_screen, state, grid_size, [0 for _ in state["players"]]) + self.draw_gamescreen( + self.observation_screen, state, grid_size, [0 for _ in state["players"]] + ) red = pygame.surfarray.array_red(self.observation_screen) green = pygame.surfarray.array_green(self.observation_screen) diff --git a/overcooked_simulator/gui_2d_vis/visualization.yaml b/overcooked_simulator/gui_2d_vis/visualization.yaml index a0d7989c..1fb2d253 100644 --- a/overcooked_simulator/gui_2d_vis/visualization.yaml +++ b/overcooked_simulator/gui_2d_vis/visualization.yaml @@ -102,10 +102,10 @@ Dispenser: ServingWindow: parts: - # - type: image - # path: images/arrow_right.png - # size: 1 - # center_offset: [ 0, 0 ] + - type: image + path: images/arrow_right.png + size: 1 + center_offset: [ 0, 0 ] - type: image path: images/bell_gold.png size: 0.5 diff --git a/overcooked_simulator/gui_2d_vis/visualization_rl.yaml b/overcooked_simulator/gui_2d_vis/visualization_rl.yaml deleted file mode 100644 index 7636e379..00000000 --- a/overcooked_simulator/gui_2d_vis/visualization_rl.yaml +++ /dev/null @@ -1,427 +0,0 @@ -# colors: https://www.webucator.com/article/python-color-constants-module/ - -GameWindow: - WhatIsFixed: grid # grid or window_width or window_height - size: 50 - screen_margin: 100 - min_width: 700 - min_height: 600 - buttons_width: 180 - buttons_height: 60 - - order_bar_height: 100 - order_size: 50 - - game_border_size: 1 - game_border_color: black - background_color: lemonchiffon1 - -Kitchen: - ground_tiles_color: sgigray76 - background_lines: gray79 - -Counter: - parts: - # - type: rect - # height: 1 - # width: 1 - # color: whitesmoke - - type: image - path: images/counter5.png - size: 1 - -CuttingBoard: - parts: - - type: image - path: images/cutting_board_large.png - size: 0.75 - center_offset: [ 0, -0.05 ] - - -PlateDispenser: - parts: [ ] -# - type: rect -# height: 0.95 -# width: 0.95 -# color: cadetblue1 - -Trashcan: - parts: - - type: image - path: images/trash3.png - size: 0.88 - center_offset: [ 0, -0.05 ] - -#TomatoDispenser: -# parts: -# - color: orangered1 -# type: rect -# height: 0.8 -# width: 0.8 -# -#LettuceDispenser: -# parts: -# - color: palegreen3 -# type: rect -# height: 0.8 -# width: 0.8 -# -#OnionDispenser: -# parts: -# - color: deeppink3 -# type: rect -# height: 0.8 -# width: 0.8 -# -#MeatDispenser: -# parts: -# - color: indianred1 -# type: rect -# height: 0.8 -# width: 0.8 -# -#BunDispenser: -# parts: -# - color: sandybrown -# type: rect -# height: 0.8 -# width: 0.8 - -Dispenser: - parts: - - type: circle - color: black - radius: 0.35 - center_offset: [ 0, -0.05 ] - - type: circle - color: gray83 - radius: 0.33 - center_offset: [ 0, -0.05 ] - - - item_offset: [ 0, -0.05 ] - item_scale: 0.9 - -ServingWindow: - parts: - - type: image - path: images/arrow_right.png - size: 1 - center_offset: [ 0, 0 ] - - type: image - path: images/bell_gold.png - size: 0.5 - center_offset: [ -0.4, 0.1 ] - rotate_image: False - -Stove: - parts: - - color: black - type: rect - height: 0.875 - width: 0.625 - - color: flesh - type: circle - radius: 0.25 - -Sink: - parts: - - type: image - path: images/sink1.png - size: 0.85 - center_offset: [ 0, -0.12 ] - -SinkAddon: - parts: - - type: image - path: images/drip2.png - size: 0.75 - center_offset: [ 0, -0.05 ] - -# Tools -Extinguisher: - parts: - - type: image - path: images/fire_extinguisher.png - size: 0.85 - center_offset: [ 0, -0.05 ] - -# Effects -Fire: - parts: - - type: image - path: images/fire.png - size: 1 - -Fire1: - parts: - - type: image - path: images/fire.png - size: 1.0 - -Fire2: - parts: - - type: image - path: images/fire2.png - size: 1.0 - -Fire3: - parts: - - type: image - path: images/fire3.png - size: 1.0 - - -# Items -Tomato: - parts: - - type: image - path: images/tomato3_smaller.png - size: 1 - -Onion: - parts: - - type: image - path: images/onion_large.png - size: 0.8 - -Bun: - parts: - - type: image - path: images/bun.png - size: 0.9 - -Lettuce: - parts: - - type: image - path: images/lettuce_smaller.png - size: 0.8 - -Meat: - parts: - - type: image - path: images/meat.png - size: 1 - -ChoppedLettuce: - parts: - - type: image - path: images/lettuce_cut_smaller.png - size: 0.8 - -ChoppedTomato: - parts: - - type: image - path: images/tomato3_cut_smaller.png - size: 1 - -ChoppedOnion: - parts: - - type: image - path: images/onion_cut.png - size: 0.95 - -RawPatty: - parts: - - type: image - path: images/raw_patty.png - size: 0.9 - -CookedPatty: - parts: - - type: image - path: images/cooked_patty.png - size: 0.9 - -Burger: - parts: - - type: image - path: images/burger.png - size: 0.8 - -Salad: - parts: - - type: image - path: images/salad.png - size: 0.8 - -TomatoSoup: - parts: - - type: image - path: images/tomato_soup_pot.png - size: 1.05 - center_offset: [ -0.02, -0.1 ] - -TomatoSoupPlate: - parts: - - type: image - path: images/tomato_soup_plate.png - size: 0.6 - -OnionSoup: - parts: - - type: image - path: images/onion_soup_pot.png - size: 1.05 - center_offset: [ -0.02, -0.1 ] - -OnionSoupPlate: - parts: - - type: image - path: images/onion_soup_plate.png - size: 0.6 - -Cook: - parts: - - type: image - path: images/pixel_cook_masked.png - size: 1 - -Plate: - parts: - - type: image - path: images/plate_clean.png - size: 0.8 - -DirtyPlate: - parts: - - type: image - path: images/plate_dirty.png - size: 0.8 - -Pot: - parts: - - type: image - path: images/pot_smaller.png - size: 1.05 - center_offset: [ -0.02, -0.1 ] - -Pan: - parts: - - type: image - path: images/pan.png - size: 1.1 - -DeepFryer: - parts: - - color: gray5 - type: rect - height: 0.875 - width: 0.875 - - color: lightyellow2 - type: rect - height: 0.675 - width: 0.675 -Oven: - parts: - - color: gray83 - type: rect - height: 0.875 - width: 0.625 - - type: rect - color: black - height: 0.8 - width: 0.3 - center_offset: [ 0, -0.1 ] - -Basket: - parts: - - type: image - path: images/basket.png - size: 0.8 - -Peel: - parts: - - type: image - path: images/pizza_wood.png - size: 1.2 - center_offset: [ 0, 0.2 ] - -Potato: - parts: - - type: image - path: images/potato2.png - size: 0.7 - -RawChips: - parts: - - type: image - path: images/raw_fries.png - size: 0.8 - -Chips: - parts: - - type: image - path: images/fries2.png - size: 0.8 - -Fish: - parts: - - type: image - path: images/fish3.png - size: 0.9 - -ChoppedFish: - parts: - - type: image - path: images/cut_fish.png - size: 0.8 - -FriedFish: - parts: - - type: image - path: images/fried_fish.png - size: 0.8 - -FishAndChips: - parts: - - type: image - path: images/fries2.png - size: 0.8 - center_offset: [ -0.1, 0 ] - - type: image - path: images/fried_fish.png - size: 0.8 - center_offset: [ +0.2, 0 ] - -Dough: - parts: - - type: image - path: images/pizza_dough.png - size: 0.7 - -PizzaBase: - parts: - - type: image - path: images/pizza_base.png - size: 0.9 - -Sausage: - parts: - - type: image - path: images/sausage.png - size: 0.8 - -ChoppedSausage: - parts: - - type: image - path: images/sausage_chopped.png - size: 0.8 - -Cheese: - parts: - - type: image - path: images/cheese3.png - size: 0.7 - -GratedCheese: - parts: - - type: image - path: images/grated_cheese.png - size: 1.1 - -Pizza: - parts: - - type: image - path: images/pizza.png - size: 0.9 \ No newline at end of file diff --git a/overcooked_simulator/reinforcement_learning/environment_config_rl.yaml b/overcooked_simulator/reinforcement_learning/environment_config_rl.yaml index 6b9c6389..8b39daee 100644 --- a/overcooked_simulator/reinforcement_learning/environment_config_rl.yaml +++ b/overcooked_simulator/reinforcement_learning/environment_config_rl.yaml @@ -19,15 +19,16 @@ meals: layout_chars: _: Free - hash: Counter + hash: Counter # # A: Agent pipe: Extinguisher P: PlateDispenser C: CuttingBoard X: Trashcan - W: ServingWindow + $: ServingWindow S: Sink +: SinkAddon + at: Plate # @ just a clean plate on a counter U: Pot # with Stove Q: Pan # with Stove O: Peel # with Oven @@ -42,6 +43,15 @@ layout_chars: G: Sausage # sausaGe B: Bun M: Meat + question: Counter # ? mushroom + ↓: Counter + ^: Counter + right: Counter + left: Counter + wave: Free # ~ Water + minus: Free # - Ice + dquote: Counter # " wall/truck + p: Counter # second plate return ?? orders: @@ -108,28 +118,28 @@ extra_setup_functions: hooks: [ trashcan_usage ] callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks '' callback_class_kwargs: - static_score: -0.5 + static_score: -0.2 item_cut: func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class '' kwargs: hooks: [ cutting_board_100 ] callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks '' callback_class_kwargs: - static_score: 0.10 + static_score: 0.01 stepped: func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class '' kwargs: hooks: [ post_step ] callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks '' callback_class_kwargs: - static_score: -0.05 + static_score: -0.01 combine: func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class '' kwargs: hooks: [ drop_off_on_cooking_equipment ] callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks '' callback_class_kwargs: - static_score: 0.15 + static_score: 0.01 # json_states: # func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class '' # kwargs: diff --git a/overcooked_simulator/reinforcement_learning/full_vectorization.py b/overcooked_simulator/reinforcement_learning/full_vectorization.py new file mode 100644 index 00000000..c4ee9d99 --- /dev/null +++ b/overcooked_simulator/reinforcement_learning/full_vectorization.py @@ -0,0 +1,244 @@ +# def setup_vectorization(self) -> VectorStateGenerationData: +# grid_base_array = np.zeros( +# ( +# int(self.env.kitchen_width), +# int(self.env.kitchen_height), +# 114 + 12 + 4, # TODO calc based on item info +# ), +# dtype=np.float32, +# ) +# counter_list = [ +# "Counter", +# "CuttingBoard", +# "ServingWindow", +# "Trashcan", +# "Sink", +# "SinkAddon", +# "Stove", +# "DeepFryer", +# "Oven", +# ] +# grid_idxs = [ +# (x, y) +# for x in range(int(self.env.kitchen_width)) +# for y in range(int(self.env.kitchen_height)) +# ] +# # counters do not move +# for counter in self.env.counters: +# grid_idx = np.floor(counter.pos).astype(int) +# counter_name = ( +# counter.name +# if isinstance(counter, CookingCounter) +# else ( +# repr(counter) +# if isinstance(Counter, Dispenser) +# else counter.__class__.__name__ +# ) +# ) +# assert counter_name in counter_list or counter_name.endswith( +# "Dispenser" +# ), f"Unknown Counter {counter}" +# oh_idx = len(counter_list) +# if counter_name in counter_list: +# oh_idx = counter_list.index(counter_name) +# +# one_hot = [0] * (len(counter_list) + 2) +# one_hot[oh_idx] = 1 +# grid_base_array[ +# grid_idx[0], grid_idx[1], 4 : 4 + (len(counter_list) + 2) +# ] = np.array(one_hot, dtype=np.float32) +# +# grid_idxs.remove((int(grid_idx[0]), int(grid_idx[1]))) +# +# for free_idx in grid_idxs: +# one_hot = [0] * (len(counter_list) + 2) +# one_hot[len(counter_list) + 1] = 1 +# grid_base_array[ +# free_idx[0], free_idx[1], 4 : 4 + (len(counter_list) + 2) +# ] = np.array(one_hot, dtype=np.float32) +# +# player_info_base_array = np.zeros( +# ( +# 4, +# 4 + 114, +# ), +# dtype=np.float32, +# ) +# order_base_array = np.zeros((10 * (8 + 1)), dtype=np.float32) +# +# return VectorStateGenerationData( +# grid_base_array=grid_base_array, +# oh_len=12, +# ) +# +# +# def get_simple_vectorized_item(self, item: Item) -> npt.NDArray[float]: +# name = item.name +# array = np.zeros(21, dtype=np.float32) +# if item.name.startswith("Burnt"): +# name = name[len("Burnt") :] +# array[0] = 1.0 +# if name.startswith("Chopped"): +# array[1] = 1.0 +# name = name[len("Chopped") :] +# if name in [ +# "PizzaBase", +# "GratedCheese", +# "RawChips", +# "RawPatty", +# ]: +# array[1] = 1.0 +# name = { +# "PizzaBase": "Dough", +# "GratedCheese": "Cheese", +# "RawChips": "Potato", +# "RawPatty": "Meat", +# }[name] +# if name == "CookedPatty": +# array[2] = 1.0 +# name = "Meat" +# +# if name in self.vector_state_generation.meals: +# idx = 3 + self.vector_state_generation.meals.index(name) +# elif name in self.vector_state_generation.ingredients: +# idx = ( +# 3 +# + len(self.vector_state_generation.meals) +# + self.vector_state_generation.ingredients.index(name) +# ) +# else: +# raise ValueError(f"Unknown item {name} - {item}") +# array[idx] = 1.0 +# return array +# +# +# def get_vectorized_item(self, item: Item) -> npt.NDArray[float]: +# item_array = np.zeros(114, dtype=np.float32) +# +# if isinstance(item, CookingEquipment) or item.item_info.type == ItemType.Tool: +# assert ( +# item.name in self.vector_state_generation.equipments +# ), f"unknown equipment {item}" +# idx = self.vector_state_generation.equipments.index(item.name) +# item_array[idx] = 1.0 +# if isinstance(item, CookingEquipment): +# for s_idx, sub_item in enumerate(item.content_list): +# if s_idx > 3: +# print("Too much content in the content list, info dropped") +# break +# start_idx = len(self.vector_state_generation.equipments) + 21 + 2 +# item_array[ +# start_idx + (s_idx * (21)) : start_idx + ((s_idx + 1) * (21)) +# ] = self.get_simple_vectorized_item(sub_item) +# +# else: +# item_array[ +# len(self.vector_state_generation.equipments) : len( +# self.vector_state_generation.equipments +# ) +# + 21 +# ] = self.get_simple_vectorized_item(item) +# +# item_array[ +# len(self.vector_state_generation.equipments) + 21 + 1 +# ] = item.progress_percentage +# +# if item.active_effects: +# item_array[ +# len(self.vector_state_generation.equipments) + 21 + 2 +# ] = 1.0 # TODO percentage of fire... +# +# return item_array +# +# +# def get_vectorized_state_full( +# self, player_id: str +# ) -> Tuple[ +# npt.NDArray[npt.NDArray[float]], +# npt.NDArray[npt.NDArray[float]], +# float, +# npt.NDArray[float], +# ]: +# grid_array = self.vector_state_generation.grid_base_array.copy() +# for counter in self.env.counters: +# grid_idx = np.floor(counter.pos).astype(int) # store in counter? +# if counter.occupied_by: +# if isinstance(counter.occupied_by, deque): +# ... +# else: +# item = counter.occupied_by +# grid_array[ +# grid_idx[0], +# grid_idx[1], +# 4 + self.vector_state_generation.oh_len :, +# ] = self.get_vectorized_item(item) +# if counter.active_effects: +# grid_array[ +# grid_idx[0], +# grid_idx[1], +# 4 + self.vector_state_generation.oh_len - 1, +# ] = 1.0 # TODO percentage of fire... +# +# assert len(self.env.players) <= 4, "To many players for vector representation" +# player_vec = np.zeros( +# ( +# 4, +# 4 + 114, +# ), +# dtype=np.float32, +# ) +# player_pos = 1 +# for player in self.env.players.values(): +# if player.name == player_id: +# idx = 0 +# player_vec[0, :4] = np.array( +# [ +# player.pos[0], +# player.pos[1], +# player.facing_point[0], +# player.facing_point[1], +# ], +# dtype=np.float32, +# ) +# else: +# idx = player_pos +# +# if not idx: +# player_pos += 1 +# grid_idx = np.floor(player.pos).astype(int) # store in counter? +# player_vec[idx, :4] = np.array( +# [ +# player.pos[0] - grid_idx[0], +# player.pos[1] - grid_idx[1], +# player.facing_point[0] / np.linalg.norm(player.facing_point), +# player.facing_point[1] / np.linalg.norm(player.facing_point), +# ], +# dtype=np.float32, +# ) +# grid_array[grid_idx[0], grid_idx[1], idx] = 1.0 +# +# if player.holding: +# player_vec[idx, 4:] = self.get_vectorized_item(player.holding) +# +# order_array = np.zeros((10 * (8 + 1)), dtype=np.float32) +# +# for i, order in enumerate(self.env.order_manager.open_orders): +# if i > 9: +# print("some orders are not represented in the vectorized state") +# break +# assert ( +# order.meal.name in self.vector_state_generation.meals +# ), "unknown meal in order" +# idx = self.vector_state_generation.meals.index(order.meal.name) +# order_array[(i * 9) + idx] = 1.0 +# order_array[(i * 9) + 8] = ( +# self.env_time - order.start_time +# ).total_seconds() / order.max_duration.total_seconds() +# +# return ( +# grid_array, +# player_vec, +# (self.env.env_time - self.env.start_time).total_seconds() +# / (self.env.env_time_end - self.env.start_time).total_seconds(), +# order_array, +# ) diff --git a/overcooked_simulator/reinforcement_learning/gym_env.py b/overcooked_simulator/reinforcement_learning/gym_env.py index 8b87ad08..4b59ec57 100644 --- a/overcooked_simulator/reinforcement_learning/gym_env.py +++ b/overcooked_simulator/reinforcement_learning/gym_env.py @@ -5,11 +5,9 @@ from collections import deque from datetime import timedelta from enum import Enum from pathlib import Path -from typing import Tuple import cv2 import numpy as np -import numpy.typing as npt import wandb import yaml from gymnasium import spaces, Env @@ -24,7 +22,7 @@ from wandb.integration.sb3 import WandbCallback from overcooked_simulator import ROOT_DIR from overcooked_simulator.counters import Counter, CookingCounter, Dispenser -from overcooked_simulator.game_items import Item, CookingEquipment, ItemType +from overcooked_simulator.game_items import CookingEquipment from overcooked_simulator.gui_2d_vis.drawing import Visualizer from overcooked_simulator.overcooked_environment import ( Environment, @@ -32,19 +30,15 @@ from overcooked_simulator.overcooked_environment import ( ActionType, InterActionData, ) -from overcooked_simulator.utils import VectorStateGenerationData - -SimpleActionSpace = Enum( - "SimpleActionSpace", - [ - "Up", - "Left", - "Down", - "Right", - "Interact", - "Put", - ], -) + + +class SimpleActionSpace(Enum): + Up = "Up" + Down = "Down" + Left = "Left" + Right = "Right" + Interact = "Interact" + Put = "Put" def get_env_action(player_id, simple_action, duration): @@ -56,6 +50,7 @@ def get_env_action(player_id, simple_action, duration): np.array([0, -1]), duration, ) + case SimpleActionSpace.Left: return Action( player_id, @@ -104,7 +99,7 @@ with open(layout_path, "r") as file: layout = file.read() with open(environment_config_path, "r") as file: environment_config = file.read() -with open(ROOT_DIR / "gui_2d_vis" / "visualization_rl.yaml", "r") as file: +with open(ROOT_DIR / "gui_2d_vis" / "visualization.yaml", "r") as file: visualization_config = yaml.safe_load(file) @@ -117,6 +112,7 @@ def shuffle_counters(env): random.shuffle(new_counter_pos) for counter, new_pos in zip(sample_counter, new_counter_pos): counter.pos = new_pos + env.counter_positions = np.array([c.pos for c in env.counters]) class EnvGymWrapper(Env): @@ -124,7 +120,7 @@ class EnvGymWrapper(Env): observation, reward, terminated, truncated, info = env.step(action) """ - metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 20} + metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 10} def __init__(self): super().__init__() @@ -133,8 +129,8 @@ class EnvGymWrapper(Env): self.randomize_counter_placement = True self.use_rgb_obs = False # if False uses simple vectorized state - self.use_onehot = False self.full_vector_state = True + self.onehot_state = False self.env: Environment = Environment( env_config=environment_config, @@ -146,16 +142,13 @@ class EnvGymWrapper(Env): if self.randomize_counter_placement: shuffle_counters(self.env) - if self.full_vector_state: - self.vector_state_generation = self.setup_vectorization() - - self.visualizer: Visualizer = Visualizer(config=visualization_config) - self.visualizer.create_player_colors(1) - self.player_name = str(0) self.env.add_player(self.player_name) self.player_id = list(self.env.players.keys())[0] + self.visualizer: Visualizer = Visualizer(config=visualization_config) + self.visualizer.create_player_colors(1) + # self.action_space = {idx: value for idx, value in enumerate(SimpleActionSpace)} self.action_space_map = {} for idx, item in enumerate(SimpleActionSpace): @@ -164,16 +157,15 @@ class EnvGymWrapper(Env): self.in_between_steps = 1 self.action_space = spaces.Discrete(len(self.action_space_map)) - # Example for using image as input (channel-first; channel-last also works): min_obs_val = -1 if not self.use_rgb_obs else 0 - max_obs_val = 1 if self.use_onehot else 255 if self.use_rgb_obs else 8 + max_obs_val = 255 if self.use_rgb_obs else 1 if self.onehot_state else 9 dummy_obs = self.get_observation() self.observation_space = spaces.Box( low=min_obs_val, high=max_obs_val, shape=dummy_obs.shape, - dtype=np.uint8 if self.use_rgb_obs else np.float32, + dtype=np.uint8 if self.use_rgb_obs else int, ) print(self.observation_space) @@ -182,9 +174,66 @@ class EnvGymWrapper(Env): self.step_counter = 0 self.prev_score = 0 + def vectorize_item(self, item, item_list): + item_one_hot = np.zeros(len(item_list)) + if item is None: + item_name = "None" + elif isinstance(item, deque): + if len(item) > 0: + item_name = item[0].name + else: + item_name = "None" + else: + item_name = item.name + + if isinstance(item, CookingEquipment): + if item.name == "Pot": + if len(item.content_list) > 0: + if item.content_list[0].name == "TomatoSoup": + item_name = "PotDone" + elif len(item.content_list) == 1: + item_name = "PotOne" + elif len(item.content_list) == 2: + item_name = "PotTwo" + elif len(item.content_list) == 3: + item_name = "PotThree" + elif item.name == "Plate": + if len(item.content_list) == 0: + item_name = "Plate" + else: + item_name = "PlateTomatoSoup" + assert item_name in item_list, f"Unknown item {item_name}." + item_idx = item_list.index(item_name) + item_one_hot[item_idx] = 1 + + return item_one_hot, item_idx + + @staticmethod + def vectorize_counter(counter, counter_list): + counter_name = ( + counter.name + if isinstance(counter, CookingCounter) + else ( + repr(counter) + if isinstance(Counter, Dispenser) + else counter.__class__.__name__ + ) + ) + if counter_name == "Dispenser": + counter_name = f"{counter.occupied_by.name}Dispenser" + assert counter_name in counter_list, f"Unknown Counter {counter}" + + counter_oh_idx = counter_list.index("Empty") + if counter_name in counter_list: + counter_oh_idx = counter_list.index(counter_name) + + counter_one_hot = np.zeros(len(counter_list), dtype=int) + counter_one_hot[counter_oh_idx] = 1 + return counter_one_hot, counter_oh_idx + def get_vectorized_state_simple(self, player, onehot=True): - item_list = ["Pot", "Tomato", "ChoppedTomato", "Plate"] counter_list = [ + "Empty", "Counter", "PlateDispenser", "TomatoDispenser", @@ -195,11 +244,23 @@ class EnvGymWrapper(Env): "CuttingBoard", ] + item_list = [ + "None", + "Pot", + "PotOne", + "PotTwo", + "PotThree", + "PotDone", + "Tomato", + "ChoppedTomato", + "Plate", + "PlateTomatoSoup", + ] + grid_width, grid_height = int(self.env.kitchen_width), int( self.env.kitchen_height ) - counter_one_hot_length = len(counter_list) + 1 # one for empty field grid_base_array = np.zeros( ( grid_width, @@ -207,312 +268,60 @@ class EnvGymWrapper(Env): ), dtype=int, ) - grid_idxs = [(x, y) for x in range(grid_width) for y in range(grid_height)] - # counters do not move - for counter in self.env.counters: - grid_idx = np.floor(counter.pos).astype(int) - counter_name = ( - counter.name - if isinstance(counter, CookingCounter) - else ( - repr(counter) - if isinstance(Counter, Dispenser) - else counter.__class__.__name__ - ) - ) - if counter_name == "Dispenser": - counter_name = f"{counter.occupied_by.name}Dispenser" - assert counter_name in counter_list, f"Unknown Counter {counter}" - - counter_oh_idx = counter_one_hot_length - if counter_name in counter_list: - counter_oh_idx = counter_list.index(counter_name) - - grid_base_array[grid_idx[0], grid_idx[1]] = counter_oh_idx - grid_idxs.remove((int(grid_idx[0]), int(grid_idx[1]))) - - for free_idx in grid_idxs: - grid_base_array[free_idx[0], free_idx[1]] = counter_one_hot_length - 1 - - counter_grid_one_hot = np.zeros( - (grid_width, grid_height, counter_one_hot_length), dtype=int - ) - for x in range(grid_width): - for y in range(grid_height): - counter_type_idx = grid_base_array[x, y] - counter_grid_one_hot[x, y, counter_type_idx] = 1 - - player_pos = self.env.players[player].pos if onehot: - player_pos[0] /= self.env.kitchen_width - player_pos[1] /= self.env.kitchen_height + item_one_hot_length = len(item_list) + counter_items = np.zeros( + (grid_width, grid_height, item_one_hot_length), dtype=int + ) + counter_one_hot_length = len(counter_list) + counters = np.zeros( + (grid_width, grid_height, counter_one_hot_length), dtype=int + ) else: - player_pos = player_pos.astype(int) - - player_dir = self.env.players[player].facing_direction - player_data = np.concatenate((player_pos, player_dir), axis=0) + counter_items = np.zeros((grid_width, grid_height), dtype=int) + counters = np.zeros((grid_width, grid_height), dtype=int) - items_one_hot_length = len(item_list) + 1 - item_one_hot = np.zeros(items_one_hot_length, dtype=int) - player_item = self.env.players[player].holding - player_item_idx = items_one_hot_length - 1 - if player_item: - if player_item.name in item_list: - player_item_idx = item_list.index(player_item.name) - item_one_hot[player_item_idx] = 1 - - final_idxs = np.concatenate( - (grid_base_array.flatten(), player_data, item_one_hot), axis=0 - ) - final_one_hot = np.concatenate( - (counter_grid_one_hot.flatten(), player_data, item_one_hot), axis=0 - ) - - return final_one_hot if onehot else final_idxs - - def setup_vectorization(self) -> VectorStateGenerationData: - grid_base_array = np.zeros( - ( - int(self.env.kitchen_width), - int(self.env.kitchen_height), - 114 + 12 + 4, # TODO calc based on item info - ), - dtype=np.float32, - ) - counter_list = [ - "Counter", - "CuttingBoard", - "ServingWindow", - "Trashcan", - "Sink", - "SinkAddon", - "Stove", - "DeepFryer", - "Oven", - ] - grid_idxs = [ - (x, y) - for x in range(int(self.env.kitchen_width)) - for y in range(int(self.env.kitchen_height)) - ] - # counters do not move for counter in self.env.counters: grid_idx = np.floor(counter.pos).astype(int) - counter_name = ( - counter.name - if isinstance(counter, CookingCounter) - else ( - repr(counter) - if isinstance(Counter, Dispenser) - else counter.__class__.__name__ - ) - ) - assert counter_name in counter_list or counter_name.endswith( - "Dispenser" - ), f"Unknown Counter {counter}" - oh_idx = len(counter_list) - if counter_name in counter_list: - oh_idx = counter_list.index(counter_name) - - one_hot = [0] * (len(counter_list) + 2) - one_hot[oh_idx] = 1 - grid_base_array[ - grid_idx[0], grid_idx[1], 4 : 4 + (len(counter_list) + 2) - ] = np.array(one_hot, dtype=np.float32) + counter_one_hot, counter_oh_idx = self.vectorize_counter( + counter, counter_list + ) + grid_base_array[grid_idx[0], grid_idx[1]] = counter_oh_idx grid_idxs.remove((int(grid_idx[0]), int(grid_idx[1]))) + counter_item_one_hot, counter_item_oh_idx = self.vectorize_item( + counter.occupied_by, item_list + ) + counter_items[grid_idx] = ( + counter_item_one_hot if onehot else counter_item_oh_idx + ) + counters[grid_idx] = counter_one_hot if onehot else counter_oh_idx + for free_idx in grid_idxs: - one_hot = [0] * (len(counter_list) + 2) - one_hot[len(counter_list) + 1] = 1 - grid_base_array[ - free_idx[0], free_idx[1], 4 : 4 + (len(counter_list) + 2) - ] = np.array(one_hot, dtype=np.float32) + grid_base_array[free_idx[0], free_idx[1]] = counter_list.index("Empty") - player_info_base_array = np.zeros( - ( - 4, - 4 + 114, - ), - dtype=np.float32, - ) - order_base_array = np.zeros((10 * (8 + 1)), dtype=np.float32) + player_pos = self.env.players[player].pos.astype(int) + player_dir = self.env.players[player].facing_direction.astype(int) + player_data = np.concatenate((player_pos, player_dir), axis=0) - return VectorStateGenerationData( - grid_base_array=grid_base_array, - oh_len=12, + player_item_one_hot, player_item_idx = self.vectorize_item( + self.env.players[player].holding, item_list ) + player_item = player_item_one_hot if onehot else [player_item_idx] - def get_simple_vectorized_item(self, item: Item) -> npt.NDArray[float]: - name = item.name - array = np.zeros(21, dtype=np.float32) - if item.name.startswith("Burnt"): - name = name[len("Burnt") :] - array[0] = 1.0 - if name.startswith("Chopped"): - array[1] = 1.0 - name = name[len("Chopped") :] - if name in [ - "PizzaBase", - "GratedCheese", - "RawChips", - "RawPatty", - ]: - array[1] = 1.0 - name = { - "PizzaBase": "Dough", - "GratedCheese": "Cheese", - "RawChips": "Potato", - "RawPatty": "Meat", - }[name] - if name == "CookedPatty": - array[2] = 1.0 - name = "Meat" - - if name in self.vector_state_generation.meals: - idx = 3 + self.vector_state_generation.meals.index(name) - elif name in self.vector_state_generation.ingredients: - idx = ( - 3 - + len(self.vector_state_generation.meals) - + self.vector_state_generation.ingredients.index(name) - ) - else: - raise ValueError(f"Unknown item {name} - {item}") - array[idx] = 1.0 - return array - - def get_vectorized_item(self, item: Item) -> npt.NDArray[float]: - item_array = np.zeros(114, dtype=np.float32) - - if isinstance(item, CookingEquipment) or item.item_info.type == ItemType.Tool: - assert ( - item.name in self.vector_state_generation.equipments - ), f"unknown equipment {item}" - idx = self.vector_state_generation.equipments.index(item.name) - item_array[idx] = 1.0 - if isinstance(item, CookingEquipment): - for s_idx, sub_item in enumerate(item.content_list): - if s_idx > 3: - print("Too much content in the content list, info dropped") - break - start_idx = len(self.vector_state_generation.equipments) + 21 + 2 - item_array[ - start_idx + (s_idx * (21)) : start_idx + ((s_idx + 1) * (21)) - ] = self.get_simple_vectorized_item(sub_item) - - else: - item_array[ - len(self.vector_state_generation.equipments) : len( - self.vector_state_generation.equipments - ) - + 21 - ] = self.get_simple_vectorized_item(item) - - item_array[ - len(self.vector_state_generation.equipments) + 21 + 1 - ] = item.progress_percentage - - if item.active_effects: - item_array[ - len(self.vector_state_generation.equipments) + 21 + 2 - ] = 1.0 # TODO percentage of fire... - - return item_array - - def get_vectorized_state_full( - self, player_id: str - ) -> Tuple[ - npt.NDArray[npt.NDArray[float]], - npt.NDArray[npt.NDArray[float]], - float, - npt.NDArray[float], - ]: - grid_array = self.vector_state_generation.grid_base_array.copy() - for counter in self.env.counters: - grid_idx = np.floor(counter.pos).astype(int) # store in counter? - if counter.occupied_by: - if isinstance(counter.occupied_by, deque): - ... - else: - item = counter.occupied_by - grid_array[ - grid_idx[0], - grid_idx[1], - 4 + self.vector_state_generation.oh_len :, - ] = self.get_vectorized_item(item) - if counter.active_effects: - grid_array[ - grid_idx[0], - grid_idx[1], - 4 + self.vector_state_generation.oh_len - 1, - ] = 1.0 # TODO percentage of fire... - - assert len(self.env.players) <= 4, "To many players for vector representation" - player_vec = np.zeros( + final = np.concatenate( ( - 4, - 4 + 114, + counters.flatten(), + counter_items.flatten(), + player_data.flatten(), + player_item, ), - dtype=np.float32, - ) - player_pos = 1 - for player in self.env.players.values(): - if player.name == player_id: - idx = 0 - player_vec[0, :4] = np.array( - [ - player.pos[0], - player.pos[1], - player.facing_point[0], - player.facing_point[1], - ], - dtype=np.float32, - ) - else: - idx = player_pos - - if not idx: - player_pos += 1 - grid_idx = np.floor(player.pos).astype(int) # store in counter? - player_vec[idx, :4] = np.array( - [ - player.pos[0] - grid_idx[0], - player.pos[1] - grid_idx[1], - player.facing_point[0] / np.linalg.norm(player.facing_point), - player.facing_point[1] / np.linalg.norm(player.facing_point), - ], - dtype=np.float32, - ) - grid_array[grid_idx[0], grid_idx[1], idx] = 1.0 - - if player.holding: - player_vec[idx, 4:] = self.get_vectorized_item(player.holding) - - order_array = np.zeros((10 * (8 + 1)), dtype=np.float32) - - for i, order in enumerate(self.env.order_manager.open_orders): - if i > 9: - print("some orders are not represented in the vectorized state") - break - assert ( - order.meal.name in self.vector_state_generation.meals - ), "unknown meal in order" - idx = self.vector_state_generation.meals.index(order.meal.name) - order_array[(i * 9) + idx] = 1.0 - order_array[(i * 9) + 8] = ( - self.env_time - order.start_time - ).total_seconds() / order.max_duration.total_seconds() - - return ( - grid_array, - player_vec, - (self.env.env_time - self.env.start_time).total_seconds() - / (self.env.env_time_end - self.env.start_time).total_seconds(), - order_array, + axis=0, ) + return final def step(self, action): simple_action = self.action_space_map[action] @@ -531,6 +340,9 @@ class EnvGymWrapper(Env): reward = self.env.score - self.prev_score self.prev_score = self.env.score + if reward > 0.6: + print("- - - - - - - - - - - - - - - - SCORED", reward) + terminated = self.env.game_ended truncated = self.env.game_ended info = {} @@ -552,9 +364,6 @@ class EnvGymWrapper(Env): self.env.add_player(self.player_name) self.player_id = list(self.env.players.keys())[0] - if self.full_vector_state: - self.vector_state_generation = self.setup_vectorization() - info = {} obs = self.get_observation() @@ -579,16 +388,16 @@ class EnvGymWrapper(Env): def close(self): pass - def get_env_img(self, gridsize): + def get_env_img(self, grid_size=20): state = self.env.get_json_state(player_id=self.player_id) json_dict = json.loads(state) observation = self.visualizer.get_state_image( - grid_size=gridsize, state=json_dict + grid_size=grid_size, state=json_dict ).transpose((1, 0, 2)) return (observation.transpose((2, 0, 1))).astype(np.uint8) def get_vector_state(self): - obs = self.get_vectorized_state_simple("0", self.use_onehot) + obs = self.get_vectorized_state_simple("0", self.onehot_state) return obs def sample_random_action(self): diff --git a/overcooked_simulator/reinforcement_learning/item_info_rl.yaml b/overcooked_simulator/reinforcement_learning/item_info_rl.yaml index 8128b92c..22a79261 100644 --- a/overcooked_simulator/reinforcement_learning/item_info_rl.yaml +++ b/overcooked_simulator/reinforcement_learning/item_info_rl.yaml @@ -73,7 +73,7 @@ Sausage: ChoppedTomato: type: Ingredient needs: [ Tomato ] - seconds: 4.0 + seconds: 1.0 equipment: CuttingBoard ChoppedLettuce: diff --git a/overcooked_simulator/reinforcement_learning/rl.layout b/overcooked_simulator/reinforcement_learning/rl.layout index e1e8c075..131e1b95 100644 --- a/overcooked_simulator/reinforcement_learning/rl.layout +++ b/overcooked_simulator/reinforcement_learning/rl.layout @@ -1,5 +1,5 @@ ##X## -T___P #___# +T___P U___# -#C#W# +#C#$# diff --git a/overcooked_simulator/reinforcement_learning/rl_small.layout b/overcooked_simulator/reinforcement_learning/rl_small.layout index a9eda0c9..bbb4ad3e 100644 --- a/overcooked_simulator/reinforcement_learning/rl_small.layout +++ b/overcooked_simulator/reinforcement_learning/rl_small.layout @@ -1,4 +1,4 @@ -#X## -T__P -U__# -#CW# +##X# +T__# +U__P +#C$# -- GitLab