Overhaul of vector state.

321f04b2 · Fabian Heinrich · 04b45d2d · 321f04b2 · 321f04b2 · 321f04b2
Commit 321f04b2 authored 1 year ago by Fabian Heinrich
--- a/overcooked_simulator/game_content/environment_config.yaml
+++ b/overcooked_simulator/game_content/environment_config.yaml
 plates:
-  clean_plates: 1
+  clean_plates: 2
-  dirty_plates: 2
+  dirty_plates: 0
  plate_delay: [ 5, 10 ]
  # range of seconds until the dirty plate arrives.

--- a/overcooked_simulator/game_content/layouts/rl_small.layout
+++ b/overcooked_simulator/game_content/layouts/rl_small.layout
+#X##
+T__P
+U__#
+#C$#
--- a/overcooked_simulator/gui_2d_vis/drawing.py
+++ b/overcooked_simulator/gui_2d_vis/drawing.py
@@ -543,7 +543,7 @@ class Visualizer:
            )
        elif "content_list" in item and item["content_list"]:
            triangle_offsets = create_polygon(
-                len(item["content_list"]), np.array([0.10])
+                len(item["content_list"]), np.array([0, 10])
            )
            scale = 1 if len(item["content_list"]) == 1 else 0.6
            for idx, o in enumerate(item["content_list"]):
@@ -856,9 +856,13 @@ class Visualizer:
        flags = pygame.HIDDEN
        if not self.observation_screen:
-            self.observation_screen = pygame.display.set_mode((width, height), flags=flags)
+            self.observation_screen = pygame.display.set_mode(
+                (width, height), flags=flags
+            )
-        self.draw_gamescreen(self.observation_screen, state, grid_size, [0 for _ in state["players"]])
+        self.draw_gamescreen(
+            self.observation_screen, state, grid_size, [0 for _ in state["players"]]
+        )
        red = pygame.surfarray.array_red(self.observation_screen)
        green = pygame.surfarray.array_green(self.observation_screen)

--- a/overcooked_simulator/gui_2d_vis/visualization.yaml
+++ b/overcooked_simulator/gui_2d_vis/visualization.yaml
@@ -102,10 +102,10 @@ Dispenser:
 ServingWindow:
  parts:
-    #    - type: image
+    - type: image
-    #      path: images/arrow_right.png
+      path: images/arrow_right.png
-    #      size: 1
+      size: 1
-    #      center_offset: [ 0, 0 ]
+      center_offset: [ 0, 0 ]
    - type: image
      path: images/bell_gold.png
      size: 0.5

--- a/overcooked_simulator/gui_2d_vis/visualization_rl.yaml
+++ b/overcooked_simulator/gui_2d_vis/visualization_rl.yaml
-# colors: https://www.webucator.com/article/python-color-constants-module/
-GameWindow:
-  WhatIsFixed: grid  # grid or window_width or window_height
-  size: 50
-  screen_margin: 100
-  min_width: 700
-  min_height: 600
-  buttons_width: 180
-  buttons_height: 60
-  order_bar_height: 100
-  order_size: 50
-  game_border_size: 1
-  game_border_color: black
-  background_color: lemonchiffon1
-Kitchen:
-  ground_tiles_color: sgigray76
-  background_lines: gray79
-Counter:
-  parts:
-    #    - type: rect
-    #      height: 1
-    #      width: 1
-    #      color: whitesmoke
-    - type: image
-      path: images/counter5.png
-      size: 1
-CuttingBoard:
-  parts:
-    - type: image
-      path: images/cutting_board_large.png
-      size: 0.75
-      center_offset: [ 0, -0.05 ]
-PlateDispenser:
-  parts: [ ]
-#    - type: rect
-#      height: 0.95
-#      width: 0.95
-#      color: cadetblue1
-Trashcan:
-  parts:
-    - type: image
-      path: images/trash3.png
-      size: 0.88
-      center_offset: [ 0, -0.05 ]
-#TomatoDispenser:
-#  parts:
-#    - color: orangered1
-#      type: rect
-#      height: 0.8
-#      width: 0.8
-#
-#LettuceDispenser:
-#  parts:
-#    - color: palegreen3
-#      type: rect
-#      height: 0.8
-#      width: 0.8
-#
-#OnionDispenser:
-#  parts:
-#    - color: deeppink3
-#      type: rect
-#      height: 0.8
-#      width: 0.8
-#
-#MeatDispenser:
-#  parts:
-#    - color: indianred1
-#      type: rect
-#      height: 0.8
-#      width: 0.8
-#
-#BunDispenser:
-#  parts:
-#    - color: sandybrown
-#      type: rect
-#      height: 0.8
-#      width: 0.8
-Dispenser:
-  parts:
-    - type: circle
-      color: black
-      radius: 0.35
-      center_offset: [ 0, -0.05 ]
-    - type: circle
-      color: gray83
-      radius: 0.33
-      center_offset: [ 0, -0.05 ]
-  item_offset: [ 0, -0.05 ]
-  item_scale: 0.9
-ServingWindow:
-  parts:
-    - type: image
-      path: images/arrow_right.png
-      size: 1
-      center_offset: [ 0, 0 ]
-    - type: image
-      path: images/bell_gold.png
-      size: 0.5
-      center_offset: [ -0.4, 0.1 ]
-      rotate_image: False
-Stove:
-  parts:
-    - color: black
-      type: rect
-      height: 0.875
-      width: 0.625
-    - color: flesh
-      type: circle
-      radius: 0.25
-Sink:
-  parts:
-    - type: image
-      path: images/sink1.png
-      size: 0.85
-      center_offset: [ 0, -0.12 ]
-SinkAddon:
-  parts:
-    - type: image
-      path: images/drip2.png
-      size: 0.75
-      center_offset: [ 0, -0.05 ]
-# Tools
-Extinguisher:
-  parts:
-    - type: image
-      path: images/fire_extinguisher.png
-      size: 0.85
-      center_offset: [ 0, -0.05 ]
-# Effects
-Fire:
-  parts:
-    - type: image
-      path: images/fire.png
-      size: 1
-Fire1:
-  parts:
-    - type: image
-      path: images/fire.png
-      size: 1.0
-Fire2:
-  parts:
-    - type: image
-      path: images/fire2.png
-      size: 1.0
-Fire3:
-  parts:
-    - type: image
-      path: images/fire3.png
-      size: 1.0
-# Items
-Tomato:
-  parts:
-    - type: image
-      path: images/tomato3_smaller.png
-      size: 1
-Onion:
-  parts:
-    - type: image
-      path: images/onion_large.png
-      size: 0.8
-Bun:
-  parts:
-    - type: image
-      path: images/bun.png
-      size: 0.9
-Lettuce:
-  parts:
-    - type: image
-      path: images/lettuce_smaller.png
-      size: 0.8
-Meat:
-  parts:
-    - type: image
-      path: images/meat.png
-      size: 1
-ChoppedLettuce:
-  parts:
-    - type: image
-      path: images/lettuce_cut_smaller.png
-      size: 0.8
-ChoppedTomato:
-  parts:
-    - type: image
-      path: images/tomato3_cut_smaller.png
-      size: 1
-ChoppedOnion:
-  parts:
-    - type: image
-      path: images/onion_cut.png
-      size: 0.95
-RawPatty:
-  parts:
-    - type: image
-      path: images/raw_patty.png
-      size: 0.9
-CookedPatty:
-  parts:
-    - type: image
-      path: images/cooked_patty.png
-      size: 0.9
-Burger:
-  parts:
-    - type: image
-      path: images/burger.png
-      size: 0.8
-Salad:
-  parts:
-    - type: image
-      path: images/salad.png
-      size: 0.8
-TomatoSoup:
-  parts:
-    - type: image
-      path: images/tomato_soup_pot.png
-      size: 1.05
-      center_offset: [ -0.02, -0.1 ]
-TomatoSoupPlate:
-  parts:
-    - type: image
-      path: images/tomato_soup_plate.png
-      size: 0.6
-OnionSoup:
-  parts:
-    - type: image
-      path: images/onion_soup_pot.png
-      size: 1.05
-      center_offset: [ -0.02, -0.1 ]
-OnionSoupPlate:
-  parts:
-    - type: image
-      path: images/onion_soup_plate.png
-      size: 0.6
-Cook:
-  parts:
-    - type: image
-      path: images/pixel_cook_masked.png
-      size: 1
-Plate:
-  parts:
-    - type: image
-      path: images/plate_clean.png
-      size: 0.8
-DirtyPlate:
-  parts:
-    - type: image
-      path: images/plate_dirty.png
-      size: 0.8
-Pot:
-  parts:
-    - type: image
-      path: images/pot_smaller.png
-      size: 1.05
-      center_offset: [ -0.02, -0.1 ]
-Pan:
-  parts:
-    - type: image
-      path: images/pan.png
-      size: 1.1
-DeepFryer:
-  parts:
-    - color: gray5
-      type: rect
-      height: 0.875
-      width: 0.875
-    - color: lightyellow2
-      type: rect
-      height: 0.675
-      width: 0.675
-Oven:
-  parts:
-    - color: gray83
-      type: rect
-      height: 0.875
-      width: 0.625
-    - type: rect
-      color: black
-      height: 0.8
-      width: 0.3
-      center_offset: [ 0, -0.1 ]
-Basket:
-  parts:
-    - type: image
-      path: images/basket.png
-      size: 0.8
-Peel:
-  parts:
-    - type: image
-      path: images/pizza_wood.png
-      size: 1.2
-      center_offset: [ 0, 0.2 ]
-Potato:
-  parts:
-    - type: image
-      path: images/potato2.png
-      size: 0.7
-RawChips:
-  parts:
-    - type: image
-      path: images/raw_fries.png
-      size: 0.8
-Chips:
-  parts:
-    - type: image
-      path: images/fries2.png
-      size: 0.8
-Fish:
-  parts:
-    - type: image
-      path: images/fish3.png
-      size: 0.9
-ChoppedFish:
-  parts:
-    - type: image
-      path: images/cut_fish.png
-      size: 0.8
-FriedFish:
-  parts:
-    - type: image
-      path: images/fried_fish.png
-      size: 0.8
-FishAndChips:
-  parts:
-    - type: image
-      path: images/fries2.png
-      size: 0.8
-      center_offset: [ -0.1, 0 ]
-    - type: image
-      path: images/fried_fish.png
-      size: 0.8
-      center_offset: [ +0.2, 0 ]
-Dough:
-  parts:
-    - type: image
-      path: images/pizza_dough.png
-      size: 0.7
-PizzaBase:
-  parts:
-    - type: image
-      path: images/pizza_base.png
-      size: 0.9
-Sausage:
-  parts:
-    - type: image
-      path: images/sausage.png
-      size: 0.8
-ChoppedSausage:
-  parts:
-    - type: image
-      path: images/sausage_chopped.png
-      size: 0.8
-Cheese:
-  parts:
-    - type: image
-      path: images/cheese3.png
-      size: 0.7
-GratedCheese:
-  parts:
-    - type: image
-      path: images/grated_cheese.png
-      size: 1.1
-Pizza:
-  parts:
-    - type: image
-      path: images/pizza.png
-      size: 0.9
\ No newline at end of file
--- a/overcooked_simulator/reinforcement_learning/environment_config_rl.yaml
+++ b/overcooked_simulator/reinforcement_learning/environment_config_rl.yaml
@@ -19,15 +19,16 @@ meals:
 layout_chars:
  _: Free
-  hash: Counter
+  hash: Counter  # #
  A: Agent
  pipe: Extinguisher
  P: PlateDispenser
  C: CuttingBoard
  X: Trashcan
-  W: ServingWindow
+  $: ServingWindow
  S: Sink
  +: SinkAddon
+  at: Plate  # @ just a clean plate on a counter
  U: Pot  # with Stove
  Q: Pan  # with Stove
  O: Peel  # with Oven
@@ -42,6 +43,15 @@ layout_chars:
  G: Sausage  # sausaGe
  B: Bun
  M: Meat
+  question: Counter  # ? mushroom
+  ↓: Counter
+  ^: Counter
+  right: Counter
+  left: Counter
+  wave: Free  # ~ Water
+  minus: Free  # - Ice
+  dquote: Counter  # " wall/truck
+  p: Counter # second plate return ??
 orders:
@@ -108,28 +118,28 @@ extra_setup_functions:
      hooks: [ trashcan_usage ]
      callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks ''
      callback_class_kwargs:
-        static_score: -0.5
+        static_score: -0.2
  item_cut:
    func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class ''
    kwargs:
      hooks: [ cutting_board_100 ]
      callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks ''
      callback_class_kwargs:
-        static_score: 0.10
+        static_score: 0.01
  stepped:
    func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class ''
    kwargs:
      hooks: [ post_step ]
      callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks ''
      callback_class_kwargs:
-        static_score: -0.05
+        static_score: -0.01
  combine:
    func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class ''
    kwargs:
      hooks: [ drop_off_on_cooking_equipment ]
      callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks ''
      callback_class_kwargs:
-        static_score: 0.15
+        static_score: 0.01
  #  json_states:
  #    func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class ''
  #    kwargs:

--- a/overcooked_simulator/reinforcement_learning/full_vectorization.py
+++ b/overcooked_simulator/reinforcement_learning/full_vectorization.py
+# def setup_vectorization(self) -> VectorStateGenerationData:
+#     grid_base_array = np.zeros(
+#         (
+#             int(self.env.kitchen_width),
+#             int(self.env.kitchen_height),
+#             114 + 12 + 4,  # TODO calc based on item info
+#         ),
+#         dtype=np.float32,
+#     )
+#     counter_list = [
+#         "Counter",
+#         "CuttingBoard",
+#         "ServingWindow",
+#         "Trashcan",
+#         "Sink",
+#         "SinkAddon",
+#         "Stove",
+#         "DeepFryer",
+#         "Oven",
+#     ]
+#     grid_idxs = [
+#         (x, y)
+#         for x in range(int(self.env.kitchen_width))
+#         for y in range(int(self.env.kitchen_height))
+#     ]
+#     # counters do not move
+#     for counter in self.env.counters:
+#         grid_idx = np.floor(counter.pos).astype(int)
+#         counter_name = (
+#             counter.name
+#             if isinstance(counter, CookingCounter)
+#             else (
+#                 repr(counter)
+#                 if isinstance(counter, Dispenser)
+#                 else counter.__class__.__name__
+#             )
+#         )
+#         assert counter_name in counter_list or counter_name.endswith(
+#             "Dispenser"
+#         ), f"Unknown Counter {counter}"
+#         oh_idx = len(counter_list)
+#         if counter_name in counter_list:
+#             oh_idx = counter_list.index(counter_name)
+#
+#         one_hot = [0] * (len(counter_list) + 2)
+#         one_hot[oh_idx] = 1
+#         grid_base_array[
+#             grid_idx[0], grid_idx[1], 4 : 4 + (len(counter_list) + 2)
+#         ] = np.array(one_hot, dtype=np.float32)
+#
+#         grid_idxs.remove((int(grid_idx[0]), int(grid_idx[1])))
+#
+#     for free_idx in grid_idxs:
+#         one_hot = [0] * (len(counter_list) + 2)
+#         one_hot[len(counter_list) + 1] = 1
+#         grid_base_array[
+#             free_idx[0], free_idx[1], 4 : 4 + (len(counter_list) + 2)
+#         ] = np.array(one_hot, dtype=np.float32)
+#
+#     player_info_base_array = np.zeros(
+#         (
+#             4,
+#             4 + 114,
+#         ),
+#         dtype=np.float32,
+#     )
+#     order_base_array = np.zeros((10 * (8 + 1)), dtype=np.float32)
+#
+#     return VectorStateGenerationData(
+#         grid_base_array=grid_base_array,
+#         oh_len=12,
+#     )
+#
+#
+# def get_simple_vectorized_item(self, item: Item) -> npt.NDArray[float]:
+#     name = item.name
+#     array = np.zeros(21, dtype=np.float32)
+#     if item.name.startswith("Burnt"):
+#         name = name[len("Burnt") :]
+#         array[0] = 1.0
+#     if name.startswith("Chopped"):
+#         array[1] = 1.0
+#         name = name[len("Chopped") :]
+#     if name in [
+#         "PizzaBase",
+#         "GratedCheese",
+#         "RawChips",
+#         "RawPatty",
+#     ]:
+#         array[1] = 1.0
+#         name = {
+#             "PizzaBase": "Dough",
+#             "GratedCheese": "Cheese",
+#             "RawChips": "Potato",
+#             "RawPatty": "Meat",
+#         }[name]
+#     if name == "CookedPatty":
+#         array[2] = 1.0
+#         name = "Meat"
+#
+#     if name in self.vector_state_generation.meals:
+#         idx = 3 + self.vector_state_generation.meals.index(name)
+#     elif name in self.vector_state_generation.ingredients:
+#         idx = (
+#             3
+#             + len(self.vector_state_generation.meals)
+#             + self.vector_state_generation.ingredients.index(name)
+#         )
+#     else:
+#         raise ValueError(f"Unknown item {name} - {item}")
+#     array[idx] = 1.0
+#     return array
+#
+#
+# def get_vectorized_item(self, item: Item) -> npt.NDArray[float]:
+#     item_array = np.zeros(114, dtype=np.float32)
+#
+#     if isinstance(item, CookingEquipment) or item.item_info.type == ItemType.Tool:
+#         assert (
+#             item.name in self.vector_state_generation.equipments
+#         ), f"unknown equipment {item}"
+#         idx = self.vector_state_generation.equipments.index(item.name)
+#         item_array[idx] = 1.0
+#         if isinstance(item, CookingEquipment):
+#             for s_idx, sub_item in enumerate(item.content_list):
+#                 if s_idx > 3:
+#                     print("Too much content in the content list, info dropped")
+#                     break
+#                 start_idx = len(self.vector_state_generation.equipments) + 21 + 2
+#                 item_array[
+#                     start_idx + (s_idx * (21)) : start_idx + ((s_idx + 1) * (21))
+#                 ] = self.get_simple_vectorized_item(sub_item)
+#
+#     else:
+#         item_array[
+#             len(self.vector_state_generation.equipments) : len(
+#                 self.vector_state_generation.equipments
+#             )
+#             + 21
+#         ] = self.get_simple_vectorized_item(item)
+#
+#     item_array[
+#         len(self.vector_state_generation.equipments) + 21 + 1
+#     ] = item.progress_percentage
+#
+#     if item.active_effects:
+#         item_array[
+#             len(self.vector_state_generation.equipments) + 21 + 2
+#         ] = 1.0  # TODO percentage of fire...
+#
+#     return item_array
+#
+#
+# def get_vectorized_state_full(
+#     self, player_id: str
+# ) -> Tuple[
+#     npt.NDArray[npt.NDArray[float]],
+#     npt.NDArray[npt.NDArray[float]],
+#     float,
+#     npt.NDArray[float],
+# ]:
+#     grid_array = self.vector_state_generation.grid_base_array.copy()
+#     for counter in self.env.counters:
+#         grid_idx = np.floor(counter.pos).astype(int)  # store in counter?
+#         if counter.occupied_by:
+#             if isinstance(counter.occupied_by, deque):
+#                 ...
+#             else:
+#                 item = counter.occupied_by
+#                 grid_array[
+#                     grid_idx[0],
+#                     grid_idx[1],
+#                     4 + self.vector_state_generation.oh_len :,
+#                 ] = self.get_vectorized_item(item)
+#         if counter.active_effects:
+#             grid_array[
+#                 grid_idx[0],
+#                 grid_idx[1],
+#                 4 + self.vector_state_generation.oh_len - 1,
+#             ] = 1.0  # TODO percentage of fire...
+#
+#     assert len(self.env.players) <= 4, "Too many players for vector representation"
+#     player_vec = np.zeros(
+#         (
+#             4,
+#             4 + 114,
+#         ),
+#         dtype=np.float32,
+#     )
+#     player_pos = 1
+#     for player in self.env.players.values():
+#         if player.name == player_id:
+#             idx = 0
+#             player_vec[0, :4] = np.array(
+#                 [
+#                     player.pos[0],
+#                     player.pos[1],
+#                     player.facing_point[0],
+#                     player.facing_point[1],
+#                 ],
+#                 dtype=np.float32,
+#             )
+#         else:
+#             idx = player_pos
+#
+#         if not idx:
+#             player_pos += 1
+#         grid_idx = np.floor(player.pos).astype(int)  # store in counter?
+#         player_vec[idx, :4] = np.array(
+#             [
+#                 player.pos[0] - grid_idx[0],
+#                 player.pos[1] - grid_idx[1],
+#                 player.facing_point[0] / np.linalg.norm(player.facing_point),
+#                 player.facing_point[1] / np.linalg.norm(player.facing_point),
+#             ],
+#             dtype=np.float32,
+#         )
+#         grid_array[grid_idx[0], grid_idx[1], idx] = 1.0
+#
+#         if player.holding:
+#             player_vec[idx, 4:] = self.get_vectorized_item(player.holding)
+#
+#     order_array = np.zeros((10 * (8 + 1)), dtype=np.float32)
+#
+#     for i, order in enumerate(self.env.order_manager.open_orders):
+#         if i > 9:
+#             print("some orders are not represented in the vectorized state")
+#             break
+#         assert (
+#             order.meal.name in self.vector_state_generation.meals
+#         ), "unknown meal in order"
+#         idx = self.vector_state_generation.meals.index(order.meal.name)
+#         order_array[(i * 9) + idx] = 1.0
+#         order_array[(i * 9) + 8] = (
+#             self.env.env_time - order.start_time
+#         ).total_seconds() / order.max_duration.total_seconds()
+#
+#     return (
+#         grid_array,
+#         player_vec,
+#         (self.env.env_time - self.env.start_time).total_seconds()
+#         / (self.env.env_time_end - self.env.start_time).total_seconds(),
+#         order_array,
+#     )
--- a/overcooked_simulator/reinforcement_learning/gym_env.py
+++ b/overcooked_simulator/reinforcement_learning/gym_env.py
--- a/overcooked_simulator/reinforcement_learning/item_info_rl.yaml
+++ b/overcooked_simulator/reinforcement_learning/item_info_rl.yaml
@@ -73,7 +73,7 @@ Sausage:
 ChoppedTomato:
  type: Ingredient
  needs: [ Tomato ]
-  seconds: 4.0
+  seconds: 1.0
  equipment: CuttingBoard
 ChoppedLettuce:

--- a/overcooked_simulator/reinforcement_learning/rl.layout
+++ b/overcooked_simulator/reinforcement_learning/rl.layout
 ##X##
-T___P
 #___#
+T___P
 U___#
-#C#W#
+#C#$#
--- a/overcooked_simulator/reinforcement_learning/rl_small.layout
+++ b/overcooked_simulator/reinforcement_learning/rl_small.layout
-#X##
+##X#
-T__P
+T__#
-U__#
+U__P
-#CW#
+#C$#