From 321f04b2aa3dc7335ace1e2de7f51eea343d3972 Mon Sep 17 00:00:00 2001
From: fheinrich <fheinrich@techfak.uni-bielefeld.de>
Date: Thu, 15 Feb 2024 12:32:05 +0100
Subject: [PATCH] Overhaul of vector state.

---
 .../game_content/environment_config.yaml      |   4 +-
 .../game_content/layouts/rl_small.layout      |   4 +
 overcooked_simulator/gui_2d_vis/drawing.py    |  10 +-
 .../gui_2d_vis/visualization.yaml             |   8 +-
 .../gui_2d_vis/visualization_rl.yaml          | 427 ----------------
 .../environment_config_rl.yaml                |  22 +-
 .../full_vectorization.py                     | 244 ++++++++++
 .../reinforcement_learning/gym_env.py         | 457 +++++-------------
 .../reinforcement_learning/item_info_rl.yaml  |   2 +-
 .../reinforcement_learning/rl.layout          |   4 +-
 .../reinforcement_learning/rl_small.layout    |   8 +-
 11 files changed, 417 insertions(+), 773 deletions(-)
 create mode 100644 overcooked_simulator/game_content/layouts/rl_small.layout
 delete mode 100644 overcooked_simulator/gui_2d_vis/visualization_rl.yaml
 create mode 100644 overcooked_simulator/reinforcement_learning/full_vectorization.py

diff --git a/overcooked_simulator/game_content/environment_config.yaml b/overcooked_simulator/game_content/environment_config.yaml
index a0c14fcd..4491e6fe 100644
--- a/overcooked_simulator/game_content/environment_config.yaml
+++ b/overcooked_simulator/game_content/environment_config.yaml
@@ -1,6 +1,6 @@
 plates:
-  clean_plates: 1
-  dirty_plates: 2
+  clean_plates: 2
+  dirty_plates: 0
   plate_delay: [ 5, 10 ]
   # range of seconds until the dirty plate arrives.
 
diff --git a/overcooked_simulator/game_content/layouts/rl_small.layout b/overcooked_simulator/game_content/layouts/rl_small.layout
new file mode 100644
index 00000000..c3e66a2c
--- /dev/null
+++ b/overcooked_simulator/game_content/layouts/rl_small.layout
@@ -0,0 +1,4 @@
+#X##
+T__P
+U__#
+#C$#
diff --git a/overcooked_simulator/gui_2d_vis/drawing.py b/overcooked_simulator/gui_2d_vis/drawing.py
index 05475472..f0b27674 100644
--- a/overcooked_simulator/gui_2d_vis/drawing.py
+++ b/overcooked_simulator/gui_2d_vis/drawing.py
@@ -543,7 +543,7 @@ class Visualizer:
             )
         elif "content_list" in item and item["content_list"]:
             triangle_offsets = create_polygon(
-                len(item["content_list"]), np.array([0.10])
+                len(item["content_list"]), np.array([0, 10])
             )
             scale = 1 if len(item["content_list"]) == 1 else 0.6
             for idx, o in enumerate(item["content_list"]):
@@ -856,9 +856,13 @@ class Visualizer:
         flags = pygame.HIDDEN
 
         if not self.observation_screen:
-            self.observation_screen = pygame.display.set_mode((width, height), flags=flags)
+            self.observation_screen = pygame.display.set_mode(
+                (width, height), flags=flags
+            )
 
-        self.draw_gamescreen(self.observation_screen, state, grid_size, [0 for _ in state["players"]])
+        self.draw_gamescreen(
+            self.observation_screen, state, grid_size, [0 for _ in state["players"]]
+        )
 
         red = pygame.surfarray.array_red(self.observation_screen)
         green = pygame.surfarray.array_green(self.observation_screen)
diff --git a/overcooked_simulator/gui_2d_vis/visualization.yaml b/overcooked_simulator/gui_2d_vis/visualization.yaml
index a0d7989c..1fb2d253 100644
--- a/overcooked_simulator/gui_2d_vis/visualization.yaml
+++ b/overcooked_simulator/gui_2d_vis/visualization.yaml
@@ -102,10 +102,10 @@ Dispenser:
 
 ServingWindow:
   parts:
-    #    - type: image
-    #      path: images/arrow_right.png
-    #      size: 1
-    #      center_offset: [ 0, 0 ]
+    - type: image
+      path: images/arrow_right.png
+      size: 1
+      center_offset: [ 0, 0 ]
     - type: image
       path: images/bell_gold.png
       size: 0.5
diff --git a/overcooked_simulator/gui_2d_vis/visualization_rl.yaml b/overcooked_simulator/gui_2d_vis/visualization_rl.yaml
deleted file mode 100644
index 7636e379..00000000
--- a/overcooked_simulator/gui_2d_vis/visualization_rl.yaml
+++ /dev/null
@@ -1,427 +0,0 @@
-# colors: https://www.webucator.com/article/python-color-constants-module/
-
-GameWindow:
-  WhatIsFixed: grid  # grid or window_width or window_height
-  size: 50
-  screen_margin: 100
-  min_width: 700
-  min_height: 600
-  buttons_width: 180
-  buttons_height: 60
-
-  order_bar_height: 100
-  order_size: 50
-
-  game_border_size: 1
-  game_border_color: black
-  background_color: lemonchiffon1
-
-Kitchen:
-  ground_tiles_color: sgigray76
-  background_lines: gray79
-
-Counter:
-  parts:
-    #    - type: rect
-    #      height: 1
-    #      width: 1
-    #      color: whitesmoke
-    - type: image
-      path: images/counter5.png
-      size: 1
-
-CuttingBoard:
-  parts:
-    - type: image
-      path: images/cutting_board_large.png
-      size: 0.75
-      center_offset: [ 0, -0.05 ]
-
-
-PlateDispenser:
-  parts: [ ]
-#    - type: rect
-#      height: 0.95
-#      width: 0.95
-#      color: cadetblue1
-
-Trashcan:
-  parts:
-    - type: image
-      path: images/trash3.png
-      size: 0.88
-      center_offset: [ 0, -0.05 ]
-
-#TomatoDispenser:
-#  parts:
-#    - color: orangered1
-#      type: rect
-#      height: 0.8
-#      width: 0.8
-#
-#LettuceDispenser:
-#  parts:
-#    - color: palegreen3
-#      type: rect
-#      height: 0.8
-#      width: 0.8
-#
-#OnionDispenser:
-#  parts:
-#    - color: deeppink3
-#      type: rect
-#      height: 0.8
-#      width: 0.8
-#
-#MeatDispenser:
-#  parts:
-#    - color: indianred1
-#      type: rect
-#      height: 0.8
-#      width: 0.8
-#
-#BunDispenser:
-#  parts:
-#    - color: sandybrown
-#      type: rect
-#      height: 0.8
-#      width: 0.8
-
-Dispenser:
-  parts:
-    - type: circle
-      color: black
-      radius: 0.35
-      center_offset: [ 0, -0.05 ]
-    - type: circle
-      color: gray83
-      radius: 0.33
-      center_offset: [ 0, -0.05 ]
-
-
-  item_offset: [ 0, -0.05 ]
-  item_scale: 0.9
-
-ServingWindow:
-  parts:
-    - type: image
-      path: images/arrow_right.png
-      size: 1
-      center_offset: [ 0, 0 ]
-    - type: image
-      path: images/bell_gold.png
-      size: 0.5
-      center_offset: [ -0.4, 0.1 ]
-      rotate_image: False
-
-Stove:
-  parts:
-    - color: black
-      type: rect
-      height: 0.875
-      width: 0.625
-    - color: flesh
-      type: circle
-      radius: 0.25
-
-Sink:
-  parts:
-    - type: image
-      path: images/sink1.png
-      size: 0.85
-      center_offset: [ 0, -0.12 ]
-
-SinkAddon:
-  parts:
-    - type: image
-      path: images/drip2.png
-      size: 0.75
-      center_offset: [ 0, -0.05 ]
-
-# Tools
-Extinguisher:
-  parts:
-    - type: image
-      path: images/fire_extinguisher.png
-      size: 0.85
-      center_offset: [ 0, -0.05 ]
-
-# Effects
-Fire:
-  parts:
-    - type: image
-      path: images/fire.png
-      size: 1
-
-Fire1:
-  parts:
-    - type: image
-      path: images/fire.png
-      size: 1.0
-
-Fire2:
-  parts:
-    - type: image
-      path: images/fire2.png
-      size: 1.0
-
-Fire3:
-  parts:
-    - type: image
-      path: images/fire3.png
-      size: 1.0
-
-
-# Items
-Tomato:
-  parts:
-    - type: image
-      path: images/tomato3_smaller.png
-      size: 1
-
-Onion:
-  parts:
-    - type: image
-      path: images/onion_large.png
-      size: 0.8
-
-Bun:
-  parts:
-    - type: image
-      path: images/bun.png
-      size: 0.9
-
-Lettuce:
-  parts:
-    - type: image
-      path: images/lettuce_smaller.png
-      size: 0.8
-
-Meat:
-  parts:
-    - type: image
-      path: images/meat.png
-      size: 1
-
-ChoppedLettuce:
-  parts:
-    - type: image
-      path: images/lettuce_cut_smaller.png
-      size: 0.8
-
-ChoppedTomato:
-  parts:
-    - type: image
-      path: images/tomato3_cut_smaller.png
-      size: 1
-
-ChoppedOnion:
-  parts:
-    - type: image
-      path: images/onion_cut.png
-      size: 0.95
-
-RawPatty:
-  parts:
-    - type: image
-      path: images/raw_patty.png
-      size: 0.9
-
-CookedPatty:
-  parts:
-    - type: image
-      path: images/cooked_patty.png
-      size: 0.9
-
-Burger:
-  parts:
-    - type: image
-      path: images/burger.png
-      size: 0.8
-
-Salad:
-  parts:
-    - type: image
-      path: images/salad.png
-      size: 0.8
-
-TomatoSoup:
-  parts:
-    - type: image
-      path: images/tomato_soup_pot.png
-      size: 1.05
-      center_offset: [ -0.02, -0.1 ]
-
-TomatoSoupPlate:
-  parts:
-    - type: image
-      path: images/tomato_soup_plate.png
-      size: 0.6
-
-OnionSoup:
-  parts:
-    - type: image
-      path: images/onion_soup_pot.png
-      size: 1.05
-      center_offset: [ -0.02, -0.1 ]
-
-OnionSoupPlate:
-  parts:
-    - type: image
-      path: images/onion_soup_plate.png
-      size: 0.6
-
-Cook:
-  parts:
-    - type: image
-      path: images/pixel_cook_masked.png
-      size: 1
-
-Plate:
-  parts:
-    - type: image
-      path: images/plate_clean.png
-      size: 0.8
-
-DirtyPlate:
-  parts:
-    - type: image
-      path: images/plate_dirty.png
-      size: 0.8
-
-Pot:
-  parts:
-    - type: image
-      path: images/pot_smaller.png
-      size: 1.05
-      center_offset: [ -0.02, -0.1 ]
-
-Pan:
-  parts:
-    - type: image
-      path: images/pan.png
-      size: 1.1
-
-DeepFryer:
-  parts:
-    - color: gray5
-      type: rect
-      height: 0.875
-      width: 0.875
-    - color: lightyellow2
-      type: rect
-      height: 0.675
-      width: 0.675
-Oven:
-  parts:
-    - color: gray83
-      type: rect
-      height: 0.875
-      width: 0.625
-    - type: rect
-      color: black
-      height: 0.8
-      width: 0.3
-      center_offset: [ 0, -0.1 ]
-
-Basket:
-  parts:
-    - type: image
-      path: images/basket.png
-      size: 0.8
-
-Peel:
-  parts:
-    - type: image
-      path: images/pizza_wood.png
-      size: 1.2
-      center_offset: [ 0, 0.2 ]
-
-Potato:
-  parts:
-    - type: image
-      path: images/potato2.png
-      size: 0.7
-
-RawChips:
-  parts:
-    - type: image
-      path: images/raw_fries.png
-      size: 0.8
-
-Chips:
-  parts:
-    - type: image
-      path: images/fries2.png
-      size: 0.8
-
-Fish:
-  parts:
-    - type: image
-      path: images/fish3.png
-      size: 0.9
-
-ChoppedFish:
-  parts:
-    - type: image
-      path: images/cut_fish.png
-      size: 0.8
-
-FriedFish:
-  parts:
-    - type: image
-      path: images/fried_fish.png
-      size: 0.8
-
-FishAndChips:
-  parts:
-    - type: image
-      path: images/fries2.png
-      size: 0.8
-      center_offset: [ -0.1, 0 ]
-    - type: image
-      path: images/fried_fish.png
-      size: 0.8
-      center_offset: [ +0.2, 0 ]
-
-Dough:
-  parts:
-    - type: image
-      path: images/pizza_dough.png
-      size: 0.7
-
-PizzaBase:
-  parts:
-    - type: image
-      path: images/pizza_base.png
-      size: 0.9
-
-Sausage:
-  parts:
-    - type: image
-      path: images/sausage.png
-      size: 0.8
-
-ChoppedSausage:
-  parts:
-    - type: image
-      path: images/sausage_chopped.png
-      size: 0.8
-
-Cheese:
-  parts:
-    - type: image
-      path: images/cheese3.png
-      size: 0.7
-
-GratedCheese:
-  parts:
-    - type: image
-      path: images/grated_cheese.png
-      size: 1.1
-
-Pizza:
-  parts:
-    - type: image
-      path: images/pizza.png
-      size: 0.9
\ No newline at end of file
diff --git a/overcooked_simulator/reinforcement_learning/environment_config_rl.yaml b/overcooked_simulator/reinforcement_learning/environment_config_rl.yaml
index 6b9c6389..8b39daee 100644
--- a/overcooked_simulator/reinforcement_learning/environment_config_rl.yaml
+++ b/overcooked_simulator/reinforcement_learning/environment_config_rl.yaml
@@ -19,15 +19,16 @@ meals:
 
 layout_chars:
   _: Free
-  hash: Counter
+  hash: Counter  # #
   A: Agent
   pipe: Extinguisher
   P: PlateDispenser
   C: CuttingBoard
   X: Trashcan
-  W: ServingWindow
+  $: ServingWindow
   S: Sink
   +: SinkAddon
+  at: Plate  # @ just a clean plate on a counter
   U: Pot  # with Stove
   Q: Pan  # with Stove
   O: Peel  # with Oven
@@ -42,6 +43,15 @@ layout_chars:
   G: Sausage  # sausaGe
   B: Bun
   M: Meat
+  question: Counter  # ? mushroom
+  ↓: Counter
+  ^: Counter
+  right: Counter
+  left: Counter
+  wave: Free  # ~ Water
+  minus: Free  # - Ice
+  dquote: Counter  # " wall/truck
+  p: Counter # second plate return ??
 
 
 orders:
@@ -108,28 +118,28 @@ extra_setup_functions:
       hooks: [ trashcan_usage ]
       callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks ''
       callback_class_kwargs:
-        static_score: -0.5
+        static_score: -0.2
   item_cut:
     func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class ''
     kwargs:
       hooks: [ cutting_board_100 ]
       callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks ''
       callback_class_kwargs:
-        static_score: 0.10
+        static_score: 0.01
   stepped:
     func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class ''
     kwargs:
       hooks: [ post_step ]
       callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks ''
       callback_class_kwargs:
-        static_score: -0.05
+        static_score: -0.01
   combine:
     func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class ''
     kwargs:
       hooks: [ drop_off_on_cooking_equipment ]
       callback_class: !!python/name:overcooked_simulator.scores.ScoreViaHooks ''
       callback_class_kwargs:
-        static_score: 0.15
+        static_score: 0.01
   #  json_states:
   #    func: !!python/name:overcooked_simulator.hooks.hooks_via_callback_class ''
   #    kwargs:
diff --git a/overcooked_simulator/reinforcement_learning/full_vectorization.py b/overcooked_simulator/reinforcement_learning/full_vectorization.py
new file mode 100644
index 00000000..c4ee9d99
--- /dev/null
+++ b/overcooked_simulator/reinforcement_learning/full_vectorization.py
@@ -0,0 +1,244 @@
+# def setup_vectorization(self) -> VectorStateGenerationData:
+#     grid_base_array = np.zeros(
+#         (
+#             int(self.env.kitchen_width),
+#             int(self.env.kitchen_height),
+#             114 + 12 + 4,  # TODO calc based on item info
+#         ),
+#         dtype=np.float32,
+#     )
+#     counter_list = [
+#         "Counter",
+#         "CuttingBoard",
+#         "ServingWindow",
+#         "Trashcan",
+#         "Sink",
+#         "SinkAddon",
+#         "Stove",
+#         "DeepFryer",
+#         "Oven",
+#     ]
+#     grid_idxs = [
+#         (x, y)
+#         for x in range(int(self.env.kitchen_width))
+#         for y in range(int(self.env.kitchen_height))
+#     ]
+#     # counters do not move
+#     for counter in self.env.counters:
+#         grid_idx = np.floor(counter.pos).astype(int)
+#         counter_name = (
+#             counter.name
+#             if isinstance(counter, CookingCounter)
+#             else (
+#                 repr(counter)
+#                 if isinstance(Counter, Dispenser)
+#                 else counter.__class__.__name__
+#             )
+#         )
+#         assert counter_name in counter_list or counter_name.endswith(
+#             "Dispenser"
+#         ), f"Unknown Counter {counter}"
+#         oh_idx = len(counter_list)
+#         if counter_name in counter_list:
+#             oh_idx = counter_list.index(counter_name)
+#
+#         one_hot = [0] * (len(counter_list) + 2)
+#         one_hot[oh_idx] = 1
+#         grid_base_array[
+#             grid_idx[0], grid_idx[1], 4 : 4 + (len(counter_list) + 2)
+#         ] = np.array(one_hot, dtype=np.float32)
+#
+#         grid_idxs.remove((int(grid_idx[0]), int(grid_idx[1])))
+#
+#     for free_idx in grid_idxs:
+#         one_hot = [0] * (len(counter_list) + 2)
+#         one_hot[len(counter_list) + 1] = 1
+#         grid_base_array[
+#             free_idx[0], free_idx[1], 4 : 4 + (len(counter_list) + 2)
+#         ] = np.array(one_hot, dtype=np.float32)
+#
+#     player_info_base_array = np.zeros(
+#         (
+#             4,
+#             4 + 114,
+#         ),
+#         dtype=np.float32,
+#     )
+#     order_base_array = np.zeros((10 * (8 + 1)), dtype=np.float32)
+#
+#     return VectorStateGenerationData(
+#         grid_base_array=grid_base_array,
+#         oh_len=12,
+#     )
+#
+#
+# def get_simple_vectorized_item(self, item: Item) -> npt.NDArray[float]:
+#     name = item.name
+#     array = np.zeros(21, dtype=np.float32)
+#     if item.name.startswith("Burnt"):
+#         name = name[len("Burnt") :]
+#         array[0] = 1.0
+#     if name.startswith("Chopped"):
+#         array[1] = 1.0
+#         name = name[len("Chopped") :]
+#     if name in [
+#         "PizzaBase",
+#         "GratedCheese",
+#         "RawChips",
+#         "RawPatty",
+#     ]:
+#         array[1] = 1.0
+#         name = {
+#             "PizzaBase": "Dough",
+#             "GratedCheese": "Cheese",
+#             "RawChips": "Potato",
+#             "RawPatty": "Meat",
+#         }[name]
+#     if name == "CookedPatty":
+#         array[2] = 1.0
+#         name = "Meat"
+#
+#     if name in self.vector_state_generation.meals:
+#         idx = 3 + self.vector_state_generation.meals.index(name)
+#     elif name in self.vector_state_generation.ingredients:
+#         idx = (
+#             3
+#             + len(self.vector_state_generation.meals)
+#             + self.vector_state_generation.ingredients.index(name)
+#         )
+#     else:
+#         raise ValueError(f"Unknown item {name} - {item}")
+#     array[idx] = 1.0
+#     return array
+#
+#
+# def get_vectorized_item(self, item: Item) -> npt.NDArray[float]:
+#     item_array = np.zeros(114, dtype=np.float32)
+#
+#     if isinstance(item, CookingEquipment) or item.item_info.type == ItemType.Tool:
+#         assert (
+#             item.name in self.vector_state_generation.equipments
+#         ), f"unknown equipment {item}"
+#         idx = self.vector_state_generation.equipments.index(item.name)
+#         item_array[idx] = 1.0
+#         if isinstance(item, CookingEquipment):
+#             for s_idx, sub_item in enumerate(item.content_list):
+#                 if s_idx > 3:
+#                     print("Too much content in the content list, info dropped")
+#                     break
+#                 start_idx = len(self.vector_state_generation.equipments) + 21 + 2
+#                 item_array[
+#                     start_idx + (s_idx * (21)) : start_idx + ((s_idx + 1) * (21))
+#                 ] = self.get_simple_vectorized_item(sub_item)
+#
+#     else:
+#         item_array[
+#             len(self.vector_state_generation.equipments) : len(
+#                 self.vector_state_generation.equipments
+#             )
+#             + 21
+#         ] = self.get_simple_vectorized_item(item)
+#
+#     item_array[
+#         len(self.vector_state_generation.equipments) + 21 + 1
+#     ] = item.progress_percentage
+#
+#     if item.active_effects:
+#         item_array[
+#             len(self.vector_state_generation.equipments) + 21 + 2
+#         ] = 1.0  # TODO percentage of fire...
+#
+#     return item_array
+#
+#
+# def get_vectorized_state_full(
+#     self, player_id: str
+# ) -> Tuple[
+#     npt.NDArray[npt.NDArray[float]],
+#     npt.NDArray[npt.NDArray[float]],
+#     float,
+#     npt.NDArray[float],
+# ]:
+#     grid_array = self.vector_state_generation.grid_base_array.copy()
+#     for counter in self.env.counters:
+#         grid_idx = np.floor(counter.pos).astype(int)  # store in counter?
+#         if counter.occupied_by:
+#             if isinstance(counter.occupied_by, deque):
+#                 ...
+#             else:
+#                 item = counter.occupied_by
+#                 grid_array[
+#                     grid_idx[0],
+#                     grid_idx[1],
+#                     4 + self.vector_state_generation.oh_len :,
+#                 ] = self.get_vectorized_item(item)
+#         if counter.active_effects:
+#             grid_array[
+#                 grid_idx[0],
+#                 grid_idx[1],
+#                 4 + self.vector_state_generation.oh_len - 1,
+#             ] = 1.0  # TODO percentage of fire...
+#
+#     assert len(self.env.players) <= 4, "Too many players for vector representation"
+#     player_vec = np.zeros(
+#         (
+#             4,
+#             4 + 114,
+#         ),
+#         dtype=np.float32,
+#     )
+#     player_pos = 1
+#     for player in self.env.players.values():
+#         if player.name == player_id:
+#             idx = 0
+#             player_vec[0, :4] = np.array(
+#                 [
+#                     player.pos[0],
+#                     player.pos[1],
+#                     player.facing_point[0],
+#                     player.facing_point[1],
+#                 ],
+#                 dtype=np.float32,
+#             )
+#         else:
+#             idx = player_pos
+#
+#         if not idx:
+#             player_pos += 1
+#         grid_idx = np.floor(player.pos).astype(int)  # store in counter?
+#         player_vec[idx, :4] = np.array(
+#             [
+#                 player.pos[0] - grid_idx[0],
+#                 player.pos[1] - grid_idx[1],
+#                 player.facing_point[0] / np.linalg.norm(player.facing_point),
+#                 player.facing_point[1] / np.linalg.norm(player.facing_point),
+#             ],
+#             dtype=np.float32,
+#         )
+#         grid_array[grid_idx[0], grid_idx[1], idx] = 1.0
+#
+#         if player.holding:
+#             player_vec[idx, 4:] = self.get_vectorized_item(player.holding)
+#
+#     order_array = np.zeros((10 * (8 + 1)), dtype=np.float32)
+#
+#     for i, order in enumerate(self.env.order_manager.open_orders):
+#         if i > 9:
+#             print("some orders are not represented in the vectorized state")
+#             break
+#         assert (
+#             order.meal.name in self.vector_state_generation.meals
+#         ), "unknown meal in order"
+#         idx = self.vector_state_generation.meals.index(order.meal.name)
+#         order_array[(i * 9) + idx] = 1.0
+#         order_array[(i * 9) + 8] = (
+#             self.env_time - order.start_time
+#         ).total_seconds() / order.max_duration.total_seconds()
+#
+#     return (
+#         grid_array,
+#         player_vec,
+#         (self.env.env_time - self.env.start_time).total_seconds()
+#         / (self.env.env_time_end - self.env.start_time).total_seconds(),
+#         order_array,
+#     )
diff --git a/overcooked_simulator/reinforcement_learning/gym_env.py b/overcooked_simulator/reinforcement_learning/gym_env.py
index 8b87ad08..4b59ec57 100644
--- a/overcooked_simulator/reinforcement_learning/gym_env.py
+++ b/overcooked_simulator/reinforcement_learning/gym_env.py
@@ -5,11 +5,9 @@ from collections import deque
 from datetime import timedelta
 from enum import Enum
 from pathlib import Path
-from typing import Tuple
 
 import cv2
 import numpy as np
-import numpy.typing as npt
 import wandb
 import yaml
 from gymnasium import spaces, Env
@@ -24,7 +22,7 @@ from wandb.integration.sb3 import WandbCallback
 
 from overcooked_simulator import ROOT_DIR
 from overcooked_simulator.counters import Counter, CookingCounter, Dispenser
-from overcooked_simulator.game_items import Item, CookingEquipment, ItemType
+from overcooked_simulator.game_items import CookingEquipment
 from overcooked_simulator.gui_2d_vis.drawing import Visualizer
 from overcooked_simulator.overcooked_environment import (
     Environment,
@@ -32,19 +30,15 @@ from overcooked_simulator.overcooked_environment import (
     ActionType,
     InterActionData,
 )
-from overcooked_simulator.utils import VectorStateGenerationData
-
-SimpleActionSpace = Enum(
-    "SimpleActionSpace",
-    [
-        "Up",
-        "Left",
-        "Down",
-        "Right",
-        "Interact",
-        "Put",
-    ],
-)
+
+
+class SimpleActionSpace(Enum):  # Discrete actions of the agent; each member's value is its own name.
+    Up = "Up"
+    Down = "Down"  # NOTE(review): Down/Left are swapped relative to the removed functional Enum (Up, Left, Down, Right).
+    Left = "Left"  # Integer indices built via enumerate(SimpleActionSpace) therefore differ from the old mapping.
+    Right = "Right"
+    Interact = "Interact"
+    Put = "Put"
 
 
 def get_env_action(player_id, simple_action, duration):
@@ -56,6 +50,7 @@ def get_env_action(player_id, simple_action, duration):
                 np.array([0, -1]),
                 duration,
             )
+
         case SimpleActionSpace.Left:
             return Action(
                 player_id,
@@ -104,7 +99,7 @@ with open(layout_path, "r") as file:
     layout = file.read()
 with open(environment_config_path, "r") as file:
     environment_config = file.read()
-with open(ROOT_DIR / "gui_2d_vis" / "visualization_rl.yaml", "r") as file:
+with open(ROOT_DIR / "gui_2d_vis" / "visualization.yaml", "r") as file:
     visualization_config = yaml.safe_load(file)
 
 
@@ -117,6 +112,7 @@ def shuffle_counters(env):
     random.shuffle(new_counter_pos)
     for counter, new_pos in zip(sample_counter, new_counter_pos):
         counter.pos = new_pos
+    env.counter_positions = np.array([c.pos for c in env.counters])
 
 
 class EnvGymWrapper(Env):
@@ -124,7 +120,7 @@ class EnvGymWrapper(Env):
     observation, reward, terminated, truncated, info = env.step(action)
     """
 
-    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 20}
+    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 10}
 
     def __init__(self):
         super().__init__()
@@ -133,8 +129,8 @@ class EnvGymWrapper(Env):
 
         self.randomize_counter_placement = True
         self.use_rgb_obs = False  # if False uses simple vectorized state
-        self.use_onehot = False
         self.full_vector_state = True
+        self.onehot_state = False
 
         self.env: Environment = Environment(
             env_config=environment_config,
@@ -146,16 +142,13 @@ class EnvGymWrapper(Env):
         if self.randomize_counter_placement:
             shuffle_counters(self.env)
 
-        if self.full_vector_state:
-            self.vector_state_generation = self.setup_vectorization()
-
-        self.visualizer: Visualizer = Visualizer(config=visualization_config)
-        self.visualizer.create_player_colors(1)
-
         self.player_name = str(0)
         self.env.add_player(self.player_name)
         self.player_id = list(self.env.players.keys())[0]
 
+        self.visualizer: Visualizer = Visualizer(config=visualization_config)
+        self.visualizer.create_player_colors(1)
+
         # self.action_space = {idx: value for idx, value in enumerate(SimpleActionSpace)}
         self.action_space_map = {}
         for idx, item in enumerate(SimpleActionSpace):
@@ -164,16 +157,15 @@ class EnvGymWrapper(Env):
         self.in_between_steps = 1
 
         self.action_space = spaces.Discrete(len(self.action_space_map))
-        # Example for using image as input (channel-first; channel-last also works):
 
         min_obs_val = -1 if not self.use_rgb_obs else 0
-        max_obs_val = 1 if self.use_onehot else 255 if self.use_rgb_obs else 8
+        max_obs_val = 255 if self.use_rgb_obs else 1 if self.onehot_state else 9
         dummy_obs = self.get_observation()
         self.observation_space = spaces.Box(
             low=min_obs_val,
             high=max_obs_val,
             shape=dummy_obs.shape,
-            dtype=np.uint8 if self.use_rgb_obs else np.float32,
+            dtype=np.uint8 if self.use_rgb_obs else int,
         )
         print(self.observation_space)
 
@@ -182,9 +174,66 @@ class EnvGymWrapper(Env):
         self.step_counter = 0
         self.prev_score = 0
 
+    def vectorize_item(self, item, item_list):  # Encode an item (or None / a deque of items) as a one-hot vector over item_list.
+        item_one_hot = np.zeros(len(item_list))  # no dtype passed, so NumPy's default float dtype is used
+        if item is None:
+            item_name = "None"
+        elif isinstance(item, deque):  # for a deque only the first element is encoded
+            if len(item) > 0:
+                item_name = item[0].name
+            else:
+                item_name = "None"
+        else:
+            item_name = item.name
+
+        if isinstance(item, CookingEquipment):  # refine the name according to the equipment's content_list
+            if item.name == "Pot":
+                if len(item.content_list) > 0:
+                    if item.content_list[0].name == "TomatoSoup":  # first entry being TomatoSoup marks the pot as done
+                        item_name = "PotDone"
+                    elif len(item.content_list) == 1:
+                        item_name = "PotOne"
+                    elif len(item.content_list) == 2:
+                        item_name = "PotTwo"
+                    elif len(item.content_list) == 3:  # any other length leaves item_name as "Pot"
+                        item_name = "PotThree"
+            elif item.name == "Plate":
+                if len(item.content_list) == 0:
+                    item_name = "Plate"
+                else:  # every non-empty plate is encoded as PlateTomatoSoup, whatever it holds
+                    item_name = "PlateTomatoSoup"
+        assert item_name in item_list, f"Unknown item {item_name}."  # item_list must contain every name produced above
+        item_idx = item_list.index(item_name)
+        item_one_hot[item_idx] = 1
+
+        return item_one_hot, item_idx  # (one-hot array, index of the item name in item_list)
+
+    @staticmethod
+    def vectorize_counter(counter, counter_list):  # Encode a counter's type as a one-hot vector over counter_list.
+        counter_name = (
+            counter.name
+            if isinstance(counter, CookingCounter)
+            else (
+                repr(counter)
+                if isinstance(Counter, Dispenser)  # NOTE(review): tests the class `Counter`, not the instance `counter`; presumably never true - confirm intent
+                else counter.__class__.__name__  # effectively the path taken by all non-cooking counters
+            )
+        )
+        if counter_name == "Dispenser":  # plain Dispenser: build the name from the item it currently holds
+            counter_name = f"{counter.occupied_by.name}Dispenser"
+        assert counter_name in counter_list, f"Unknown Counter {counter}"
+
+        counter_oh_idx = counter_list.index("Empty")  # fallback; only kept if the assert above is disabled and the name is unknown
+        if counter_name in counter_list:
+            counter_oh_idx = counter_list.index(counter_name)
+
+        counter_one_hot = np.zeros(len(counter_list), dtype=int)
+        counter_one_hot[counter_oh_idx] = 1
+        return counter_one_hot, counter_oh_idx  # (one-hot array, index of the counter name in counter_list)
+
     def get_vectorized_state_simple(self, player, onehot=True):
-        item_list = ["Pot", "Tomato", "ChoppedTomato", "Plate"]
         counter_list = [
+            "Empty",
             "Counter",
             "PlateDispenser",
             "TomatoDispenser",
@@ -195,11 +244,23 @@ class EnvGymWrapper(Env):
             "CuttingBoard",
         ]
 
+        item_list = [
+            "None",
+            "Pot",
+            "PotOne",
+            "PotTwo",
+            "PotThree",
+            "PotDone",
+            "Tomato",
+            "ChoppedTomato",
+            "Plate",
+            "PlateTomatoSoup",
+        ]
+
         grid_width, grid_height = int(self.env.kitchen_width), int(
             self.env.kitchen_height
         )
 
-        counter_one_hot_length = len(counter_list) + 1  # one for empty field
         grid_base_array = np.zeros(
             (
                 grid_width,
@@ -207,312 +268,60 @@ class EnvGymWrapper(Env):
             ),
             dtype=int,
         )
-
         grid_idxs = [(x, y) for x in range(grid_width) for y in range(grid_height)]
 
-        # counters do not move
-        for counter in self.env.counters:
-            grid_idx = np.floor(counter.pos).astype(int)
-            counter_name = (
-                counter.name
-                if isinstance(counter, CookingCounter)
-                else (
-                    repr(counter)
-                    if isinstance(Counter, Dispenser)
-                    else counter.__class__.__name__
-                )
-            )
-            if counter_name == "Dispenser":
-                counter_name = f"{counter.occupied_by.name}Dispenser"
-            assert counter_name in counter_list, f"Unknown Counter {counter}"
-
-            counter_oh_idx = counter_one_hot_length
-            if counter_name in counter_list:
-                counter_oh_idx = counter_list.index(counter_name)
-
-            grid_base_array[grid_idx[0], grid_idx[1]] = counter_oh_idx
-            grid_idxs.remove((int(grid_idx[0]), int(grid_idx[1])))
-
-        for free_idx in grid_idxs:
-            grid_base_array[free_idx[0], free_idx[1]] = counter_one_hot_length - 1
-
-        counter_grid_one_hot = np.zeros(
-            (grid_width, grid_height, counter_one_hot_length), dtype=int
-        )
-        for x in range(grid_width):
-            for y in range(grid_height):
-                counter_type_idx = grid_base_array[x, y]
-                counter_grid_one_hot[x, y, counter_type_idx] = 1
-
-        player_pos = self.env.players[player].pos
         if onehot:
-            player_pos[0] /= self.env.kitchen_width
-            player_pos[1] /= self.env.kitchen_height
+            item_one_hot_length = len(item_list)
+            counter_items = np.zeros(
+                (grid_width, grid_height, item_one_hot_length), dtype=int
+            )
+            counter_one_hot_length = len(counter_list)
+            counters = np.zeros(
+                (grid_width, grid_height, counter_one_hot_length), dtype=int
+            )
         else:
-            player_pos = player_pos.astype(int)
-
-        player_dir = self.env.players[player].facing_direction
-        player_data = np.concatenate((player_pos, player_dir), axis=0)
+            counter_items = np.zeros((grid_width, grid_height), dtype=int)
+            counters = np.zeros((grid_width, grid_height), dtype=int)
 
-        items_one_hot_length = len(item_list) + 1
-        item_one_hot = np.zeros(items_one_hot_length, dtype=int)
-        player_item = self.env.players[player].holding
-        player_item_idx = items_one_hot_length - 1
-        if player_item:
-            if player_item.name in item_list:
-                player_item_idx = item_list.index(player_item.name)
-        item_one_hot[player_item_idx] = 1
-
-        final_idxs = np.concatenate(
-            (grid_base_array.flatten(), player_data, item_one_hot), axis=0
-        )
-        final_one_hot = np.concatenate(
-            (counter_grid_one_hot.flatten(), player_data, item_one_hot), axis=0
-        )
-
-        return final_one_hot if onehot else final_idxs
-
-    def setup_vectorization(self) -> VectorStateGenerationData:
-        grid_base_array = np.zeros(
-            (
-                int(self.env.kitchen_width),
-                int(self.env.kitchen_height),
-                114 + 12 + 4,  # TODO calc based on item info
-            ),
-            dtype=np.float32,
-        )
-        counter_list = [
-            "Counter",
-            "CuttingBoard",
-            "ServingWindow",
-            "Trashcan",
-            "Sink",
-            "SinkAddon",
-            "Stove",
-            "DeepFryer",
-            "Oven",
-        ]
-        grid_idxs = [
-            (x, y)
-            for x in range(int(self.env.kitchen_width))
-            for y in range(int(self.env.kitchen_height))
-        ]
-        # counters do not move
         for counter in self.env.counters:
             grid_idx = np.floor(counter.pos).astype(int)
-            counter_name = (
-                counter.name
-                if isinstance(counter, CookingCounter)
-                else (
-                    repr(counter)
-                    if isinstance(Counter, Dispenser)
-                    else counter.__class__.__name__
-                )
-            )
-            assert counter_name in counter_list or counter_name.endswith(
-                "Dispenser"
-            ), f"Unknown Counter {counter}"
-            oh_idx = len(counter_list)
-            if counter_name in counter_list:
-                oh_idx = counter_list.index(counter_name)
-
-            one_hot = [0] * (len(counter_list) + 2)
-            one_hot[oh_idx] = 1
-            grid_base_array[
-                grid_idx[0], grid_idx[1], 4 : 4 + (len(counter_list) + 2)
-            ] = np.array(one_hot, dtype=np.float32)
 
+            counter_one_hot, counter_oh_idx = self.vectorize_counter(
+                counter, counter_list
+            )
+            grid_base_array[grid_idx[0], grid_idx[1]] = counter_oh_idx
             grid_idxs.remove((int(grid_idx[0]), int(grid_idx[1])))
 
+            item_one_hot, item_oh_idx = self.vectorize_item(
+                counter.occupied_by, item_list
+            )
+            # Address the single cell (x, y); a bare index array selects whole rows.
+            cell = (grid_idx[0], grid_idx[1])
+            counter_items[cell] = item_one_hot if onehot else item_oh_idx
+            counters[cell] = counter_one_hot if onehot else counter_oh_idx
+
         for free_idx in grid_idxs:
-            one_hot = [0] * (len(counter_list) + 2)
-            one_hot[len(counter_list) + 1] = 1
-            grid_base_array[
-                free_idx[0], free_idx[1], 4 : 4 + (len(counter_list) + 2)
-            ] = np.array(one_hot, dtype=np.float32)
+            grid_base_array[free_idx[0], free_idx[1]] = counter_list.index("Empty")
 
-        player_info_base_array = np.zeros(
-            (
-                4,
-                4 + 114,
-            ),
-            dtype=np.float32,
-        )
-        order_base_array = np.zeros((10 * (8 + 1)), dtype=np.float32)
+        player_pos = self.env.players[player].pos.astype(int)
+        player_dir = self.env.players[player].facing_direction.astype(int)
+        player_data = np.concatenate((player_pos, player_dir), axis=0)
 
-        return VectorStateGenerationData(
-            grid_base_array=grid_base_array,
-            oh_len=12,
+        player_item_one_hot, player_item_idx = self.vectorize_item(
+            self.env.players[player].holding, item_list
         )
+        player_item = player_item_one_hot if onehot else [player_item_idx]
 
-    def get_simple_vectorized_item(self, item: Item) -> npt.NDArray[float]:
-        name = item.name
-        array = np.zeros(21, dtype=np.float32)
-        if item.name.startswith("Burnt"):
-            name = name[len("Burnt") :]
-            array[0] = 1.0
-        if name.startswith("Chopped"):
-            array[1] = 1.0
-            name = name[len("Chopped") :]
-        if name in [
-            "PizzaBase",
-            "GratedCheese",
-            "RawChips",
-            "RawPatty",
-        ]:
-            array[1] = 1.0
-            name = {
-                "PizzaBase": "Dough",
-                "GratedCheese": "Cheese",
-                "RawChips": "Potato",
-                "RawPatty": "Meat",
-            }[name]
-        if name == "CookedPatty":
-            array[2] = 1.0
-            name = "Meat"
-
-        if name in self.vector_state_generation.meals:
-            idx = 3 + self.vector_state_generation.meals.index(name)
-        elif name in self.vector_state_generation.ingredients:
-            idx = (
-                3
-                + len(self.vector_state_generation.meals)
-                + self.vector_state_generation.ingredients.index(name)
-            )
-        else:
-            raise ValueError(f"Unknown item {name} - {item}")
-        array[idx] = 1.0
-        return array
-
-    def get_vectorized_item(self, item: Item) -> npt.NDArray[float]:
-        item_array = np.zeros(114, dtype=np.float32)
-
-        if isinstance(item, CookingEquipment) or item.item_info.type == ItemType.Tool:
-            assert (
-                item.name in self.vector_state_generation.equipments
-            ), f"unknown equipment {item}"
-            idx = self.vector_state_generation.equipments.index(item.name)
-            item_array[idx] = 1.0
-            if isinstance(item, CookingEquipment):
-                for s_idx, sub_item in enumerate(item.content_list):
-                    if s_idx > 3:
-                        print("Too much content in the content list, info dropped")
-                        break
-                    start_idx = len(self.vector_state_generation.equipments) + 21 + 2
-                    item_array[
-                        start_idx + (s_idx * (21)) : start_idx + ((s_idx + 1) * (21))
-                    ] = self.get_simple_vectorized_item(sub_item)
-
-        else:
-            item_array[
-                len(self.vector_state_generation.equipments) : len(
-                    self.vector_state_generation.equipments
-                )
-                + 21
-            ] = self.get_simple_vectorized_item(item)
-
-        item_array[
-            len(self.vector_state_generation.equipments) + 21 + 1
-        ] = item.progress_percentage
-
-        if item.active_effects:
-            item_array[
-                len(self.vector_state_generation.equipments) + 21 + 2
-            ] = 1.0  # TODO percentage of fire...
-
-        return item_array
-
-    def get_vectorized_state_full(
-        self, player_id: str
-    ) -> Tuple[
-        npt.NDArray[npt.NDArray[float]],
-        npt.NDArray[npt.NDArray[float]],
-        float,
-        npt.NDArray[float],
-    ]:
-        grid_array = self.vector_state_generation.grid_base_array.copy()
-        for counter in self.env.counters:
-            grid_idx = np.floor(counter.pos).astype(int)  # store in counter?
-            if counter.occupied_by:
-                if isinstance(counter.occupied_by, deque):
-                    ...
-                else:
-                    item = counter.occupied_by
-                    grid_array[
-                        grid_idx[0],
-                        grid_idx[1],
-                        4 + self.vector_state_generation.oh_len :,
-                    ] = self.get_vectorized_item(item)
-            if counter.active_effects:
-                grid_array[
-                    grid_idx[0],
-                    grid_idx[1],
-                    4 + self.vector_state_generation.oh_len - 1,
-                ] = 1.0  # TODO percentage of fire...
-
-        assert len(self.env.players) <= 4, "To many players for vector representation"
-        player_vec = np.zeros(
+        final = np.concatenate(
             (
-                4,
-                4 + 114,
+                counters.flatten(),
+                counter_items.flatten(),
+                player_data.flatten(),
+                player_item,
             ),
-            dtype=np.float32,
-        )
-        player_pos = 1
-        for player in self.env.players.values():
-            if player.name == player_id:
-                idx = 0
-                player_vec[0, :4] = np.array(
-                    [
-                        player.pos[0],
-                        player.pos[1],
-                        player.facing_point[0],
-                        player.facing_point[1],
-                    ],
-                    dtype=np.float32,
-                )
-            else:
-                idx = player_pos
-
-            if not idx:
-                player_pos += 1
-            grid_idx = np.floor(player.pos).astype(int)  # store in counter?
-            player_vec[idx, :4] = np.array(
-                [
-                    player.pos[0] - grid_idx[0],
-                    player.pos[1] - grid_idx[1],
-                    player.facing_point[0] / np.linalg.norm(player.facing_point),
-                    player.facing_point[1] / np.linalg.norm(player.facing_point),
-                ],
-                dtype=np.float32,
-            )
-            grid_array[grid_idx[0], grid_idx[1], idx] = 1.0
-
-            if player.holding:
-                player_vec[idx, 4:] = self.get_vectorized_item(player.holding)
-
-        order_array = np.zeros((10 * (8 + 1)), dtype=np.float32)
-
-        for i, order in enumerate(self.env.order_manager.open_orders):
-            if i > 9:
-                print("some orders are not represented in the vectorized state")
-                break
-            assert (
-                order.meal.name in self.vector_state_generation.meals
-            ), "unknown meal in order"
-            idx = self.vector_state_generation.meals.index(order.meal.name)
-            order_array[(i * 9) + idx] = 1.0
-            order_array[(i * 9) + 8] = (
-                self.env_time - order.start_time
-            ).total_seconds() / order.max_duration.total_seconds()
-
-        return (
-            grid_array,
-            player_vec,
-            (self.env.env_time - self.env.start_time).total_seconds()
-            / (self.env.env_time_end - self.env.start_time).total_seconds(),
-            order_array,
+            axis=0,
         )
+        return final
 
     def step(self, action):
         simple_action = self.action_space_map[action]
@@ -531,6 +340,9 @@ class EnvGymWrapper(Env):
         reward = self.env.score - self.prev_score
         self.prev_score = self.env.score
 
+        if reward > 0.6:
+            print("- - - - - - - - - - - - - - - - SCORED", reward)
+
         terminated = self.env.game_ended
         truncated = self.env.game_ended
         info = {}
@@ -552,9 +364,6 @@ class EnvGymWrapper(Env):
         self.env.add_player(self.player_name)
         self.player_id = list(self.env.players.keys())[0]
 
-        if self.full_vector_state:
-            self.vector_state_generation = self.setup_vectorization()
-
         info = {}
         obs = self.get_observation()
 
@@ -579,16 +388,16 @@ class EnvGymWrapper(Env):
     def close(self):
         pass
 
-    def get_env_img(self, gridsize):
+    def get_env_img(self, grid_size=20):
         state = self.env.get_json_state(player_id=self.player_id)
         json_dict = json.loads(state)
         observation = self.visualizer.get_state_image(
-            grid_size=gridsize, state=json_dict
+            grid_size=grid_size, state=json_dict
         ).transpose((1, 0, 2))
         return (observation.transpose((2, 0, 1))).astype(np.uint8)
 
     def get_vector_state(self):
-        obs = self.get_vectorized_state_simple("0", self.use_onehot)
+        obs = self.get_vectorized_state_simple("0", self.onehot_state)
         return obs
 
     def sample_random_action(self):
diff --git a/overcooked_simulator/reinforcement_learning/item_info_rl.yaml b/overcooked_simulator/reinforcement_learning/item_info_rl.yaml
index 8128b92c..22a79261 100644
--- a/overcooked_simulator/reinforcement_learning/item_info_rl.yaml
+++ b/overcooked_simulator/reinforcement_learning/item_info_rl.yaml
@@ -73,7 +73,7 @@ Sausage:
 ChoppedTomato:
   type: Ingredient
   needs: [ Tomato ]
-  seconds: 4.0
+  seconds: 1.0
   equipment: CuttingBoard
 
 ChoppedLettuce:
diff --git a/overcooked_simulator/reinforcement_learning/rl.layout b/overcooked_simulator/reinforcement_learning/rl.layout
index e1e8c075..131e1b95 100644
--- a/overcooked_simulator/reinforcement_learning/rl.layout
+++ b/overcooked_simulator/reinforcement_learning/rl.layout
@@ -1,5 +1,5 @@
 ##X##
-T___P
 #___#
+T___P
 U___#
-#C#W#
+#C#$#
diff --git a/overcooked_simulator/reinforcement_learning/rl_small.layout b/overcooked_simulator/reinforcement_learning/rl_small.layout
index a9eda0c9..bbb4ad3e 100644
--- a/overcooked_simulator/reinforcement_learning/rl_small.layout
+++ b/overcooked_simulator/reinforcement_learning/rl_small.layout
@@ -1,4 +1,4 @@
-#X##
-T__P
-U__#
-#CW#
+##X#
+T__#
+U__P
+#C$#
-- 
GitLab