From 46699dbbb9544ed248c47a287d1d45ee773d9d95 Mon Sep 17 00:00:00 2001
From: fheinrich <fheinrich@techfak.uni-bielefeld.de>
Date: Fri, 8 Mar 2024 16:40:13 +0100
Subject: [PATCH] Experimentation with reinforcement learning

---
 cooperative_cuisine/pygame_2d_vis/drawing.py  |  4 +-
 .../environment_config_rl.yaml                | 34 +++++++++-----
 .../reinforcement_learning/gym_env.py         | 47 ++++++++++++++-----
 .../reinforcement_learning/rl_small.layout    |  2 +-
 4 files changed, 58 insertions(+), 29 deletions(-)

diff --git a/cooperative_cuisine/pygame_2d_vis/drawing.py b/cooperative_cuisine/pygame_2d_vis/drawing.py
index e3d5c0d3..f9a99462 100644
--- a/cooperative_cuisine/pygame_2d_vis/drawing.py
+++ b/cooperative_cuisine/pygame_2d_vis/drawing.py
@@ -881,9 +881,7 @@ class Visualizer:
         self.draw_gamescreen(screen, state, grid_size, [0 for _ in state["players"]])
         pygame.image.save(screen, filename)
 
-    def get_state_image(
-        self, grid_size: int, save_folder: dict
-    ) -> npt.NDArray[np.uint8]:
+    def get_state_image(self, grid_size: int, state: dict) -> npt.NDArray[np.uint8]:
         width = int(np.ceil(state["kitchen"]["width"] * grid_size))
         height = int(np.ceil(state["kitchen"]["height"] * grid_size))
 
diff --git a/cooperative_cuisine/reinforcement_learning/environment_config_rl.yaml b/cooperative_cuisine/reinforcement_learning/environment_config_rl.yaml
index 60ea66aa..2eb50382 100644
--- a/cooperative_cuisine/reinforcement_learning/environment_config_rl.yaml
+++ b/cooperative_cuisine/reinforcement_learning/environment_config_rl.yaml
@@ -7,15 +7,10 @@ plates:
 
 game:
   time_limit_seconds: 300
+  undo_dispenser_pickup: true
+  validate_recipes: false
+
 
-meals:
-  all: true
-  # if all: false -> only orders for these meals are generated
-  # TODO: what if this list is empty?
-  list:
-    - TomatoSoup
-    - OnionSoup
-    - Salad
 
 layout_chars:
   _: Free
@@ -55,7 +50,15 @@ layout_chars:
 
 
 orders:
-  order_gen_class: !!python/name:cooperative_cuisine.order.RandomOrderGeneration ''
+  meals:
+    all: true
+    # if all: false -> only orders for these meals are generated
+    # TODO: what if this list is empty?
+    list:
+      - TomatoSoup
+      - OnionSoup
+      - Salad
+  order_gen_class: !!python/name:cooperative_cuisine.orders.RandomOrderGeneration ''
   # the class to that receives the kwargs. Should be a child class of OrderGeneration in orders.py
   order_gen_kwargs:
     order_duration_random_func:
@@ -103,7 +106,7 @@ extra_setup_functions:
       hooks: [ completed_order ]
       callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
       callback_class_kwargs:
-        static_score: 1
+        static_score: 0.95
 
   serve_not_ordered_meals:
     func: !!python/name:cooperative_cuisine.hooks.hooks_via_callback_class ''
@@ -111,7 +114,7 @@ extra_setup_functions:
       hooks: [ serve_not_ordered_meal ]
       callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
       callback_class_kwargs:
-        static_score: 1
+        static_score: 0.95
   trashcan_usages:
     func: !!python/name:cooperative_cuisine.hooks.hooks_via_callback_class ''
     kwargs:
@@ -125,7 +128,7 @@ extra_setup_functions:
       hooks: [ cutting_board_100 ]
       callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
       callback_class_kwargs:
-        static_score: 0.01
+        static_score: 0.1
   stepped:
     func: !!python/name:cooperative_cuisine.hooks.hooks_via_callback_class ''
     kwargs:
@@ -140,6 +143,13 @@ extra_setup_functions:
       callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
       callback_class_kwargs:
         static_score: 0.01
+  start_interact:
+    func: !!python/name:cooperative_cuisine.hooks.hooks_via_callback_class ''
+    kwargs:
+      hooks: [ player_start_interaction ]
+      callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
+      callback_class_kwargs:
+        static_score: 0.01
   #  json_states:
   #    func: !!python/name:cooperative_cuisine.hooks.hooks_via_callback_class ''
   #    kwargs:
diff --git a/cooperative_cuisine/reinforcement_learning/gym_env.py b/cooperative_cuisine/reinforcement_learning/gym_env.py
index 93fc02aa..8361ee9b 100644
--- a/cooperative_cuisine/reinforcement_learning/gym_env.py
+++ b/cooperative_cuisine/reinforcement_learning/gym_env.py
@@ -108,7 +108,7 @@ def shuffle_counters(env):
         if counter.__class__ != Counter:
             sample_counter.append(counter)
         else:
-            other_counters.append()
+            other_counters.append(counter)
     new_counter_pos = [c.pos for c in sample_counter]
     random.shuffle(new_counter_pos)
     for counter, new_pos in zip(sample_counter, new_counter_pos):
@@ -127,7 +127,7 @@ class EnvGymWrapper(Env):
     def __init__(self):
         super().__init__()
 
-        self.gridsize = 20
+        self.gridsize = 30
 
         self.randomize_counter_placement = True
         self.use_rgb_obs = False  # if False uses simple vectorized state
@@ -160,16 +160,17 @@ class EnvGymWrapper(Env):
 
         self.action_space = spaces.Discrete(len(self.action_space_map))
 
-        min_obs_val = -1 if not self.use_rgb_obs else 0
-        max_obs_val = 255 if self.use_rgb_obs else 1 if self.onehot_state else 9
+        self.seen_items = []
+
         dummy_obs = self.get_observation()
+        min_obs_val = -1 if not self.use_rgb_obs else 0
+        max_obs_val = 255 if self.use_rgb_obs else 1 if self.onehot_state else 20
         self.observation_space = spaces.Box(
             low=min_obs_val,
             high=max_obs_val,
             shape=dummy_obs.shape,
             dtype=np.uint8 if self.use_rgb_obs else int,
         )
-        print(self.observation_space)
 
         self.last_obs = dummy_obs
 
@@ -199,15 +200,29 @@ class EnvGymWrapper(Env):
                         item_name = "PotTwo"
                     elif len(item.content_list) == 3:
                         item_name = "PotThree"
-            elif item.name == "Plate":
-                if len(item.content_list) == 0:
-                    item_name = "Plate"
-                else:
-                    item_name = "PlateTomatoSoup"
+            if "Plate" in item.name:
+                content_list = [i.name for i in item.content_list]
+                match content_list:
+                    case ["TomatoSoup"]:
+                        item_name = "PlateTomatoSoup"
+                    case ["ChoppedTomato"]:
+                        item_name = "PlateChoppedTomato"
+                    case ["ChoppedLettuce"]:
+                        item_name = "PlateChoppedLettuce"
+                    case []:
+                        item_name = "Plate"
+                    case ["ChoppedLettuce", "ChoppedTomato"]:
+                        item_name = "PlateSalad"
+                    case other:
+                        assert False, f"Should not happen. {item}"
         assert item_name in item_list, f"Unknown item {item_name}."
         item_idx = item_list.index(item_name)
         item_one_hot[item_idx] = 1
 
+        # if item_name not in self.seen_items:
+        #     print(item, item_name)
+        #     self.seen_items.append(item_name)
+
         return item_one_hot, item_idx
 
     @staticmethod
@@ -244,6 +259,7 @@ class EnvGymWrapper(Env):
             "Trashcan",
             "Stove",
             "CuttingBoard",
+            "LettuceDispenser",
         ]
 
         item_list = [
@@ -257,6 +273,11 @@ class EnvGymWrapper(Env):
             "ChoppedTomato",
             "Plate",
             "PlateTomatoSoup",
+            "PlateSalad",
+            "Lettuce",
+            "PlateChoppedTomato",
+            "PlateChoppedLettuce",
+            "ChoppedLettuce",
         ]
 
         grid_width, grid_height = int(self.env.kitchen_width), int(
@@ -413,9 +434,9 @@ def main():
 
     config = {
         "policy_type": "MlpPolicy",
-        "total_timesteps": 30_000_000,  # hendric sagt eher so 300_000_000 schritte
+        "total_timesteps": 3_000_000,  # Hendric suggests more like 300_000_000 steps
         "env_id": "overcooked",
-        "number_envs_parallel": 4,
+        "number_envs_parallel": 64,
     }
 
     debug = False
@@ -424,7 +445,7 @@ def main():
     number_envs_parallel = config["number_envs_parallel"]
 
     model_classes = [A2C, DQN, PPO]
-    model_class = model_classes[2]
+    model_class = model_classes[1]
 
     if vec_env:
         env = make_vec_env(EnvGymWrapper, n_envs=number_envs_parallel)
diff --git a/cooperative_cuisine/reinforcement_learning/rl_small.layout b/cooperative_cuisine/reinforcement_learning/rl_small.layout
index bbb4ad3e..1743aba4 100644
--- a/cooperative_cuisine/reinforcement_learning/rl_small.layout
+++ b/cooperative_cuisine/reinforcement_learning/rl_small.layout
@@ -1,4 +1,4 @@
 ##X#
-T__#
+T__L
 U__P
 #C$#
-- 
GitLab