From 12c03dfab558a3815c2c0d2d3aa4afb9d4be80d0 Mon Sep 17 00:00:00 2001
From: fheinrich <fheinrich@techfak.uni-bielefeld.de>
Date: Wed, 27 Mar 2024 15:07:33 +0100
Subject: [PATCH] Score is calculated and stored in order itself

---
 cooperative_cuisine/environment.py          |  2 +-
 cooperative_cuisine/hooks.py                |  1 +
 cooperative_cuisine/orders.py               | 51 ++++++++++++++++++---
 cooperative_cuisine/scores.py               | 29 ++++++++----
 cooperative_cuisine/state_representation.py |  2 +
 5 files changed, 67 insertions(+), 18 deletions(-)

diff --git a/cooperative_cuisine/environment.py b/cooperative_cuisine/environment.py
index dfe89d70..90be3168 100644
--- a/cooperative_cuisine/environment.py
+++ b/cooperative_cuisine/environment.py
@@ -504,7 +504,7 @@ class Environment:
                 "counters": [c.to_dict() for c in self.counters],
                 "kitchen": {"width": self.kitchen_width, "height": self.kitchen_height},
                 "score": self.score,
-                "orders": self.order_manager.order_state(),
+                "orders": self.order_manager.order_state(self.env_time),
                 "ended": self.game_ended,
                 "env_time": self.env_time.isoformat(),
                 "remaining_time": max(
diff --git a/cooperative_cuisine/hooks.py b/cooperative_cuisine/hooks.py
index d83daf7f..ea6d6c59 100644
--- a/cooperative_cuisine/hooks.py
+++ b/cooperative_cuisine/hooks.py
@@ -423,6 +423,7 @@ Args:
     order (Order): the order that was fulfilled.
     meal (Item): The meal that was served.
     relative_order_time (timedelta): the time that the player needed to fulfill the order.
+    remaining_time_ratio (float): the ratio of the remaining time of the order relative to order duration.
     meal_name (str): name of the meal.
 """
 INIT_ORDERS = "init_orders"
diff --git a/cooperative_cuisine/orders.py b/cooperative_cuisine/orders.py
index 2f987104..9a98314a 100644
--- a/cooperative_cuisine/orders.py
+++ b/cooperative_cuisine/orders.py
@@ -54,6 +54,7 @@ from cooperative_cuisine.hooks import (
     ORDER_EXPIRED,
 )
 from cooperative_cuisine.items import Item, Plate, ItemInfo
+from cooperative_cuisine.scores import ScoreViaHooks
 from cooperative_cuisine.state_representation import OrderState
 
 log = logging.getLogger(__name__)
@@ -185,6 +186,9 @@ class OrderManager:
 
         self.hook: Hooks = hook
         """Reference to the hook manager."""
+        self.score_callbacks = []
+        """ScoreViaHooks callbacks used to compute the score of open orders."""
+        self.find_score_hook_callbacks()
 
     def set_available_meals(self, available_meals):
         """Set the available meals from which orders can be generated.
@@ -204,6 +208,12 @@ class OrderManager:
             )
         self.next_relevant_time = next_relevant_time
 
+    def find_score_hook_callbacks(self):
+        """Collect the ScoreViaHooks callbacks registered for COMPLETED_ORDER."""
+        for hook in self.hook.hooks[COMPLETED_ORDER]:
+            if isinstance(hook, ScoreViaHooks):
+                self.score_callbacks.append(hook)
+
     def serve_meal(self, item: Item, env_time: datetime, player: str) -> bool:
         """Is called by the ServingWindow to serve a meal. Returns True if the meal can be served and should be
         "deleted" from the hands of the player."""
@@ -231,14 +241,15 @@ class OrderManager:
                     self.last_finished.append(order)
                     del self.open_orders[index]
                     self.served_meals.append((meal, env_time, player))
-                    order.finished_info["remaining_time_ratio"] = (
-                        order.start_time + order.max_duration - env_time
-                    ).total_seconds() / order.max_duration.total_seconds()
                     self.hook(
                         COMPLETED_ORDER,
                         order=order,
                         meal=meal,
                         relative_order_time=env_time - order.start_time,
+                        remaining_time_ratio=(
+                            order.start_time + order.max_duration - env_time
+                        ).total_seconds()
+                        / order.max_duration.total_seconds(),
                         meal_name=meal.name,
                     )
                     return True
@@ -289,13 +300,21 @@ class OrderManager:
 
     def find_order_for_meal(self, meal) -> Tuple[Order, int] | None:
         """Get the order that will be fulfilled for a meal. At the moment the oldest order in the list that has the
-        same meal (name)."""
+        same meal (name).
+
+        Args:
+            meal: The meal to find the order for.
+        """
         for index, order in enumerate(self.open_orders):
             if order.meal.name == meal.name:
                 return order, index
 
-    def order_state(self) -> list[OrderState]:
-        """Similar to the `to_dict` in `Item` and `Counter`. Relevant for the state of the environment"""
+    def order_state(self, env_time: datetime) -> list[OrderState]:
+        """Similar to the `to_dict` in `Item` and `Counter`. Relevant for the state of the environment.
+
+        Args:
+            env_time: The current time of the environment, used to compute each order's current score.
+        """
         return [
             {
                 "id": order.uuid,
@@ -303,6 +322,24 @@ class OrderManager:
                 "meal": order.meal.name,
                 "start_time": order.start_time.isoformat(),
                 "max_duration": order.max_duration.total_seconds(),
+                "score": sum(
+                    [
+                        sc.get_score(
+                            COMPLETED_ORDER,
+                            **{
+                                "meal": order.meal,
+                                "order": order,
+                                "relative_order_time": env_time - order.start_time,
+                                "remaining_time_ratio": (
+                                    order.start_time + order.max_duration - env_time
+                                ).total_seconds()
+                                / order.max_duration.total_seconds(),
+                                "meal_name": order.meal.name,
+                            },
+                        )[0]
+                        for sc in self.score_callbacks
+                    ]
+                ),
             }
             for order in self.open_orders
         ]
@@ -439,7 +476,7 @@ class RandomOrderGeneration(OrderGeneration):
                 self.create_random_next_time_delta(now)
                 return []
 
-        # print(self.number_cur_orders, self.num_needed_orders)
+        # Debug aid: inspect (self.number_cur_orders, self.num_needed_orders) here.
 
         if self.num_needed_orders:
             # self.num_needed_orders -= len(new_finished_orders)
diff --git a/cooperative_cuisine/scores.py b/cooperative_cuisine/scores.py
index 5f83782e..03c9bfd6 100644
--- a/cooperative_cuisine/scores.py
+++ b/cooperative_cuisine/scores.py
@@ -79,12 +79,14 @@ hook_callbacks:
 
 # Code Documentation
 """
+from __future__ import annotations
 
-from typing import Any
+from typing import Any, TYPE_CHECKING
 
 import numpy as np
 
-from cooperative_cuisine.environment import Environment
+if TYPE_CHECKING:
+    from cooperative_cuisine.environment import Environment
 from cooperative_cuisine.hooks import HookCallbackClass
 
 
@@ -98,9 +100,11 @@ def linear_score(
     time_percentage: float = 1.0,
     round_decimals: int = 0,
 ):
-    modified_score = np.round(
-        max(max_score * time_percentage, max_score * min_score_ratio),
-        decimals=round_decimals,
+    modified_score = float(
+        np.round(
+            max(max_score * time_percentage, max_score * min_score_ratio),
+            decimals=round_decimals,
+        )
     )
     return modified_score
 
@@ -117,7 +121,9 @@ def stepped_score(
 
     for threshold, ratio in zip(reversed(steps), reversed(score_ratios)):
         if time_percentage >= threshold:
-            return np.round(max_score * ratio, decimals=round_decimals)
+            return float(np.round(max_score * ratio, decimals=round_decimals))
+
+    assert False, "Should not reach here."
 
 
 class ScoreViaHooks(HookCallbackClass):
@@ -172,7 +178,7 @@ class ScoreViaHooks(HookCallbackClass):
         )
         """The keyword arguments to be passed to the time_dependence_func."""
 
-    def __call__(self, hook_ref: str, env: Environment, **kwargs):
+    def get_score(self, hook_ref: str, **kwargs):
         if self.score_on_specific_kwarg:
             if kwargs[self.score_on_specific_kwarg] in self.score_map:
                 score = self.score_map[kwargs[self.score_on_specific_kwarg]]
@@ -197,12 +203,15 @@ class ScoreViaHooks(HookCallbackClass):
 
         if score:
             if hook_ref == "completed_order":
-                ratio = kwargs["order"].finished_info["remaining_time_ratio"]
+                ratio = kwargs["remaining_time_ratio"]
             else:
                 ratio = 1.0
             modified_score = self.time_dependence_func(
                 score, time_percentage=ratio, **self.time_dependence_kwargs
             )
-            print("PREV SCORE", score, "MODIFIED SCORE", modified_score, "RATIO", ratio)
+            return modified_score, info
+        return 0, "NO INFO?"
 
-            self.env.increment_score(modified_score, info=info)
+    def __call__(self, hook_ref: str, env: Environment, **kwargs):
+        score, info = self.get_score(hook_ref, **kwargs)
+        self.env.increment_score(score, info=info)
diff --git a/cooperative_cuisine/state_representation.py b/cooperative_cuisine/state_representation.py
index 209bf7d9..68191b67 100644
--- a/cooperative_cuisine/state_representation.py
+++ b/cooperative_cuisine/state_representation.py
@@ -27,6 +27,8 @@ class OrderState(TypedDict):
     """Time of the creation of the order."""
     max_duration: float
     """Maximum duration of the order until it should be served."""
+    score: float | int
+    """Score the order would yield if it were served at the current environment time."""
 
 
 class EffectState(TypedDict):
-- 
GitLab