scores.py

"""
Scores are managed via hooks. You can add them in the `environment_config` under `hook_callbacks`.

The `ScoreViaHooks` class defined here is a `HookCallbackClass`. It allows you to define how the score is affected by
specific hook events.

You can:
- score an occurrence of an event with a **static** value (`static_score`)
- map the score based on the name of the hook (`score_map`)
- score based on a specific value in the kwargs passed with the hook (`score_on_specific_kwarg` and `score_map`)

You can filter the events via `kwarg_filter`.

```yaml
hook_callbacks:
  orders:
    hooks: [ completed_order ]
    callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
    callback_class_kwargs:
      static_score: 20
      score_on_specific_kwarg: meal_name
      score_map:
        Burger: 15
        OnionSoup: 10
        Salad: 5
        TomatoSoup: 10
  not_ordered_meals:
    hooks: [ serve_not_ordered_meal ]
    callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
    callback_class_kwargs:
      static_score: 2
  trashcan_usages:
    hooks: [ trashcan_usage ]
    callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
    callback_class_kwargs:
      static_score: -5
  expired_orders:
    hooks: [ order_expired ]
    callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
    callback_class_kwargs:
      static_score: -10
```

The score can be modified based on the time remaining for the order to be completed.

```yaml
hook_callbacks:
  orders:
    hooks: [ completed_order ]
    callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
    callback_class_kwargs:
      time_dependence_func: !!python/name:cooperative_cuisine.scores.linear_score ''
      time_dependence_kwargs:
        min_score_ratio: 0.3
        round_decimals: 2
      static_score: 100
      score_on_specific_kwarg: meal_name
      score_map: []

    hooks: [ completed_order ]
    callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
    callback_class_kwargs:
      time_dependence_func: !!python/name:cooperative_cuisine.scores.stepped_score ''
      time_dependence_kwargs:
        steps: [0.3, 0.6, 1.0]
        score_ratios: [0.3, 0.6, 1.0]
        round_decimals: 2
      static_score: 100
      score_on_specific_kwarg: meal_name
      score_map: []

    hooks: [ completed_order ]
    callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
    callback_class_kwargs:
      time_dependence_func: !!python/name:cooperative_cuisine.scores.constant_score ''
      time_dependence_kwargs: {}
      static_score: 100
      score_on_specific_kwarg: meal_name
      score_map: []
```


# Code Documentation
"""

import logging
from typing import Any

import numpy as np

from cooperative_cuisine.environment import Environment
from cooperative_cuisine.hooks import HookCallbackClass


def constant_score(max_score: float, time_percentage: float = 1.0):
    """Return the score unchanged, independent of the remaining time.

    Args:
        max_score: The score to award.
        time_percentage: Remaining-time ratio. Accepted so this function can be
            called the same way as the other time-dependence functions; it has
            no influence on the result.

    Returns:
        `max_score`, exactly as it was passed in.
    """
    del time_percentage  # intentionally unused: the score does not depend on time
    return max_score


def linear_score(
    max_score: float,
    min_score_ratio: float = 0.0,
    time_percentage: float = 1.0,
    round_decimals: int = 0,
):
    """Scale the score linearly with the remaining time, bounded by a minimum ratio.

    Args:
        max_score: The score awarded when `time_percentage` is 1.0.
        min_score_ratio: Fraction of `max_score` that is compared against the
            time-scaled score; the larger of the two values is returned.
        time_percentage: Remaining-time ratio the score is multiplied with.
        round_decimals: Number of decimals passed to `np.round`.

    Returns:
        The larger of `max_score * time_percentage` and
        `max_score * min_score_ratio`, rounded to `round_decimals` decimals.
    """
    time_scaled = max_score * time_percentage
    lower_bound = max_score * min_score_ratio
    return np.round(max(time_scaled, lower_bound), decimals=round_decimals)


def stepped_score(
    max_score: float,
    steps: list[float],
    score_ratios: list[float],
    round_decimals: int = 0,
    time_percentage: float = 1.0,
):
    """Scale the score with a step function of the remaining time.

    The thresholds in `steps` are checked from the last entry to the first.
    The first threshold that `time_percentage` reaches selects the ratio at the
    same position in `score_ratios`. `steps` is expected to be sorted in
    ascending order (as in the module docstring example), so the highest
    reached step wins.

    If `time_percentage` is below every threshold, the first ratio
    (`score_ratios[0]`) is used as a floor, so the function always returns a
    number instead of falling through and returning `None`.

    Args:
        max_score: The score that the selected ratio is multiplied with.
        steps: Remaining-time thresholds, ascending.
        score_ratios: Ratio applied for each threshold; same length as `steps`.
        round_decimals: Number of decimals passed to `np.round`.
        time_percentage: Remaining-time ratio compared against the thresholds.

    Returns:
        `max_score` multiplied by the selected ratio, rounded to
        `round_decimals` decimals.

    Raises:
        ValueError: If `steps` and `score_ratios` differ in length or are empty.
    """
    if len(steps) != len(score_ratios):
        raise ValueError("steps and score_ratios must have the same length")
    if len(steps) == 0:
        raise ValueError("steps and score_ratios must not be empty")

    # Walk from the last threshold down so the first hit is the highest step reached.
    for threshold, ratio in zip(reversed(steps), reversed(score_ratios)):
        if time_percentage >= threshold:
            return np.round(max_score * ratio, decimals=round_decimals)

    # Below every threshold: fall back to the first ratio rather than returning None.
    return np.round(max_score * score_ratios[0], decimals=round_decimals)


class ScoreViaHooks(HookCallbackClass):
    """Hook callback that changes the environment score when a hook fires.

    The base score of an event is looked up in this order:

    1. If `score_on_specific_kwarg` is set, the value of that hook kwarg is
       looked up in `score_map`; if it is not in the map, `static_score` is used.
    2. Otherwise, if the hook name is a key of `score_map`, that entry is used.
       In this case `kwarg_filter` (if set) must be a subset of the hook kwargs,
       else the event scores 0.
    3. Otherwise `static_score` is used.

    A non-zero base score is passed through `time_dependence_func` and then
    added to the environment score.

    Attributes:
        name (str): The name of the ScoreViaHooks instance.
        env (Environment): The environment in which the ScoreViaHooks instance is being used.
        static_score (float): The static score to be added if no other conditions are met.
        score_map (dict[str, float]): Mapping of hook references (or kwarg values) to scores.
        score_on_specific_kwarg (str): The specific keyword argument to score on.
        kwarg_filter (dict[str, Any]): Filtering condition for keyword arguments.
        time_dependence_func (callable): Function that modifies the score based on the remaining time.
        time_dependence_kwargs (dict[str, Any]): Extra keyword arguments for `time_dependence_func`.
    """

    def __init__(
        self,
        name: str,
        env: Environment,
        static_score: float = 0,
        score_map: dict[str, float] = None,
        score_on_specific_kwarg: str = None,
        time_dependence_func: callable = constant_score,
        time_dependence_kwargs: dict[str, Any] = None,
        kwarg_filter: dict[str, Any] = None,
        **kwargs,
    ):
        """Constructor of ScoreViaHooks.

        Args:
            name: A string representing the name of the method.
            env: An instance of the Environment class.
            static_score: A float representing the static score to be added if no other conditions are met. Default is 0.
            score_map: A dictionary mapping hook references to scores. Default is None, which is treated as an empty mapping.
            score_on_specific_kwarg: A string representing the specific keyword argument to score on. Default is None.
            time_dependence_func: Callable invoked as `func(score, time_percentage=ratio, **time_dependence_kwargs)`
                to modify the score. Default is `constant_score`.
            time_dependence_kwargs: Additional keyword arguments for `time_dependence_func`. Default is None (no extra arguments).
            kwarg_filter: A dictionary representing the filtering condition for keyword arguments. Default is None.
            **kwargs: Additional keyword arguments to be passed to the parent class.
        """
        super().__init__(name, env, **kwargs)
        # Normalise None to an empty mapping so membership tests in __call__ cannot
        # fail with a TypeError when only `score_on_specific_kwarg` is configured.
        self.score_map: dict[str, float] = score_map if score_map is not None else {}
        """Mapping of hook references to scores."""
        self.static_score: float = static_score
        """The static score to be added if no other conditions are met."""
        self.kwarg_filter: dict[str, Any] = kwarg_filter
        """Filtering condition for keyword arguments."""
        self.score_on_specific_kwarg: str = score_on_specific_kwarg
        """The specific keyword argument to score on."""
        self.time_dependence_func: callable = time_dependence_func
        """The function to calculate the score based on time."""
        self.time_dependence_kwargs: dict[str, Any] = (
            time_dependence_kwargs if time_dependence_kwargs else {}
        )
        """The keyword arguments to be passed to the time_dependence_func."""

    def _resolve_score(self, hook_ref: str, hook_kwargs: dict[str, Any]):
        """Return the `(score, info)` pair for a hook event before time modification."""
        if self.score_on_specific_kwarg:
            kwarg_value = hook_kwargs[self.score_on_specific_kwarg]
            if kwarg_value in self.score_map:
                return self.score_map[kwarg_value], f"{hook_ref} - {kwarg_value}"
            return self.static_score, hook_ref

        if self.score_map and hook_ref in self.score_map:
            if not self.kwarg_filter:
                return self.score_map[hook_ref], hook_ref
            # The filter matches when all of its items are present in the hook kwargs.
            if self.kwarg_filter.items() <= hook_kwargs.items():
                return self.score_map[hook_ref], f"{hook_ref} - {self.kwarg_filter}"
            return 0, "NO INFO?"

        return self.static_score, hook_ref

    def __call__(self, hook_ref: str, env: Environment, **kwargs):
        """Score a hook event and add the result to the environment score.

        Args:
            hook_ref: Name of the hook that fired.
            env: The environment passed by the hook caller; the score is
                incremented on `self.env`.
            **kwargs: Keyword arguments passed along with the hook.
        """
        score, info = self._resolve_score(hook_ref, kwargs)
        if not score:
            # A score of 0 (or a filtered-out event) leaves the environment untouched.
            return

        # Only completed orders carry a remaining-time ratio; all other events
        # are treated as if the full time were left.
        if hook_ref == "completed_order":
            ratio = kwargs["order"].finished_info["remaining_time_ratio"]
        else:
            ratio = 1.0

        modified_score = self.time_dependence_func(
            score, time_percentage=ratio, **self.time_dependence_kwargs
        )
        logging.getLogger(__name__).debug(
            "Score for %s: base=%s, modified=%s, time ratio=%s",
            info,
            score,
            modified_score,
            ratio,
        )

        self.env.increment_score(modified_score, info=info)