Skip to content
Snippets Groups Projects
scores.py 7.59 KiB
Newer Older
  • Learn to ignore specific revisions
  • Scores are managed via hooks. You can add them in the `environment_config` under `hook_callbacks`.
    
    
    The `ScoreViaHooks` class defined here is a `HookCallbackClass`. It allows you to define how the score is affected by
    specific hook events.
    
    You can:
    - score an occurrence of an event with a **static** value (`static_score`)
    - map the score based on the name of the hook (`score_map`)
    - score based on a specific value in the kwargs passed with the hook (`score_on_specific_kwarg` and `score_map`)
    
    You can filter the events via `kwarg_filter`.
    
    ```yaml
    
        hooks: [ completed_order ]
        callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
        callback_class_kwargs:
          static_score: 20
          score_on_specific_kwarg: meal_name
          score_map:
            Burger: 15
            OnionSoup: 10
            Salad: 5
            TomatoSoup: 10
    
        hooks: [ serve_not_ordered_meal ]
        callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
        callback_class_kwargs:
          static_score: 2
    
        hooks: [ trashcan_usage ]
        callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
        callback_class_kwargs:
          static_score: -5
    
        hooks: [ order_expired ]
        callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
        callback_class_kwargs:
          static_score: -10
    
    
    The score can be modified based on the time remaining for the order to be completed.
    hook_callbacks:
      orders:
        hooks: [ completed_order ]
        callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
        callback_class_kwargs:
          time_dependence_func: !!python/name:cooperative_cuisine.scores.linear_score ''
          time_dependence_kwargs:
            min_score_ratio: 0.3
            round_decimals: 2
          static_score: 100
          score_on_specific_kwarg: meal_name
          score_map: []
    
        hooks: [ completed_order ]
        callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
        callback_class_kwargs:
          time_dependence_func: !!python/name:cooperative_cuisine.scores.stepped_score ''
          time_dependence_kwargs:
            steps: [0.3, 0.6, 1.0]
            score_ratios: [0.3, 0.6, 1.0]
            round_decimals: 2
          static_score: 100
          score_on_specific_kwarg: meal_name
          score_map: []
    
        hooks: [ completed_order ]
        callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
        callback_class_kwargs:
          time_dependence_func: !!python/name:cooperative_cuisine.scores.constant_score ''
          time_dependence_kwargs: {}
          static_score: 100
          score_on_specific_kwarg: meal_name
          score_map: []
    
    from typing import Any

    import numpy as np

    from cooperative_cuisine.environment import Environment
    from cooperative_cuisine.hooks import HookCallbackClass
    
    def constant_score(max_score: float, time_percentage: float = 1.0):
        """Time-independent scoring: always award the full score.

        Args:
            max_score: The score to award.
            time_percentage: Accepted so that the signature matches the other
                time dependence functions; it does not influence the result.

        Returns:
            ``max_score``, unchanged.
        """
        full_score = max_score
        return full_score
    
    
    def linear_score(
        max_score: float,
        min_score_ratio: float = 0.0,
        time_percentage: float = 1.0,
        round_decimals: int = 0,
    ):
        """Scale the score linearly with ``time_percentage``, bounded from below.

        Args:
            max_score: The score that is multiplied by the ratios.
            min_score_ratio: Ratio of ``max_score`` used as the lower candidate.
            time_percentage: Ratio of ``max_score`` used as the time-scaled
                candidate.
            round_decimals: Number of decimals passed to ``np.round``.

        Returns:
            The larger of ``max_score * time_percentage`` and
            ``max_score * min_score_ratio``, rounded with ``np.round``.
        """
        time_scaled = max_score * time_percentage
        lower_bound = max_score * min_score_ratio
        return np.round(max(time_scaled, lower_bound), decimals=round_decimals)
    
    
    def stepped_score(
        max_score: float,
        steps: list[float],
        score_ratios: list[float],
        round_decimals: int = 0,
        time_percentage: float = 1.0,
    ):
        """Score in discrete steps depending on ``time_percentage``.

        ``steps`` and ``score_ratios`` are paired by position. The pairs are
        checked from the last to the first, and the first threshold that
        ``time_percentage`` reaches selects the ratio. The thresholds are
        therefore expected to be given in ascending order.

        Args:
            max_score: The score that is multiplied by the selected ratio.
            steps: Thresholds that ``time_percentage`` is compared against.
            score_ratios: Ratio of ``max_score`` awarded for each threshold.
            round_decimals: Number of decimals passed to ``np.round``.
            time_percentage: The value compared against the thresholds.

        Returns:
            ``max_score`` times the ratio of the selected step, rounded with
            ``np.round``. ``0.0`` if no threshold is reached (including the case
            of empty ``steps``), so callers always receive a number.

        Raises:
            ValueError: If ``steps`` and ``score_ratios`` differ in length.
        """
        if len(steps) != len(score_ratios):
            raise ValueError("steps and score_ratios must have the same length")

        for threshold, ratio in zip(reversed(steps), reversed(score_ratios)):
            if time_percentage >= threshold:
                return np.round(max_score * ratio, decimals=round_decimals)

        # No threshold reached: return an explicit zero instead of silently
        # falling off the end and handing ``None`` to the caller.
        return 0.0
    
    
    
    class ScoreViaHooks(HookCallbackClass):
        """Hook callback that changes the environment score when a hook fires.

        Attributes:
            name (str): The name of the ScoreViaHooks instance.
            env (Environment): The environment in which the ScoreViaHooks instance is being used.
            static_score (float): The static score to be added if no other conditions are met.
            score_map (dict[str, float]): Mapping of hook references to scores.
            score_on_specific_kwarg (str): The specific keyword argument to score on.
            time_dependence_func (callable): The function to calculate the score based on time.
            time_dependence_kwargs (dict[str, Any]): Keyword arguments passed to `time_dependence_func`.
            kwarg_filter (dict[str, Any]): Filtering condition for keyword arguments.
        """

        def __init__(
            self,
            name: str,
            env: Environment,
            static_score: float = 0,
            score_map: dict[str, float] = None,
            score_on_specific_kwarg: str = None,
            time_dependence_func: callable = constant_score,
            time_dependence_kwargs: dict[str, Any] = None,
            kwarg_filter: dict[str, Any] = None,
            **kwargs,
        ):
            """Constructor of ScoreViaHooks.

            Args:
                name: A string representing the name of the method.
                env: An instance of the Environment class.
                static_score: A float representing the static score to be added if no other conditions are met. Default is 0.
                score_map: A dictionary mapping hook references to scores. Default is None.
                score_on_specific_kwarg: A string representing the specific keyword argument to score on. Default is None.
                time_dependence_func: Function called as
                    `time_dependence_func(score, time_percentage=ratio, **time_dependence_kwargs)`
                    to derive the final score. Default is `constant_score`.
                time_dependence_kwargs: Additional keyword arguments for `time_dependence_func`. Default is None,
                    which is stored as an empty dict.
                kwarg_filter: A dictionary representing the filtering condition for keyword arguments. Default is None.
                **kwargs: Additional keyword arguments to be passed to the parent class.
            """
            super().__init__(name, env, **kwargs)

            self.score_map: dict[str, float] = score_map
            """Mapping of hook references to scores."""
            self.static_score: float = static_score
            """The static score to be added if no other conditions are met."""
            self.kwarg_filter: dict[str, Any] = kwarg_filter
            """Filtering condition for keyword arguments."""
            self.score_on_specific_kwarg: str = score_on_specific_kwarg
            """The specific keyword argument to score on."""
            self.time_dependence_func: callable = time_dependence_func
            """The function to calculate the score based on time."""
            self.time_dependence_kwargs: dict[str, Any] = (
                time_dependence_kwargs if time_dependence_kwargs else {}
            )
            """The keyword arguments to be passed to the time_dependence_func."""

        def __call__(self, hook_ref: str, env: Environment, **kwargs):
            """Compute the score for a fired hook and add it to the environment.

            The base score is selected as follows:

            1. If `score_on_specific_kwarg` is set, the value of that hook kwarg is
               looked up in `score_map`; if it is not in the map, `static_score` is used.
            2. Otherwise, if `hook_ref` is a key of `score_map`, the mapped score is
               used. With a `kwarg_filter`, the mapped score only counts if every
               filter item is present in the hook kwargs; otherwise the score is 0.
            3. Otherwise `static_score` is used.

            A base score of 0 changes nothing. Any other score is passed through
            `time_dependence_func` before it is added via `self.env.increment_score`.

            Args:
                hook_ref: The name of the hook that fired.
                env: The environment passed along with the hook (the score is
                    incremented on `self.env`).
                **kwargs: The keyword arguments passed with the hook.

            Raises:
                KeyError: If `score_on_specific_kwarg` is set but the hook did not
                    pass that kwarg, or if a `completed_order` hook has no `order` kwarg.
            """
            # ``score_map`` defaults to None; treat that like an empty mapping so the
            # membership checks below cannot fail on ``in None``.
            score_map = self.score_map or {}

            if self.score_on_specific_kwarg:
                kwarg_value = kwargs[self.score_on_specific_kwarg]
                if kwarg_value in score_map:
                    score = score_map[kwarg_value]
                    info = f"{hook_ref} - {kwarg_value}"
                else:
                    score = self.static_score
                    info = hook_ref
            elif hook_ref in score_map:
                if not self.kwarg_filter:
                    score = score_map[hook_ref]
                    info = hook_ref
                elif self.kwarg_filter.items() <= kwargs.items():
                    # Every (key, value) pair of the filter is present in the kwargs.
                    score = score_map[hook_ref]
                    info = f"{hook_ref} - {self.kwarg_filter}"
                else:
                    score = 0
                    info = "NO INFO?"
            else:
                score = self.static_score
                info = hook_ref

            if not score:
                return

            # Only completed orders carry a remaining-time ratio; every other hook
            # is scored as if the full time were left.
            if hook_ref == "completed_order":
                ratio = kwargs["order"].finished_info["remaining_time_ratio"]
            else:
                ratio = 1.0

            modified_score = self.time_dependence_func(
                score, time_percentage=ratio, **self.time_dependence_kwargs
            )
            self.env.increment_score(modified_score, info=info)