diff --git a/overcooked_simulator/counters.py b/overcooked_simulator/counters.py
index 9fe8eee9fd3d50108ea1dfc9b2c15635e556434d..0c3458dd61d5bfa11bef55ac25abf970b903cfe0 100644
--- a/overcooked_simulator/counters.py
+++ b/overcooked_simulator/counters.py
@@ -3,13 +3,11 @@ from __future__ import annotations
 import logging
 from collections import deque
 from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, Optional
-
-from overcooked_simulator.utils import create_init_env_time
+from typing import TYPE_CHECKING, Optional, Callable
 
 if TYPE_CHECKING:
     from overcooked_simulator.overcooked_environment import (
-        GameScore,
+        OrderAndScoreManager,
     )
 
 import numpy as np
@@ -144,23 +142,24 @@ class ServingWindow(Counter):
     def __init__(
         self,
         pos,
-        game_score: GameScore,
+        order_and_score: OrderAndScoreManager,
         meals: set[str],
+        env_time_func: Callable[[], datetime],
         plate_dispenser: PlateDispenser = None,
     ):
-        self.game_score = game_score
+        self.order_and_score = order_and_score
         self.plate_dispenser = plate_dispenser
         self.meals = meals
+        self.env_time_func = env_time_func
         super().__init__(pos)
 
     def drop_off(self, item) -> Item | None:
-        reward = 5
-        log.debug(f"Drop off item {item}")
-        # TODO define rewards
-        self.game_score.increment_score(reward)
-        if self.plate_dispenser is not None:
-            self.plate_dispenser.update_plate_out_of_kitchen()
-        return None
+        env_time = self.env_time_func()
+        if self.order_and_score.serve_meal(item=item, env_time=env_time):
+            if self.plate_dispenser is not None:
+                self.plate_dispenser.update_plate_out_of_kitchen(env_time=env_time)
+            return None
+        return item
 
     def can_drop_off(self, item: Item) -> bool:
         return isinstance(item, CookingEquipment) and (
@@ -217,7 +216,6 @@ class PlateDispenser(Counter):
         self.plate_config = {"plate_delay": [5, 10]}
         self.plate_config.update(plate_config)
         self.next_plate_time = datetime.max
-        self.env_time = create_init_env_time()  # is overwritten in progress anyway
         self.plate_transitions = plate_transitions
         self.setup_plates()
 
@@ -246,10 +244,10 @@ class PlateDispenser(Counter):
     def add_dirty_plate(self):
         self.occupied_by.appendleft(self.create_item())
 
-    def update_plate_out_of_kitchen(self):
+    def update_plate_out_of_kitchen(self, env_time: datetime):
         """Is called from the serving window to add a plate out of kitchen."""
         # not perfect identical to datetime.now but based on framerate enough.
-        time_plate_to_add = self.env_time + timedelta(
+        time_plate_to_add = env_time + timedelta(
             seconds=np.random.uniform(
                 low=self.plate_config["plate_delay"][0],
                 high=self.plate_config["plate_delay"][1],
@@ -276,7 +274,6 @@ class PlateDispenser(Counter):
 
     def progress(self, passed_time: timedelta, now: datetime):
         """Check if plates arrive from outside the kitchen and add a dirty plate accordingly"""
-        self.env_time = now
         if self.next_plate_time < now:
             idx_delete = []
             for i, times in enumerate(self.out_of_kitchen_timer):
diff --git a/overcooked_simulator/game_content/environment_config.yaml b/overcooked_simulator/game_content/environment_config.yaml
index a5b77f8b13f33361009a5aedab5d6c91da98c907..d262ecbd17191323a5c3adfa3f4af25b942c2570 100644
--- a/overcooked_simulator/game_content/environment_config.yaml
+++ b/overcooked_simulator/game_content/environment_config.yaml
@@ -5,3 +5,30 @@ plates:
 
 game:
   time_limit_seconds: 180
+
+orders:
+  kwargs:
+    duration_sample:
+      func: uniform
+      kwargs:
+        a: 30
+        b: 50
+    max_orders: 5
+    num_start_meals: 3
+    sample_on_dur: false
+    sample_on_dur_func:
+      func: uniform
+      kwargs:
+        a: 30
+        b: 50
+    sample_on_serving: true
+    score_calc_gen_kwargs:
+      other: 0
+      scores:
+        Burger: 15
+        OnionSoup: 10
+        Salad: 5
+        TomatoSoup: 10
+    score_calc_gen_func: !!python/name:overcooked_simulator.order.simple_score_calc_gen_func ''
+  serving_not_ordered_meals: null
+  order_gen_class: !!python/name:overcooked_simulator.order.RandomOrderGeneration ''
diff --git a/overcooked_simulator/game_items.py b/overcooked_simulator/game_items.py
index 1969e9b25935664c6c201a99fde0937d7f4204ee..f470d07d92f7a1d3b00b5c1df2b3de0f667db680 100644
--- a/overcooked_simulator/game_items.py
+++ b/overcooked_simulator/game_items.py
@@ -196,6 +196,13 @@ class CookingEquipment(Item):
         super().reset()
         self.active_transition = None
 
+    def get_potential_meal(self) -> Item | None:
+        if self.content_ready:
+            return self.content_ready
+        if len(self.content_list) == 1:
+            return self.content_list[0]
+        return None
+
 
 class Plate(CookingEquipment):
     def __init__(self, transitions, clean, *args, **kwargs):
diff --git a/overcooked_simulator/order.py b/overcooked_simulator/order.py
new file mode 100644
index 0000000000000000000000000000000000000000..589e588e11327819697673241b5317ae9acf00ad
--- /dev/null
+++ b/overcooked_simulator/order.py
@@ -0,0 +1,320 @@
+import dataclasses
+import logging
+import random
+from abc import abstractmethod
+from collections import deque
+from datetime import datetime, timedelta
+from typing import Callable, Tuple, Any, Deque
+
+from overcooked_simulator.game_items import Item, Plate
+
+log = logging.getLogger(__name__)
+
+
+@dataclasses.dataclass
+class Order:
+    meal: Item
+    start_time: datetime
+    max_duration: timedelta
+    score_calc: Callable[[timedelta, ...], float]
+    timed_penalties: list[
+        Tuple[timedelta, float] | Tuple[timedelta, float, int, timedelta]
+    ]
+
+    finished_info: dict[str, Any] = dataclasses.field(default_factory=dict)
+    _timed_penalties: list[Tuple[datetime, float]] = dataclasses.field(
+        default_factory=list
+    )
+
+    def order_time(self, env_time: datetime) -> timedelta:
+        """Return how long the order has been open at `env_time`."""
+        # Elapsed time is "now minus start", the same convention serve_meal
+        # uses when it computes relative_order_time.
+        return env_time - self.start_time
+
+    def create_penalties(self, env_time: datetime):
+        for penalty_info in self.timed_penalties:
+            match penalty_info:
+                case (offset, penalty):
+                    self._timed_penalties.append((env_time + offset, penalty))
+                case (duration, penalty, number_repeat, offset):
+                    self._timed_penalties.extend(
+                        [
+                            (env_time + offset + (duration * i), penalty)
+                            for i in range(number_repeat)
+                        ]
+                    )
+
+
+class OrderGeneration:
+    def __init__(self, available_meals: dict[str, Item], **kwargs):
+        self.available_meals: list[Item] = list(available_meals.values())
+
+    @abstractmethod
+    def init_orders(self, now) -> list[Order]:
+        ...
+
+    @abstractmethod
+    def get_orders(
+        self, passed_time: timedelta, now: datetime, new_finished_orders: list[Order]
+    ) -> list[Order]:
+        ...
+
+
+@dataclasses.dataclass
+class RandomOrderKwarg:
+    num_start_meals: int
+    sample_on_serving: bool
+    sample_on_dur: bool
+    sample_on_dur_func: dict
+    max_orders: int
+    duration_sample: dict
+    score_calc_gen_func: Callable[
+        [Item, timedelta, datetime, Any], Callable[[timedelta, Order], float]
+    ]
+    score_calc_gen_kwargs: dict
+
+
+class RandomOrderGeneration(OrderGeneration):
+    def __init__(self, available_meals: dict[str, Item], **kwargs):
+        super().__init__(available_meals, **kwargs)
+        self.kwargs: RandomOrderKwarg = RandomOrderKwarg(**kwargs["kwargs"])
+        self.next_order_time: datetime | None = None
+        self.number_cur_orders = 0
+        self.needed_orders: int = 0
+        """For the sample on dur but when it was restricted due to max order number."""
+
+    def init_orders(self, now) -> list[Order]:
+        self.number_cur_orders = self.kwargs.num_start_meals
+        if self.kwargs.sample_on_dur:
+            self.create_random_next_time_delta(now)
+        return self.create_orders_for_meals(
+            random.choices(self.available_meals, k=self.kwargs.num_start_meals),
+            now,
+        )
+
+    def get_orders(
+        self, passed_time: timedelta, now: datetime, new_finished_orders: list[Order]
+    ) -> list[Order]:
+        self.number_cur_orders -= len(new_finished_orders)
+        if self.kwargs.sample_on_serving:
+            if new_finished_orders:
+                self.number_cur_orders += len(new_finished_orders)
+                return self.create_orders_for_meals(
+                    random.choices(self.available_meals, k=len(new_finished_orders)),
+                    now,
+                )
+        if self.kwargs.sample_on_dur:
+            if self.needed_orders:
+                self.needed_orders -= len(new_finished_orders)
+                self.needed_orders = max(self.needed_orders, 0)
+                self.number_cur_orders += len(new_finished_orders)
+                return self.create_orders_for_meals(
+                    random.choices(self.available_meals, k=len(new_finished_orders)),
+                    now,
+                )
+            if self.next_order_time < now:
+                if self.number_cur_orders >= self.kwargs.max_orders:
+                    self.needed_orders += 1
+                else:
+                    self.create_random_next_time_delta(now)
+                    self.number_cur_orders += 1
+                    return self.create_orders_for_meals(
+                        # create_orders_for_meals iterates over its argument, so
+                        # a single sampled meal has to be wrapped in a list.
+                        [random.choice(self.available_meals)],
+                        now,
+                    )
+        return []
+
+    def create_orders_for_meals(self, meals: list[Item], now: datetime) -> list[Order]:
+        orders = []
+        for meal in meals:
+            duration = timedelta(
+                seconds=int(
+                    getattr(random, self.kwargs.duration_sample["func"])(
+                        **self.kwargs.duration_sample["kwargs"]
+                    )
+                )
+            )
+            log.info(f"Create order for meal {meal} with duration {duration}")
+            orders.append(
+                Order(
+                    meal=meal,
+                    start_time=now,
+                    max_duration=duration,
+                    score_calc=self.kwargs.score_calc_gen_func(
+                        meal=meal,
+                        duration=duration,
+                        now=now,
+                        kwargs=self.kwargs.score_calc_gen_kwargs,
+                    ),
+                    timed_penalties=[],
+                )
+            )
+
+        return orders
+
+    def create_random_next_time_delta(self, now: datetime):
+        """Sample the point in time at which the next timed order is due."""
+        # get_orders compares next_order_time with `now`, so it has to be an
+        # absolute datetime, not a bare timedelta. The delay is drawn from the
+        # `sample_on_dur_func` distribution configured for timed sampling.
+        self.next_order_time = now + timedelta(
+            seconds=int(
+                getattr(random, self.kwargs.sample_on_dur_func["func"])(
+                    **self.kwargs.sample_on_dur_func["kwargs"]
+                )
+            )
+        )
+
+
+def simple_score_calc_gen_func(
+    meal: Item, duration: timedelta, now: datetime, kwargs: dict
+) -> Callable:
+    scores = kwargs["scores"]
+    other = kwargs["other"]
+
+    def score_calc(relative_order_time: timedelta, order: Order) -> float:
+        if order.meal.name in scores:
+            return scores[order.meal.name]
+        return other
+
+    return score_calc
+
+
+class OrderAndScoreManager:
+    def __init__(self, order_config, available_meals: dict[str, Item]):
+        self.score = 0
+        self.order_gen: OrderGeneration = order_config["order_gen_class"](
+            available_meals=available_meals, kwargs=order_config["kwargs"]
+        )
+        self.kwargs_for_func = order_config["kwargs"]
+        self.serving_not_ordered_meals = order_config["serving_not_ordered_meals"]
+        self.available_meals = available_meals
+        self.open_orders: Deque[Order] = deque()
+
+        # for logs or history in the future
+        # TODO log who / which player served which meal -> for split scores
+        self.served_meals: list[Tuple[Item, datetime]] = []
+        self.last_finished = []
+        self.penalty_timers = []
+
+    def serve_meal(self, item: Item, env_time: datetime) -> bool:
+        if isinstance(item, Plate):
+            meal = item.get_potential_meal()
+            if meal is not None:
+                if meal.name in self.available_meals:
+                    order = self.find_order_for_meal(meal)
+                    if order is None:
+                        if self.serving_not_ordered_meals:
+                            accept, score = self.serving_not_ordered_meals(meal)
+                            if accept:
+                                log.info(
+                                    f"Serving meal without order {meal.name} with score {score}"
+                                )
+                                self.score += score
+                                self.served_meals.append((meal, env_time))
+                            return accept
+                        log.info(
+                            f"Do not serve meal {meal.name} because it is not ordered"
+                        )
+                        return False
+                    order, index = order
+                    score = order.score_calc(
+                        relative_order_time=env_time - order.start_time,
+                        order=order,
+                    )
+                    self.score += score
+                    order.finished_info = {
+                        "end_time": env_time,
+                        "score": score,
+                    }
+                    log.info(f"Serving meal {meal.name} with order with score {score}")
+                    self.last_finished.append(order)
+                    del self.open_orders[index]
+                    self.served_meals.append((meal, env_time))
+                    return True
+        log.info(f"Do not serve item {item}")
+        return False
+
+    def increment_score(self, score: int):
+        self.score += score
+        log.debug(f"Score: {self.score}")
+
+    def create_init_orders(self, env_time):
+        init_orders = self.order_gen.init_orders(env_time)
+        self.open_orders.extend(init_orders)
+
+    def progress(self, passed_time: timedelta, now: datetime):
+        """Fetch new orders, apply due penalties and drop expired orders."""
+        new_orders = self.order_gen.get_orders(
+            passed_time=passed_time, now=now, new_finished_orders=self.last_finished
+        )
+        self.open_orders.extend(new_orders)
+        self.last_finished = []
+
+        remove_orders = []
+        for index, order in enumerate(self.open_orders):
+            if now >= order.start_time + order.max_duration:
+                remove_orders.append(index)
+            # _timed_penalties holds the absolute (datetime, penalty) pairs
+            # built by Order.create_penalties; timed_penalties only holds the
+            # relative configuration tuples.
+            remaining_penalties = []
+            for penalty_time, penalty in order._timed_penalties:
+                if penalty_time < now:
+                    self.score -= penalty
+                else:
+                    remaining_penalties.append((penalty_time, penalty))
+            order._timed_penalties = remaining_penalties
+
+        # Delete from the back so the indices that are still pending stay valid.
+        for remove_order in reversed(remove_orders):
+            del self.open_orders[remove_order]
+
+    def find_order_for_meal(self, meal) -> Tuple[Order, int] | None:
+        for neg_index, order in enumerate(reversed(self.open_orders)):
+            if order.meal.name == meal.name:
+                return order, len(self.open_orders) - neg_index - 1
+
+    def setup_penalties(self, new_orders: list[Order], env_time: datetime):
+        for order in new_orders:
+            order.create_penalties(env_time)
+
+
+if __name__ == "__main__":
+    import yaml
+
+    order_config = yaml.safe_load(
+        """orders:
+  kwargs:
+    duration_sample:
+      func: uniform
+      kwargs:
+        a: 30
+        b: 50
+    max_orders: 5
+    num_start_meals: 3
+    sample_on_dur: false
+    sample_on_dur_func:
+      func: uniform
+      kwargs:
+        a: 30
+        b: 50
+    sample_on_serving: true
+    score_calc_gen_func: null
+    score_calc_gen_kwargs:
+      other: 0
+      scores:
+        Burger: 15
+        OnionSoup: 10
+        Salad: 5
+        TomatoSoup: 10
+    score_calc_gen_func: ~''
+  order_gen_class: ~
+  serving_not_ordered_meals: null"""
+    )
+    order_config["orders"]["order_gen_class"] = RandomOrderGeneration
+    order_config["orders"]["kwargs"]["score_calc_gen_func"] = simple_score_calc_gen_func
+    print(yaml.dump(order_config))
diff --git a/overcooked_simulator/overcooked_environment.py b/overcooked_simulator/overcooked_environment.py
index e36429fe4850674f570277218f45eb6cd0b005aa..dfc9e9fd642508c84b144770e7fa15b4e51273ca 100644
--- a/overcooked_simulator/overcooked_environment.py
+++ b/overcooked_simulator/overcooked_environment.py
@@ -22,25 +22,18 @@ from overcooked_simulator.counters import (
     PlateDispenser,
     SinkAddon,
 )
-from overcooked_simulator.game_items import ItemInfo, ItemType, CookingEquipment
+from overcooked_simulator.game_items import (
+    ItemInfo,
+    ItemType,
+    CookingEquipment,
+)
+from overcooked_simulator.order import OrderAndScoreManager
 from overcooked_simulator.player import Player
 from overcooked_simulator.utils import create_init_env_time
 
 log = logging.getLogger(__name__)
 
 
-class GameScore:
-    def __init__(self):
-        self.score = 0
-
-    def increment_score(self, score: int):
-        self.score += score
-        log.debug(f"Score: {self.score}")
-
-    def read_score(self):
-        return self.score
-
-
 class Action:
     """Action class, specifies player, action type and action itself."""
 
@@ -70,14 +63,21 @@ class Environment:
         self.players: dict[str, Player] = {}
 
         with open(env_config_path, "r") as file:
-            environment_config = yaml.safe_load(file)
+            environment_config = yaml.load(file, Loader=yaml.Loader)
         self.layout_path: Path = layout_path
         # self.counter_side_length = 1  # -> this changed! is 1 now
 
         self.item_info_path: Path = item_info_path
         self.item_info = self.load_item_info()
         self.validate_item_info()
-        self.game_score = GameScore()
+        self.order_and_score = OrderAndScoreManager(
+            order_config=environment_config["orders"],
+            available_meals={
+                item: info
+                for item, info in self.item_info.items()
+                if info.type == ItemType.Meal
+            },
+        )
         plate_transitions = {
             item: {
                 "seconds": info.seconds,
@@ -102,12 +102,13 @@ class Environment:
             "X": Trash,
             "W": lambda pos: ServingWindow(
                 pos,
-                self.game_score,
+                self.order_and_score,
                 meals={
                     item
                     for item, info in self.item_info.items()
                     if info.type == ItemType.Meal
                 },
+                env_time_func=self.get_env_time,
             ),
             "T": lambda pos: Dispenser(pos, self.item_info["Tomato"]),
             "L": lambda pos: Dispenser(pos, self.item_info["Lettuce"]),
@@ -179,12 +180,16 @@ class Environment:
         self.init_counters()
 
         self.env_time = create_init_env_time()
+        self.order_and_score.create_init_orders(self.env_time)
         self.beginning_time = self.env_time
         self.env_time_end = self.env_time + timedelta(
             seconds=environment_config["game"]["time_limit_seconds"]
         )
         log.debug(f"End time: {self.env_time_end}")
 
+    def get_env_time(self):
+        return self.env_time
+
     @property
     def game_ended(self) -> bool:
         return self.env_time >= self.env_time_end
@@ -552,6 +557,7 @@ class Environment:
             for counter in self.counters:
                 if isinstance(counter, (CuttingBoard, Stove, Sink, PlateDispenser)):
                     counter.progress(passed_time=passed_time, now=self.env_time)
+            self.order_and_score.progress(passed_time=passed_time, now=self.env_time)
 
     def get_state(self):
         """Get the current state of the game environment. The state here is accessible by the current python objects.
@@ -562,7 +568,8 @@ class Environment:
         return {
             "players": self.players,
             "counters": self.counters,
-            "score": self.game_score.read_score(),
+            "score": self.order_and_score.score,
+            "orders": self.order_and_score.open_orders,
             "ended": self.game_ended,
             "remaining_time": max(self.env_time_end - self.env_time, timedelta(0)),
         }
diff --git a/setup.py b/setup.py
index b3ceeba705d5d329be64f2dc239e4de19dbc5056..f50b81aaa0c54c9e70528b6b2aaf230355244185 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@ setup(
     license="MIT license",
     long_description=readme + "\n\n" + history,
     include_package_data=True,
-    keywords=["aaambos", "overcooked_simulator"],
+    keywords=["overcooked_simulator"],
     name="overcooked_simulator",
     packages=find_packages(include=["overcooked_simulator", "overcooked_simulator.*"]),
     test_suite="tests",