diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6bfc4d740e72b58f291b0b44b6b958e9fe22822e..d8e1afad0b9c7a5006b9e6b12a2c0402eade3121 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,11 +1,11 @@ pytest: stage: test script: - - apt-get update -qy - - apt-get install -y python3-dev python3-pip - - pip install pytest - - pip install . - - pytest --junitxml=report.xml + - apt-get update -qy + - apt-get install -y python3-dev python3-pip + - pip install pytest + - pip install . + - pytest --junitxml=report.xml artifacts: when: always reports: @@ -13,14 +13,14 @@ pytest: pages: script: - - apt-get update -qy - - apt-get install -y python3-dev python3-pip - - pip install pdoc - - pip install . - - pdoc --output-dir public overcooked_simulator --logo https://gitlab.ub.uni-bielefeld.de/uploads/-/system/project/avatar/6780/Cooking-Vector-Illustration-Icon-Graphics-4267218-1-580x435.jpg + - apt-get update -qy + - apt-get install -y python3-dev python3-pip + - pip install pdoc + - pip install . + - pdoc --output-dir public overcooked_simulator --logo https://gitlab.ub.uni-bielefeld.de/uploads/-/system/project/avatar/6780/Cooking-Vector-Illustration-Icon-Graphics-4267218-1-580x435.jpg --docformat google artifacts: paths: - - public + - public rules: - - if: $CI_COMMIT_BRANCH == "main" + - if: $CI_COMMIT_BRANCH == "main" diff --git a/overcooked_simulator/__init__.py b/overcooked_simulator/__init__.py index bcaf4521b8aa011ddb6ffc1b92f76f901030e2c0..2c7bb07c452710b36d9c0a5aa12cd56325842530 100644 --- a/overcooked_simulator/__init__.py +++ b/overcooked_simulator/__init__.py @@ -8,7 +8,8 @@ The package contains of an environment for cooperation between players/agents. A human or visual agents in 2D. 
A 3D web-enabled version (for example for online studies, currently under development) can be found [here](https://gitlab.ub.uni-bielefeld.de/scs/cocosy/godot-overcooked-3d-visualization) -# Background / Literature The overcooked/cooking domain is a well established cooperation domain/task. There exists +# Background / Literature +The overcooked/cooking domain is a well established cooperation domain/task. There exists environments designed for reinforcement learning agents as well as the game and adaptations of the game for human players in a more "real-time" environment. They all mostly differ in the visual and graphics dimension. 2D versions like overcooked-ai, ... are most known in the community. But more visual appealing 3D versions for cooperation with diff --git a/overcooked_simulator/counters.py b/overcooked_simulator/counters.py index d8d5afedb2946dce7f961fc5e1d8acd556ad1c1f..19fab245651a3ca596da92330ca716557b042452 100644 --- a/overcooked_simulator/counters.py +++ b/overcooked_simulator/counters.py @@ -1,9 +1,10 @@ -""" -All counters are derived from the `Counter` class. -Counters implement the `Counter.pick_up` method, which defines what should happen when the agent wants to pick something up from the counter. -On the other side, the `Counter.drop_off` method receives the item what should be put on the counter. Before that the `Counter.can_drop_off` method checked if the item can be put on the counter. -The progress on Counters or on objects on the counters are handled via the Counters. They have the task to delegate the progress call via the `Counter.progress` method. -On which type of counter the progress method is called is currently defined in the environment class. +"""All counters are derived from the `Counter` class. Counters implement the `Counter.pick_up` method, which defines +what should happen when the agent wants to pick something up from the counter. 
On the other side, +the `Counter.drop_off` method receives the item that should be put on the counter. Before that the +`Counter.can_drop_off` method checks if the item can be put on the counter. The progress on Counters or on objects +on the counters is handled via the Counters. They have the task to delegate the progress call via the +`progress` method, e.g., the `CuttingBoard.progress`. On which type of counter the progress method is called is currently defined in the +environment class. Inside the item_info.yaml, equipment needs to be defined. It includes counters that are part of the interaction/requirements for the interaction. ```yaml @@ -16,6 +17,19 @@ Sink: Stove: type: Equipment ``` + +The defined counter classes are: +- `Counter` +- `CuttingBoard` +- `ServingWindow` +- `Dispenser` +- `PlateDispenser` +- `Trashcan` +- `Stove` (maybe abstracted in a class for all cooking machine counters (stove, deep fryer, oven)) +- `Sink` +- `SinkAddon` + +## Code Documentation """ from __future__ import annotations @@ -45,7 +59,7 @@ log = logging.getLogger(__name__) class TransitionsValueDict(TypedDict): - """The dicts that are the values in the transitions dicts of the `CookingEquipment`.""" + """The values in the transitions dicts of the `CookingEquipment`.""" seconds: int | float """The needed seconds to progress for the transition.""" @@ -56,7 +70,7 @@ class TransitionsValueDict(TypedDict): class TransitionsValueByNameDict(TypedDict): - """The dicts that are the values in the transitions dicts of the `CuttingBoard` and the `Sink`.""" + """The values in the transitions dicts of the `CuttingBoard` and the `Sink`.""" seconds: int | float """The needed seconds to progress for the transition.""" @@ -88,11 +102,10 @@ class Counter: """Gets called upon a player performing the pickup action. If the counter can give something to the player, it does so. In the standard counter this is when an item is on the counter. - Returns: The item which the counter is occupied by.
None if nothing is there. - Args: on_hands: Will the item be put on empty hands or on a cooking equipment. + Returns: The item which the counter is occupied by. None if nothing is there. """ if on_hands: if self.occupied_by: @@ -172,7 +185,7 @@ class CuttingBoard(Counter): Args: passed_time: the time passed since the last progress call - now: the current env time. Not the same as `datetime.now` + now: the current env time. **Not the same as `datetime.now`**. Checks if the item on the board is in the allowed transitions via a Cutting board. Pass the progress call to the item on the board. If the progress on the item reaches 100% it changes the name of the item based on the @@ -580,14 +593,6 @@ class SinkAddon(Counter): return self.occupied_by and self.occupied_by[-1].can_combine(item) def drop_off(self, item: Item) -> Item | None: - """Takes the thing dropped of by the player. - - Args: - item: The item to be placed on the counter. - - Returns: - - """ return self.occupied_by[-1].combine(item) def add_clean_plate(self, plate: Plate): diff --git a/overcooked_simulator/game_items.py b/overcooked_simulator/game_items.py index 5e9ecd43d8a2695ec4a3a87400f518dcba7c1b60..08e255e3eb139c98589c7ad681b82054bc2c2b75 100644 --- a/overcooked_simulator/game_items.py +++ b/overcooked_simulator/game_items.py @@ -1,12 +1,21 @@ -"""" +""" The game items that a player can hold. They have methods that -- check if items can be combined (`Item.can_combine`): cooking equipment and ingredients, and so on +- check if items can be combined (`Item.can_combine`): cooking equipment and ingredients, and so on, - combine the items after a successful check (`Item.combine`), - and a method to call the progress on the items (`Item.progress`) All game items need to be specified in the `item_info.yml`. + +The following classes are used for the base for all game items: +- `Item`: ingredients and meals. +- `CookingEquipment`: pots, pans, etc. +- `Plate`: clean and dirty plates. 
+ +The `ItemInfo` is the dataclass for the items in the `item_info.yml`. + +## Code Documentation """ from __future__ import annotations diff --git a/overcooked_simulator/gui_2d_vis/__init__.py b/overcooked_simulator/gui_2d_vis/__init__.py index 3afb0b56bf80f430bf71e2d165e46f4e396f1fe7..775c676a8e77867394eabeca428bb80673ee0ed0 100644 --- a/overcooked_simulator/gui_2d_vis/__init__.py +++ b/overcooked_simulator/gui_2d_vis/__init__.py @@ -2,24 +2,18 @@ 2D visualization of the overcooked simulator. You can select the layout and start an environment: - -You can play the overcooked simulator. You can quit the application in the top right or end the level in the bottom right. - - -The orders are pictured in the top, the current score in the bottom left and the remaining time in the bottom. - - -The final screen after ending a level shows the score: - +- You can play the overcooked simulator. You can quit the application in the top right or end the level in the bottom right: [Screenshot](images/overcooked-start-screen.png) +- The orders are pictured in the top, the current score in the bottom left and the remaining time in the bottom: [Screenshot](https://gitlab.ub.uni-bielefeld.de/scs/cocosy/overcooked-simulator/-/raw/main/overcooked_simulator/gui_2d_vis/images/overcooked-end-screen.png?ref_type=heads) +- The final screen after ending a level shows the score: [Screenshot](https://gitlab.ub.uni-bielefeld.de/scs/cocosy/overcooked-simulator/-/raw/main/overcooked_simulator/gui_2d_vis/images/overcooked-end-screen.png?ref_type=heads) The keys for the control of the players are: -Player 1: +### Player 1: - Movement: `W`, `A`, `S`, `D`, - Pickup: `E` - Interact: `F` -Player 2: +### Player 2: - Movement: `⬆`, `⬅`, `⬇`, `➡` (arrow keys) - Pickup: `I` - Interact: `SPACE` diff --git a/overcooked_simulator/order.py b/overcooked_simulator/order.py index 9cb7d86a97f64f614f0da573aec0ee8eae937b1f..21ed894476ecad1cdd919753d265623439ffcbab 100644 --- a/overcooked_simulator/order.py +++ 
b/overcooked_simulator/order.py @@ -16,7 +16,29 @@ tuple of a bool and the score. If the bool is true, the score will be added to t accept the meal for serving. The `order_gen_class` should be a child of the `OrderGeneration` class. The `order_gen_kwargs` depend then on your -class referenced.""" +class referenced. + +This file defines the following classes: +- `Order` +- `OrderGeneration` +- `OrderAndScoreManager` + +Further, it defines some implementations for the basic order generation based on random sampling: +- `RandomOrderGeneration` +- `simple_score_calc_gen_func` +- `simple_score_calc_func` +- `zero` + +For an easier usage of the random orders, also some classes for type hints and dataclasses are defined: +- `RandomOrderKwarg` +- `RandomFuncConfig` +- `ScoreCalcFuncType` +- `ScoreCalcGenFuncType` +- `ExpiredPenaltyFuncType` + + +## Code Documentation +""" from __future__ import annotations import dataclasses @@ -108,16 +130,176 @@ class OrderGeneration: ... +class OrderAndScoreManager: + """The Order and Score Manager that is called from the serving window.""" + + def __init__(self, order_config, available_meals: dict[str, ItemInfo]): + self.score = 0 + self.order_gen: OrderGeneration = order_config["order_gen_class"]( + available_meals=available_meals, kwargs=order_config["order_gen_kwargs"] + ) + self.serving_not_ordered_meals: Callable[ + [Item], Tuple[bool, float] + ] = order_config["serving_not_ordered_meals"] + """Function that decides if not ordered meals can be served and what score it gives""" + self.available_meals = available_meals + """The meals for that orders can be sampled from.""" + self.open_orders: Deque[Order] = deque() + """Current open orders. This attribute is used for the environment state.""" + + # TODO log who / which player served which meal -> for split scores + self.served_meals: list[Tuple[Item, datetime]] = [] + """List of served meals.
Maybe for the end screen.""" + self.last_finished: list[Order] = [] + """Cache last finished orders for `OrderGeneration.get_orders` call. From the served meals.""" + self.next_relevant_time: datetime = datetime.max + """For reduced order checking. Store the next time when to create an order or check for penalties.""" + self.last_expired: list[Order] = [] + """Cache last expired orders for `OrderGeneration.get_orders` call.""" + + def update_next_relevant_time(self): + next_relevant_time = datetime.max + for order in self.open_orders: + next_relevant_time = min( + next_relevant_time, order.start_time + order.max_duration + ) + for penalty in order._timed_penalties: + next_relevant_time = min(next_relevant_time, penalty[0]) + self.next_relevant_time = next_relevant_time + + def serve_meal(self, item: Item, env_time: datetime) -> bool: + if isinstance(item, Plate): + meal = item.get_potential_meal() + if meal is not None: + if meal.name in self.available_meals: + order = self.find_order_for_meal(meal) + if order is None: + if self.serving_not_ordered_meals: + accept, score = self.serving_not_ordered_meals(meal) + if accept: + log.info( + f"Serving meal without order {meal.name} with score {score}" + ) + self.increment_score(score) + self.served_meals.append((meal, env_time)) + return accept + log.info( + f"Do not serve meal {meal.name} because it is not ordered" + ) + return False + order, index = order + score = order.score_calc( + relative_order_time=env_time - order.start_time, + order=order, + ) + self.increment_score(score) + order.finished_info = { + "end_time": env_time, + "score": score, + } + log.info(f"Serving meal {meal.name} with order with score {score}") + self.last_finished.append(order) + del self.open_orders[index] + self.served_meals.append((meal, env_time)) + return True + log.info(f"Do not serve item {item}") + return False + + def increment_score(self, score: int | float): + self.score += score + log.debug(f"Score: {self.score}") + + def 
create_init_orders(self, env_time): + """Create the initial orders in an environment.""" + init_orders = self.order_gen.init_orders(env_time) + self.setup_penalties(new_orders=init_orders, env_time=env_time) + self.open_orders.extend(init_orders) + + def progress(self, passed_time: timedelta, now: datetime): + """Check expired orders and check order generation.""" + new_orders = self.order_gen.get_orders( + passed_time=passed_time, + now=now, + new_finished_orders=self.last_finished, + expired_orders=self.last_expired, + ) + self.setup_penalties(new_orders=new_orders, env_time=now) + self.open_orders.extend(new_orders) + self.last_finished = [] + self.last_expired = [] + if new_orders or self.next_relevant_time <= now: + # reduce checking calls + + remove_orders: list[int] = [] + for index, order in enumerate(self.open_orders): + if now >= order.start_time + order.max_duration: + # orders expired + self.increment_score(order.expired_penalty) + remove_orders.append(index) + continue # no penalties for expired orders + remove_penalties = [] + for i, (penalty_time, penalty) in enumerate(order.timed_penalties): + # check penalties + if penalty_time < now: + self.score -= penalty + remove_penalties.append(i) + + for i in reversed(remove_penalties): + # or del order.timed_penalties[index] + order.timed_penalties.pop(i) + + expired_orders: list[Order] = [] + for remove_order in reversed(remove_orders): + expired_orders.append(self.open_orders[remove_order]) + del self.open_orders[remove_order] + self.last_expired = expired_orders + + self.update_next_relevant_time() + + def find_order_for_meal(self, meal) -> Tuple[Order, int] | None: + for index, order in enumerate(self.open_orders): + if order.meal.name == meal.name: + return order, index + + @staticmethod + def setup_penalties(new_orders: list[Order], env_time: datetime): + """Call the `Order.create_penalties` method for new orders.""" + for order in new_orders: + order.create_penalties(env_time) + + class 
ScoreCalcFuncType(Protocol): """Typed kwargs of the expected `Order.score_calc` function. Which is also returned by the - `RandomOrderKwarg.score_calc_gen_func`.""" + `RandomOrderKwarg.score_calc_gen_func`. + + The function should calculate the score for the completed orders. + + Args: + relative_order_time: `timedelta` the duration how long the order was active. + order: `Order` the order that was completed. + + Returns: + `float`: the score for a completed order and duration of the order. + """ def __call__(self, relative_order_time: timedelta, order: Order) -> float: ... class ScoreCalcGenFuncType(Protocol): - """Typed kwargs of the expected function for the `RandomOrderKwarg.score_calc_gen_func`.""" + """Typed kwargs of the expected function for the `RandomOrderKwarg.score_calc_gen_func`. + + Generate the ScoreCalcFunc for an order based on its meal, duration etc. + + Args: + meal: `ItemInfo` the type of meal the order orders. + duration: `timedelta` the duration after the order expires. + now: `datetime` the environment time the order is created. + kwargs: `dict` the static kwargs defined in the `environment_config.yml` + + Returns: + `ScoreCalcFuncType` a reference to a function that calculates the score for a completed meal. + """ def __call__( self, @@ -131,9 +313,13 @@ class ScoreCalcGenFuncType(Protocol): class ExpiredPenaltyFuncType(Protocol): - """Type with kwargs of the expected function for the `RandomOrderKwarg.expired_penalty_func`. + """Typed kwargs of the expected function for the `RandomOrderKwarg.expired_penalty_func`. + + An example is the `zero` function. - An example is the `zero` function.""" + Args: + item: `ItemInfo` the meal of the order that expired. It is calculated before the order is active. + """ def __call__(self, item: ItemInfo, **kwargs) -> float: ... 
@@ -142,24 +328,29 @@ class ExpiredPenaltyFuncType(Protocol): def zero(item: ItemInfo, **kwargs) -> float: """Example and default for the `RandomOrderKwarg.expired_penalty_func` function. - Just no penalty for expired orders.""" + Just no penalty for expired orders. + + Returns: + zero / 0.0 + """ return 0.0 class RandomFuncConfig(TypedDict): - """Types of the dict for sampling with different random functions from the `random` library. + """Types of the dict for sampling with different random functions from the [`random` library](https://docs.python.org/3/library/random.html). Example: - Sampling uniformly between `10` and `20`. + Sampling [uniform](https://docs.python.org/3/library/random.html#random.uniform)ly between `10` and `20`. ```yaml func: uniform kwargs: a: 10 b: 20 ``` - Or in pyton: + + Or in Python: ```python - random_func: RandomFuncConfig = {'func': 'uniform', 'kwargs': {'a': 10, 'b': 20}} + random_func = {'func': 'uniform', 'kwargs': {'a': 10, 'b': 20}} ``` """ @@ -378,141 +569,3 @@ def simple_expired_penalty(item: ItemInfo, default: float, **kwargs) -> float: ``` """ return default - - -class OrderAndScoreManager: - """The Order and Score Manager that is called from the serving window.""" - - def __init__(self, order_config, available_meals: dict[str, ItemInfo]): - self.score = 0 - self.order_gen: OrderGeneration = order_config["order_gen_class"]( - available_meals=available_meals, kwargs=order_config["order_gen_kwargs"] - ) - self.serving_not_ordered_meals: Callable[ - [Item], Tuple[bool, float] - ] = order_config["serving_not_ordered_meals"] - """Function that decides if not ordered meals can be served and what score it gives""" - self.available_meals = available_meals - """The meals for that orders can be sampled from.""" - self.open_orders: Deque[Order] = deque() - """Current open orders. 
This attribute is used for the environment state.""" - - # TODO log who / which player served which meal -> for split scores - self.served_meals: list[Tuple[Item, datetime]] = [] - """List of served meals. Maybe for the end screen.""" - self.last_finished: list[Order] = [] - """Cache last finished orders for `OrderGeneration.get_orders` call. From the served meals.""" - self.next_relevant_time: datetime = datetime.max - """For reduced order checking. Store the next time when to create an order or check for penalties.""" - self.last_expired: list[Order] = [] - """Cache last expired orders for `OrderGeneration.get_orders` call.""" - - def update_next_relevant_time(self): - next_relevant_time = datetime.max - for order in self.open_orders: - next_relevant_time = min( - next_relevant_time, order.start_time + order.max_duration - ) - for penalty in order._timed_penalties: - next_relevant_time = min(next_relevant_time, penalty[0]) - self.next_relevant_time = next_relevant_time - - def serve_meal(self, item: Item, env_time: datetime) -> bool: - if isinstance(item, Plate): - meal = item.get_potential_meal() - if meal is not None: - if meal.name in self.available_meals: - order = self.find_order_for_meal(meal) - if order is None: - if self.serving_not_ordered_meals: - accept, score = self.serving_not_ordered_meals(meal) - if accept: - log.info( - f"Serving meal without order {meal.name} with score {score}" - ) - self.increment_score(score) - self.served_meals.append((meal, env_time)) - return accept - log.info( - f"Do not serve meal {meal.name} because it is not ordered" - ) - return False - order, index = order - score = order.score_calc( - relative_order_time=env_time - order.start_time, - order=order, - ) - self.increment_score(score) - order.finished_info = { - "end_time": env_time, - "score": score, - } - log.info(f"Serving meal {meal.name} with order with score {score}") - self.last_finished.append(order) - del self.open_orders[index] - self.served_meals.append((meal, 
env_time)) - return True - log.info(f"Do not serve item {item}") - return False - - def increment_score(self, score: int | float): - self.score += score - log.debug(f"Score: {self.score}") - - def create_init_orders(self, env_time): - """Create the initial orders in an environment.""" - init_orders = self.order_gen.init_orders(env_time) - self.setup_penalties(new_orders=init_orders, env_time=env_time) - self.open_orders.extend(init_orders) - - def progress(self, passed_time: timedelta, now: datetime): - """Check expired orders and check order generation.""" - new_orders = self.order_gen.get_orders( - passed_time=passed_time, - now=now, - new_finished_orders=self.last_finished, - expired_orders=self.last_expired, - ) - self.setup_penalties(new_orders=new_orders, env_time=now) - self.open_orders.extend(new_orders) - self.last_finished = [] - self.last_expired = [] - if new_orders or self.next_relevant_time <= now: - # reduce checking calls - - remove_orders: list[int] = [] - for index, order in enumerate(self.open_orders): - if now >= order.start_time + order.max_duration: - # orders expired - self.increment_score(order.expired_penalty) - remove_orders.append(index) - continue # no penalties for expired orders - remove_penalties = [] - for i, (penalty_time, penalty) in enumerate(order.timed_penalties): - # check penalties - if penalty_time < now: - self.score -= penalty - remove_penalties.append(i) - - for i in reversed(remove_penalties): - # or del order.timed_penalties[index] - order.timed_penalties.pop(i) - - expired_orders: list[Order] = [] - for remove_order in reversed(remove_orders): - expired_orders.append(self.open_orders[remove_order]) - del self.open_orders[remove_order] - self.last_expired = expired_orders - - self.update_next_relevant_time() - - def find_order_for_meal(self, meal) -> Tuple[Order, int] | None: - for index, order in enumerate(self.open_orders): - if order.meal.name == meal.name: - return order, index - - @staticmethod - def 
setup_penalties(new_orders: list[Order], env_time: datetime): - """Call the `Order.create_penalties` method for new orders.""" - for order in new_orders: - order.create_penalties(env_time) diff --git a/overcooked_simulator/overcooked_environment.py b/overcooked_simulator/overcooked_environment.py index 0671a0a20adcbc5f9d5ffe9a8151ce04d44add16..b60ed369dbeab16833d0a2dad499ec0bac84d59a 100644 --- a/overcooked_simulator/overcooked_environment.py +++ b/overcooked_simulator/overcooked_environment.py @@ -74,11 +74,13 @@ class Action: return f"Action({self.player},{self.action_type.value},{self.action_data})" +# TODO Abstract base class for different environments + + class Environment: """Environment class which handles the game logic for the overcooked-inspired environment. Handles player movement, collision-detection, counters, cooking processes, recipes, incoming orders, time. - # TODO Abstract base class for different environments """ def __init__(self, env_config_path: Path, layout_path, item_info_path: Path): @@ -248,6 +250,7 @@ class Environment: @property def game_ended(self) -> bool: + """Whether the game is over or not based on the calculated `Environment.env_time_end`""" return self.env_time >= self.env_time_end def load_item_info(self) -> dict[str, ItemInfo]: @@ -559,7 +562,6 @@ class Environment: A counter is modelled as a rectangle (square actually), a player is modelled as a circle. The distance of the player position (circle center) and the counter rectangle is calculated, if it is smaller than the player radius, a collision is detected. - TODO: Efficiency improvement by checking only nearest counters? Quadtree...? Args: player: The player to check the collision for. @@ -572,6 +574,7 @@ class Environment: dx = max(np.abs(cx - counter.pos[0]) - 1 / 2, 0) dy = max(np.abs(cy - counter.pos[1]) - 1 / 2, 0) distance = np.linalg.norm([dx, dy]) + # TODO: Efficiency improvement by checking only nearest counters? Quadtree...? 
return distance < player.radius def add_player(self, player_name: str, pos: npt.NDArray = None): @@ -632,7 +635,9 @@ class Environment: for counter in self.counters: if isinstance(counter, (CuttingBoard, Stove, Sink, PlateDispenser)): counter.progress(passed_time=passed_time, now=self.env_time) - self.order_and_score.progress(passed_time=passed_time, now=self.env_time) + self.order_and_score.progress( + passed_time=passed_time, now=self.env_time + ) def get_state(self): """Get the current state of the game environment. The state here is accessible by the current python objects. diff --git a/overcooked_simulator/player.py b/overcooked_simulator/player.py index 6ff77cb692814c8ceebb5dfb0861306f8d9c5984..2e09040d474e98066aab5e86fa430dc16ec1d9c9 100644 --- a/overcooked_simulator/player.py +++ b/overcooked_simulator/player.py @@ -1,11 +1,9 @@ """The player contains the logic which method to call on counters and items for a pick action: -If the player holds nothing, it picks up the content from the counter. - -If the item the player holds can be dropped on the counter it will do so. - -If the counter is not a sink or plate dispenser, it checks if it can combine the content on the counter with the -holding object. If so, it picks up the content and combines it on its hands. +* If the player **holds nothing**, it **picks up** the content from the counter. +* If the **item** the player **holds** can be **dropped** on the counter it will do so. +* If the counter is not a sink or plate dispenser, it checks if it **can combine the content** on the counter **with the +holding object**. If so, it picks up the content and combines it on its hands. 
""" import dataclasses @@ -48,12 +46,15 @@ class Player: pos: Optional[npt.NDArray[float]] = None, ): self.name: str = name + """Reference for the player""" + + self.pos: npt.NDArray[float] | None = None + """The initial/suggested position of the player.""" if pos is not None: self.pos: npt.NDArray[float] = np.array(pos, dtype=float) - else: - self.pos = None self.holding: Optional[Item] = None + """What item the player is holding.""" self.radius: float = player_config.radius """See `PlayerConfig.radius`.""" @@ -66,9 +67,13 @@ class Player: self.last_interacted_counter: Optional[ Counter ] = None # needed to stop progress when moved away + """With which counter the player interacted with in the last environment step.""" self.current_nearest_counter: Optional[Counter] = None + """The counter to interact with.""" self.facing_point: npt.NDArray[float] = np.zeros(2, float) + """A point on the "circle" of the players border in the `facing_direction` with which the closest counter is + calculated with.""" def move(self, movement: npt.NDArray[float]): """Moves the player position by the given movement vector. @@ -102,6 +107,7 @@ class Player: self.update_facing_point() def update_facing_point(self): + """Update facing point on the player border circle based on the radius.""" self.facing_point = self.pos + (self.facing_direction * self.radius * 0.5) def can_reach(self, counter: Counter): diff --git a/overcooked_simulator/simulation_runner.py b/overcooked_simulator/simulation_runner.py index f0a6e9c8667ffd5f68338f407f8546cf1419195d..d4deebcd2ac2e35e2ae0b59476cb2a7c30ef16ac 100644 --- a/overcooked_simulator/simulation_runner.py +++ b/overcooked_simulator/simulation_runner.py @@ -17,6 +17,8 @@ class Simulator(Thread): Main Simulator class which runs the game environment. Players can be registered in the game. The simulator is run as its own thread. + Is a child class of the `Thread` class from the `threading` library. 
+ Typical usage example: ```python sim = Simulator() @@ -33,15 +35,28 @@ class Simulator(Thread): item_info_path=ROOT_DIR / "game_content" / "item_info.yaml", seed: int = 8654321, ): + """Constructor of the `Simulator` class. + + Args: + env_config_path: Path to the environment configuration file. + layout_path: Path to the layout file. + frequency: Frequency of the environment step function call. + item_info_path: Path to the item information configuration file. + seed: Random seed to set the numpy random number generator. + """ # TODO look at https://builtin.com/data-science/numpy-random-seed to change to other random np.random.seed(seed) self.finished: bool = False + """The environment runs as long it is `True`""" self.step_frequency: int = frequency + """Frequency of the environment step function call.""" self.preferred_sleep_time_ns: float = 1e9 / self.step_frequency + """If the environment step call would need no computation time. The duration for one "frame".""" self.env: Environment = Environment( env_config_path, layout_path, item_info_path ) + """Reference to the `Environment`.""" super().__init__() @@ -63,7 +78,6 @@ class Simulator(Thread): Returns: The current state of the game. Currently, as dict with lists of environment objects. """ - return self.env.get_state() def get_state_json(self): @@ -72,14 +86,14 @@ class Simulator(Thread): Returns: The gamest ate encoded in a json style nested dict. """ - return self.env.get_state_json() def register_player(self, player_name: str, pos=None): """Adds a player to the environment. Args: - player: The player to be added. + player_name: the reference to the player (name/id). + pos: optional position of the player. """ self.env.add_player(player_name, pos) @@ -89,13 +103,11 @@ class Simulator(Thread): Args: players: List of players to be added. """ - for p in players: self.register_player(p) def run(self): """Starts the simulator thread.
Runs in a loop until stopped.""" - overslept_in_ns = 0 self.env.reset_env_time() last_step_start = time.time_ns()