added missing and weird doc strings. Fixed doc format for website.

0cf09ef8 · Florian Schröder · 059de001 · 0cf09ef8 · 0cf09ef8 · 0cf09ef8
Commit 0cf09ef8 authored 1 year ago by Florian Schröder
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
 pytest:
  stage: test
  script:
-  - apt-get update -qy
+    - apt-get update -qy
-  - apt-get install -y python3-dev python3-pip
+    - apt-get install -y python3-dev python3-pip
-  - pip install pytest
+    - pip install pytest
-  - pip install .
+    - pip install .
-  - pytest --junitxml=report.xml
+    - pytest --junitxml=report.xml
  artifacts:
    when: always
    reports:
@@ -13,14 +13,14 @@ pytest:
 pages:
  script:
-  - apt-get update -qy
+    - apt-get update -qy
-  - apt-get install -y python3-dev python3-pip
+    - apt-get install -y python3-dev python3-pip
-  - pip install pdoc
+    - pip install pdoc
-  - pip install .
+    - pip install .
-  - pdoc --output-dir public overcooked_simulator  --logo https://gitlab.ub.uni-bielefeld.de/uploads/-/system/project/avatar/6780/Cooking-Vector-Illustration-Icon-Graphics-4267218-1-580x435.jpg
+    - pdoc --output-dir public overcooked_simulator  --logo https://gitlab.ub.uni-bielefeld.de/uploads/-/system/project/avatar/6780/Cooking-Vector-Illustration-Icon-Graphics-4267218-1-580x435.jpg --docformat google
  artifacts:
    paths:
-    - public
+      - public
  rules:
-  - if: $CI_COMMIT_BRANCH == "main"
+    - if: $CI_COMMIT_BRANCH == "main"
--- a/overcooked_simulator/__init__.py
+++ b/overcooked_simulator/__init__.py
@@ -8,7 +8,8 @@ The package contains of an environment for cooperation between players/agents. A
 human or visual agents in 2D. A 3D web-enabled version (for example for online studies, currently under development)
 can be found [here](https://gitlab.ub.uni-bielefeld.de/scs/cocosy/godot-overcooked-3d-visualization)
-# Background / Literature The overcooked/cooking domain is a well established cooperation domain/task. There exists
+# Background / Literature
+The overcooked/cooking domain is a well established cooperation domain/task. There exists
 environments designed for reinforcement learning agents as well as the game and adaptations of the game for human
 players in a more "real-time" environment. They all mostly differ in the visual and graphics dimension. 2D versions
 like overcooked-ai, ... are most known in the community. But more visual appealing 3D versions for cooperation with

--- a/overcooked_simulator/counters.py
+++ b/overcooked_simulator/counters.py
-"""
+"""All counters are derived from the `Counter` class. Counters implement the `Counter.pick_up` method, which defines
-All counters are derived from the `Counter` class.
+what should happen when the agent wants to pick something up from the counter. On the other side,
-Counters implement the `Counter.pick_up` method, which defines what should happen when the agent wants to pick something up from the counter.
+the `Counter.drop_off` method receives the item that should be put on the counter. Before that the
-On the other side, the `Counter.drop_off` method receives the item what should be put on the counter. Before that the `Counter.can_drop_off` method checked if the item can be put on the counter.
+`Counter.can_drop_off` method checks if the item can be put on the counter. The progress on Counters or on objects
-The progress on Counters or on objects on the counters are handled via the Counters. They have the task to delegate the progress call via the `Counter.progress` method.
+on the counters is handled via the Counters. They have the task to delegate the progress call via the
-On which type of counter the progress method is called is currently defined in the environment class.
+`progress` method, e.g., the `CuttingBoard.progress`. On which type of counter the progress method is called is currently defined in the
+environment class.
 Inside the item_info.yaml, equipment needs to be defined. It includes counters that are part of the interaction/requirements for the interaction.
 ```yaml
@@ -16,6 +17,19 @@ Sink:
 Stove:
  type: Equipment
 ```
+The defined counter classes are:
+- `Counter`
+- `CuttingBoard`
+- `ServingWindow`
+- `Dispenser`
+- `PlateDispenser`
+- `Trashcan`
+- `Stove` (maybe abstracted in a class for all cooking machine counters (stove, deep fryer, oven))
+- `Sink`
+- `SinkAddon`
+## Code Documentation
 """
 from __future__ import annotations
@@ -45,7 +59,7 @@ log = logging.getLogger(__name__)
 class TransitionsValueDict(TypedDict):
-    """The dicts that are the values in the transitions dicts of the `CookingEquipment`."""
+    """The values in the transitions dicts of the `CookingEquipment`."""
    seconds: int | float
    """The needed seconds to progress for the transition."""
@@ -56,7 +70,7 @@ class TransitionsValueDict(TypedDict):
 class TransitionsValueByNameDict(TypedDict):
-    """The dicts that are the values in the transitions dicts of the `CuttingBoard` and the `Sink`."""
+    """The values in the transitions dicts of the `CuttingBoard` and the `Sink`."""
    seconds: int | float
    """The needed seconds to progress for the transition."""
@@ -88,11 +102,10 @@ class Counter:
        """Gets called upon a player performing the pickup action. If the counter can give something to
        the player, it does so. In the standard counter this is when an item is on the counter.
-        Returns: The item which the counter is occupied by. None if nothing is there.
        Args:
            on_hands: Will the item be put on empty hands or on a cooking equipment.
+        Returns: The item which the counter is occupied by. None if nothing is there.
        """
        if on_hands:
            if self.occupied_by:
@@ -172,7 +185,7 @@ class CuttingBoard(Counter):
        Args:
            passed_time: the time passed since the last progress call
-            now: the current env time. Not the same as `datetime.now`
+            now: the current env time. **Not the same as `datetime.now`**.
        Checks if the item on the board is in the allowed transitions via a Cutting board. Pass the progress call to
        the item on the board. If the progress on the item reaches 100% it changes the name of the item based on the
@@ -580,14 +593,6 @@ class SinkAddon(Counter):
        return self.occupied_by and self.occupied_by[-1].can_combine(item)
    def drop_off(self, item: Item) -> Item | None:
-        """Takes the thing dropped of by the player.
-        Args:
-            item: The item to be placed on the counter.
-        Returns:
-        """
        return self.occupied_by[-1].combine(item)
    def add_clean_plate(self, plate: Plate):

--- a/overcooked_simulator/game_items.py
+++ b/overcooked_simulator/game_items.py
-""""
+"""
 The game items that a player can hold.
 They have methods that
- check if items can be combined (`Item.can_combine`): cooking equipment and ingredients, and so on
+- check if items can be combined (`Item.can_combine`): cooking equipment and ingredients, and so on,
 - combine the items after a successful check (`Item.combine`),
 - and a method to call the progress on the items (`Item.progress`)
 All game items need to be specified in the `item_info.yml`.
+The following classes are used for the base for all game items:
+- `Item`: ingredients and meals.
+- `CookingEquipment`: pots, pans, etc.
+- `Plate`: clean and dirty plates.
+The `ItemInfo` is the dataclass for the items in the `item_info.yml`.
+## Code Documentation
 """
 from __future__ import annotations

--- a/overcooked_simulator/gui_2d_vis/__init__.py
+++ b/overcooked_simulator/gui_2d_vis/__init__.py
@@ -2,24 +2,18 @@
 2D visualization of the overcooked simulator.
 You can select the layout and start an environment:
+- You can play the overcooked simulator. You can quit the application in the top right or end the level in the bottom right: [Screenshot](images/overcooked-start-screen.png)
-You can play the overcooked simulator. You can quit the application in the top right or end the level in the bottom right.
+- The orders are pictured in the top, the current score in the bottom left and the remaining time in the bottom: [Screenshot](https://gitlab.ub.uni-bielefeld.de/scs/cocosy/overcooked-simulator/-/raw/main/overcooked_simulator/gui_2d_vis/images/overcooked-level-screen.png?ref_type=heads)
-![start screen](images/overcooked-start-screen.png)
+- The final screen after ending a level shows the score: [Screenshot](https://gitlab.ub.uni-bielefeld.de/scs/cocosy/overcooked-simulator/-/raw/main/overcooked_simulator/gui_2d_vis/images/overcooked-end-screen.png?ref_type=heads)
-The orders are pictured in the top, the current score in the bottom left and the remaining time in the bottom.
-![level screen](images/overcooked-level-screen.png)
-The final screen after ending a level shows the score:
-![end screen](images/overcooked-end-screen.png)
 The keys for the control of the players are:
-Player 1:
+### Player 1:
 - Movement: `W`, `A`, `S`, `D`,
 - Pickup: `E`
 - Interact: `F`
-Player 2:
+### Player 2:
 - Movement: `⬆`, `⬅`, `⬇`, `➡` (arrow keys)
 - Pickup: `I`
 - Interact: `SPACE`

--- a/overcooked_simulator/order.py
+++ b/overcooked_simulator/order.py
@@ -16,7 +16,29 @@ tuple of a bool and the score. If the bool is true, the score will be added to t
 accept the meal for serving.
 The `order_gen_class` should be a child of the `OrderGeneration` class. The `order_gen_kwargs` depend then on your
-class referenced."""
+class referenced.
+This file defines the following classes:
+- `Order`
+- `OrderGeneration`
+- `OrderAndScoreManager`
+Further, it defines some implementations for the basic order generation based on random sampling:
+- `RandomOrderGeneration`
+- `simple_score_calc_gen_func`
+- `simple_score_calc_func`
+- `zero`
+For an easier usage of the random orders, also some classes for type hints and dataclasses are defined:
+- `RandomOrderKwarg`
+- `RandomFuncConfig`
+- `ScoreCalcFuncType`
+- `ScoreCalcGenFuncType`
+- `ExpiredPenaltyFuncType`
+## Code Documentation
+"""
 from __future__ import annotations
 import dataclasses
@@ -108,16 +130,176 @@ class OrderGeneration:
        ...
+class OrderAndScoreManager:
+    """The Order and Score Manager that is called from the serving window."""
+    def __init__(self, order_config, available_meals: dict[str, ItemInfo]):
+        self.score = 0
+        self.order_gen: OrderGeneration = order_config["order_gen_class"](
+            available_meals=available_meals, kwargs=order_config["order_gen_kwargs"]
+        )
+        self.serving_not_ordered_meals: Callable[
+            [Item], Tuple[bool, float]
+        ] = order_config["serving_not_ordered_meals"]
+        """Function that decides if not ordered meals can be served and what score it gives"""
+        self.available_meals = available_meals
+        """The meals for that orders can be sampled from."""
+        self.open_orders: Deque[Order] = deque()
+        """Current open orders. This attribute is used for the environment state."""
+        # TODO log who / which player served which meal -> for split scores
+        self.served_meals: list[Tuple[Item, datetime]] = []
+        """List of served meals. Maybe for the end screen."""
+        self.last_finished: list[Order] = []
+        """Cache last finished orders for `OrderGeneration.get_orders` call. From the served meals."""
+        self.next_relevant_time: datetime = datetime.max
+        """For reduced order checking. Store the next time when to create an order or check for penalties."""
+        self.last_expired: list[Order] = []
+        """Cache last expired orders for `OrderGeneration.get_orders` call."""
+    def update_next_relevant_time(self):
+        next_relevant_time = datetime.max
+        for order in self.open_orders:
+            next_relevant_time = min(
+                next_relevant_time, order.start_time + order.max_duration
+            )
+            for penalty in order._timed_penalties:
+                next_relevant_time = min(next_relevant_time, penalty[0])
+        self.next_relevant_time = next_relevant_time
+    def serve_meal(self, item: Item, env_time: datetime) -> bool:
+        if isinstance(item, Plate):
+            meal = item.get_potential_meal()
+            if meal is not None:
+                if meal.name in self.available_meals:
+                    order = self.find_order_for_meal(meal)
+                    if order is None:
+                        if self.serving_not_ordered_meals:
+                            accept, score = self.serving_not_ordered_meals(meal)
+                            if accept:
+                                log.info(
+                                    f"Serving meal without order {meal.name} with score {score}"
+                                )
+                                self.increment_score(score)
+                                self.served_meals.append((meal, env_time))
+                            return accept
+                        log.info(
+                            f"Do not serve meal {meal.name} because it is not ordered"
+                        )
+                        return False
+                    order, index = order
+                    score = order.score_calc(
+                        relative_order_time=env_time - order.start_time,
+                        order=order,
+                    )
+                    self.increment_score(score)
+                    order.finished_info = {
+                        "end_time": env_time,
+                        "score": score,
+                    }
+                    log.info(f"Serving meal {meal.name} with order with score {score}")
+                    self.last_finished.append(order)
+                    del self.open_orders[index]
+                    self.served_meals.append((meal, env_time))
+                    return True
+        log.info(f"Do not serve item {item}")
+        return False
+    def increment_score(self, score: int | float):
+        self.score += score
+        log.debug(f"Score: {self.score}")
+    def create_init_orders(self, env_time):
+        """Create the initial orders in an environment."""
+        init_orders = self.order_gen.init_orders(env_time)
+        self.setup_penalties(new_orders=init_orders, env_time=env_time)
+        self.open_orders.extend(init_orders)
+    def progress(self, passed_time: timedelta, now: datetime):
+        """Check expired orders and check order generation."""
+        new_orders = self.order_gen.get_orders(
+            passed_time=passed_time,
+            now=now,
+            new_finished_orders=self.last_finished,
+            expired_orders=self.last_expired,
+        )
+        self.setup_penalties(new_orders=new_orders, env_time=now)
+        self.open_orders.extend(new_orders)
+        self.last_finished = []
+        self.last_expired = []
+        if new_orders or self.next_relevant_time <= now:
+            # reduce checking calls
+            remove_orders: list[int] = []
+            for index, order in enumerate(self.open_orders):
+                if now >= order.start_time + order.max_duration:
+                    # orders expired
+                    self.increment_score(order.expired_penalty)
+                    remove_orders.append(index)
+                    continue  # no penalties for expired orders
+                remove_penalties = []
+                for i, (penalty_time, penalty) in enumerate(order.timed_penalties):
+                    # check penalties
+                    if penalty_time < now:
+                        self.score -= penalty
+                        remove_penalties.append(i)
+                for i in reversed(remove_penalties):
+                    # or del order.timed_penalties[index]
+                    order.timed_penalties.pop(i)
+            expired_orders: list[Order] = []
+            for remove_order in reversed(remove_orders):
+                expired_orders.append(self.open_orders[remove_order])
+                del self.open_orders[remove_order]
+            self.last_expired = expired_orders
+            self.update_next_relevant_time()
+    def find_order_for_meal(self, meal) -> Tuple[Order, int] | None:
+        for index, order in enumerate(self.open_orders):
+            if order.meal.name == meal.name:
+                return order, index
+    @staticmethod
+    def setup_penalties(new_orders: list[Order], env_time: datetime):
+        """Call the `Order.create_penalties` method for new orders."""
+        for order in new_orders:
+            order.create_penalties(env_time)
 class ScoreCalcFuncType(Protocol):
    """Typed kwargs of the expected `Order.score_calc` function. Which is also returned by the
-    `RandomOrderKwarg.score_calc_gen_func`."""
+    `RandomOrderKwarg.score_calc_gen_func`.
+    The function should calculate the score for the completed orders.
+    Args:
+        relative_order_time: `timedelta` the duration for which the order was active.
+        order: `Order` the order that was completed.
+    Returns:
+        `float`: the score for a completed order and duration of the order.
+    """
    def __call__(self, relative_order_time: timedelta, order: Order) -> float:
        ...
 class ScoreCalcGenFuncType(Protocol):
-    """Typed kwargs of the expected function for the `RandomOrderKwarg.score_calc_gen_func`."""
+    """Typed kwargs of the expected function for the `RandomOrderKwarg.score_calc_gen_func`.
+    Generate the ScoreCalcFunc for an order based on its meal, duration etc.
+    Args:
+        meal: `ItemInfo` the type of meal the order orders.
+        duration: `timedelta` the duration after which the order expires.
+        now: `datetime` the environment time the order is created.
+        kwargs: `dict` the static kwargs defined in the `environment_config.yml`
+    Returns:
+        `ScoreCalcFuncType` a reference to a function that calculates the score for a completed meal.
+    """
    def __call__(
        self,
@@ -131,9 +313,13 @@ class ScoreCalcGenFuncType(Protocol):
 class ExpiredPenaltyFuncType(Protocol):
-    """Type with kwargs of the expected function for the `RandomOrderKwarg.expired_penalty_func`.
+    """Typed kwargs of the expected function for the `RandomOrderKwarg.expired_penalty_func`.
+    An example is the `zero` function.
-    An example is the `zero` function."""
+    Args:
+        item: `ItemInfo` the meal of the order that expired. It is calculated before the order is active.
+    """
    def __call__(self, item: ItemInfo, **kwargs) -> float:
        ...
@@ -142,24 +328,29 @@ class ExpiredPenaltyFuncType(Protocol):
 def zero(item: ItemInfo, **kwargs) -> float:
    """Example and default for the `RandomOrderKwarg.expired_penalty_func` function.
-    Just no penalty for expired orders."""
+    Just no penalty for expired orders.
+    Returns:
+        zero / 0.0
+    """
    return 0.0
 class RandomFuncConfig(TypedDict):
-    """Types of the dict for sampling with different random functions from the `random` library.
+    """Types of the dict for sampling with different random functions from the [`random` library](https://docs.python.org/3/library/random.html).
    Example:
-        Sampling uniformly between `10` and `20`.
+        Sampling [uniform](https://docs.python.org/3/library/random.html#random.uniform)ly between `10` and `20`.
        ```yaml
        func: uniform
        kwargs:
          a: 10
          b: 20
        ```
-        Or in pyton:
+        Or in Python:
        ```python
-        random_func: RandomFuncConfig = {'func': 'uniform', 'kwargs': {'a': 10, 'b': 20}}
+        random_func = {'func': 'uniform', 'kwargs': {'a': 10, 'b': 20}}
        ```
    """
@@ -378,141 +569,3 @@ def simple_expired_penalty(item: ItemInfo, default: float, **kwargs) -> float:
        ```
    """
    return default
-class OrderAndScoreManager:
-    """The Order and Score Manager that is called from the serving window."""
-    def __init__(self, order_config, available_meals: dict[str, ItemInfo]):
-        self.score = 0
-        self.order_gen: OrderGeneration = order_config["order_gen_class"](
-            available_meals=available_meals, kwargs=order_config["order_gen_kwargs"]
-        )
-        self.serving_not_ordered_meals: Callable[
-            [Item], Tuple[bool, float]
-        ] = order_config["serving_not_ordered_meals"]
-        """Function that decides if not ordered meals can be served and what score it gives"""
-        self.available_meals = available_meals
-        """The meals for that orders can be sampled from."""
-        self.open_orders: Deque[Order] = deque()
-        """Current open orders. This attribute is used for the environment state."""
-        # TODO log who / which player served which meal -> for split scores
-        self.served_meals: list[Tuple[Item, datetime]] = []
-        """List of served meals. Maybe for the end screen."""
-        self.last_finished: list[Order] = []
-        """Cache last finished orders for `OrderGeneration.get_orders` call. From the served meals."""
-        self.next_relevant_time: datetime = datetime.max
-        """For reduced order checking. Store the next time when to create an order or check for penalties."""
-        self.last_expired: list[Order] = []
-        """Cache last expired orders for `OrderGeneration.get_orders` call."""
-    def update_next_relevant_time(self):
-        next_relevant_time = datetime.max
-        for order in self.open_orders:
-            next_relevant_time = min(
-                next_relevant_time, order.start_time + order.max_duration
-            )
-            for penalty in order._timed_penalties:
-                next_relevant_time = min(next_relevant_time, penalty[0])
-        self.next_relevant_time = next_relevant_time
-    def serve_meal(self, item: Item, env_time: datetime) -> bool:
-        if isinstance(item, Plate):
-            meal = item.get_potential_meal()
-            if meal is not None:
-                if meal.name in self.available_meals:
-                    order = self.find_order_for_meal(meal)
-                    if order is None:
-                        if self.serving_not_ordered_meals:
-                            accept, score = self.serving_not_ordered_meals(meal)
-                            if accept:
-                                log.info(
-                                    f"Serving meal without order {meal.name} with score {score}"
-                                )
-                                self.increment_score(score)
-                                self.served_meals.append((meal, env_time))
-                            return accept
-                        log.info(
-                            f"Do not serve meal {meal.name} because it is not ordered"
-                        )
-                        return False
-                    order, index = order
-                    score = order.score_calc(
-                        relative_order_time=env_time - order.start_time,
-                        order=order,
-                    )
-                    self.increment_score(score)
-                    order.finished_info = {
-                        "end_time": env_time,
-                        "score": score,
-                    }
-                    log.info(f"Serving meal {meal.name} with order with score {score}")
-                    self.last_finished.append(order)
-                    del self.open_orders[index]
-                    self.served_meals.append((meal, env_time))
-                    return True
-        log.info(f"Do not serve item {item}")
-        return False
-    def increment_score(self, score: int | float):
-        self.score += score
-        log.debug(f"Score: {self.score}")
-    def create_init_orders(self, env_time):
-        """Create the initial orders in an environment."""
-        init_orders = self.order_gen.init_orders(env_time)
-        self.setup_penalties(new_orders=init_orders, env_time=env_time)
-        self.open_orders.extend(init_orders)
-    def progress(self, passed_time: timedelta, now: datetime):
-        """Check expired orders and check order generation."""
-        new_orders = self.order_gen.get_orders(
-            passed_time=passed_time,
-            now=now,
-            new_finished_orders=self.last_finished,
-            expired_orders=self.last_expired,
-        )
-        self.setup_penalties(new_orders=new_orders, env_time=now)
-        self.open_orders.extend(new_orders)
-        self.last_finished = []
-        self.last_expired = []
-        if new_orders or self.next_relevant_time <= now:
-            # reduce checking calls
-            remove_orders: list[int] = []
-            for index, order in enumerate(self.open_orders):
-                if now >= order.start_time + order.max_duration:
-                    # orders expired
-                    self.increment_score(order.expired_penalty)
-                    remove_orders.append(index)
-                    continue  # no penalties for expired orders
-                remove_penalties = []
-                for i, (penalty_time, penalty) in enumerate(order.timed_penalties):
-                    # check penalties
-                    if penalty_time < now:
-                        self.score -= penalty
-                        remove_penalties.append(i)
-                for i in reversed(remove_penalties):
-                    # or del order.timed_penalties[index]
-                    order.timed_penalties.pop(i)
-            expired_orders: list[Order] = []
-            for remove_order in reversed(remove_orders):
-                expired_orders.append(self.open_orders[remove_order])
-                del self.open_orders[remove_order]
-            self.last_expired = expired_orders
-            self.update_next_relevant_time()
-    def find_order_for_meal(self, meal) -> Tuple[Order, int] | None:
-        for index, order in enumerate(self.open_orders):
-            if order.meal.name == meal.name:
-                return order, index
-    @staticmethod
-    def setup_penalties(new_orders: list[Order], env_time: datetime):
-        """Call the `Order.create_penalties` method for new orders."""
-        for order in new_orders:
-            order.create_penalties(env_time)
--- a/overcooked_simulator/overcooked_environment.py
+++ b/overcooked_simulator/overcooked_environment.py
@@ -74,11 +74,13 @@ class Action:
        return f"Action({self.player},{self.action_type.value},{self.action_data})"
+# TODO Abstract base class for different environments
 class Environment:
    """Environment class which handles the game logic for the overcooked-inspired environment.
    Handles player movement, collision-detection, counters, cooking processes, recipes, incoming orders, time.
-    # TODO Abstract base class for different environments
    """
    def __init__(self, env_config_path: Path, layout_path, item_info_path: Path):
@@ -248,6 +250,7 @@ class Environment:
    @property
    def game_ended(self) -> bool:
+        """Whether the game is over or not based on the calculated `Environment.env_time_end`"""
        return self.env_time >= self.env_time_end
    def load_item_info(self) -> dict[str, ItemInfo]:
@@ -559,7 +562,6 @@ class Environment:
        A counter is modelled as a rectangle (square actually), a player is modelled as a circle.
        The distance of the player position (circle center) and the counter rectangle is calculated, if it is
        smaller than the player radius, a collision is detected.
-        TODO: Efficiency improvement by checking only nearest counters? Quadtree...?
        Args:
            player: The player to check the collision for.
@@ -572,6 +574,7 @@ class Environment:
        dx = max(np.abs(cx - counter.pos[0]) - 1 / 2, 0)
        dy = max(np.abs(cy - counter.pos[1]) - 1 / 2, 0)
        distance = np.linalg.norm([dx, dy])
+        # TODO: Efficiency improvement by checking only nearest counters? Quadtree...?
        return distance < player.radius
    def add_player(self, player_name: str, pos: npt.NDArray = None):
@@ -632,7 +635,9 @@ class Environment:
                for counter in self.counters:
                    if isinstance(counter, (CuttingBoard, Stove, Sink, PlateDispenser)):
                        counter.progress(passed_time=passed_time, now=self.env_time)
-                self.order_and_score.progress(passed_time=passed_time, now=self.env_time)
+                self.order_and_score.progress(
+                    passed_time=passed_time, now=self.env_time
+                )
    def get_state(self):
        """Get the current state of the game environment. The state here is accessible by the current python objects.

--- a/overcooked_simulator/player.py
+++ b/overcooked_simulator/player.py
 """The player contains the logic which method to call on counters and items for a pick action:
-If the player holds nothing, it picks up the content from the counter.
+* If the player **holds nothing**, it **picks up** the content from the counter.
+* If the **item** the player **holds** can be **dropped** on the counter it will do so.
-If the item the player holds can be dropped on the counter it will do so.
+* If the counter is not a sink or plate dispenser, it checks if it **can combine the content** on the counter **with the
+holding object**. If so, it picks up the content and combines it on its hands.
-If the counter is not a sink or plate dispenser, it checks if it can combine the content on the counter with the
-holding object. If so, it picks up the content and combines it on its hands.
 """
 import dataclasses
@@ -48,12 +46,15 @@ class Player:
        pos: Optional[npt.NDArray[float]] = None,
    ):
        self.name: str = name
+        """Reference for the player"""
+        self.pos: npt.NDArray[float] | None = None
+        """The initial/suggested position of the player."""
        if pos is not None:
            self.pos: npt.NDArray[float] = np.array(pos, dtype=float)
-        else:
-            self.pos = None
        self.holding: Optional[Item] = None
+        """What item the player is holding."""
        self.radius: float = player_config.radius
        """See `PlayerConfig.radius`."""
@@ -66,9 +67,13 @@ class Player:
        self.last_interacted_counter: Optional[
            Counter
        ] = None  # needed to stop progress when moved away
+        """The counter the player interacted with in the last environment step."""
        self.current_nearest_counter: Optional[Counter] = None
+        """The counter to interact with."""
        self.facing_point: npt.NDArray[float] = np.zeros(2, float)
+        """A point on the "circle" of the player's border in the `facing_direction`, used to calculate the closest
+        counter."""
    def move(self, movement: npt.NDArray[float]):
        """Moves the player position by the given movement vector.
@@ -102,6 +107,7 @@ class Player:
        self.update_facing_point()
    def update_facing_point(self):
+        """Update facing point on the player border circle based on the radius."""
        self.facing_point = self.pos + (self.facing_direction * self.radius * 0.5)
    def can_reach(self, counter: Counter):

--- a/overcooked_simulator/simulation_runner.py
+++ b/overcooked_simulator/simulation_runner.py
@@ -17,6 +17,8 @@ class Simulator(Thread):
    Main Simulator class which runs the game environment. Players can be registered in the game.
    The simulator is run as its own thread.
+    Is a child class of the `Thread` class from the `threading` library.
    Typical usage example:
    ```python
    sim = Simulator()
@@ -33,15 +35,28 @@ class Simulator(Thread):
        item_info_path=ROOT_DIR / "game_content" / "item_info.yaml",
        seed: int = 8654321,
    ):
+        """Constructor of the `Simulator` class.
+        Args:
+            env_config_path: Path to the environment configuration file.
+            layout_path: Path to the layout file.
+            frequency: Frequency of the environment step function call.
+            item_info_path: Path to the item information configuration file.
+            seed: Random seed to set the numpy random number generator.
+        """
        # TODO look at https://builtin.com/data-science/numpy-random-seed to change to other random
        np.random.seed(seed)
        self.finished: bool = False
+        """The environment runs as long it is `True`"""
        self.step_frequency: int = frequency
+        """Frequency of the environment step function call."""
        self.preferred_sleep_time_ns: float = 1e9 / self.step_frequency
+        """If the environment step call would need no computation time. The duration for one "frame"."""
        self.env: Environment = Environment(
            env_config_path, layout_path, item_info_path
        )
+        """Reference to the `Environment`."""
        super().__init__()
@@ -63,7 +78,6 @@ class Simulator(Thread):
        Returns:
            The current state of the game. Currently, as dict with lists of environment objects.
        """
        return self.env.get_state()
    def get_state_json(self):
@@ -72,14 +86,14 @@ class Simulator(Thread):
        Returns:
            The gamest ate encoded in a json style nested dict.
        """
        return self.env.get_state_json()
    def register_player(self, player_name: str, pos=None):
        """Adds a player to the environment.
        Args:
-            player: The player to be added.
+            player_name: the reference to the player (name/id).
+            pos: optional position of the player.
        """
        self.env.add_player(player_name, pos)
@@ -89,13 +103,11 @@ class Simulator(Thread):
        Args:
            players: List of players to be added.
        """
        for p in players:
            self.register_player(p)
    def run(self):
        """Starts the simulator thread. Runs in a loop until stopped."""
        overslept_in_ns = 0
        self.env.reset_env_time()
        last_step_start = time.time_ns()