Commit 642d3c1c authored by Florian Schröder

Merge branch '142-restructure-reinforcement-learning-files-2' into 'dev'

Integrated overcooked-ai into cooperative cuisine.

See merge request !103
parents ffa38868 e04f5020
2 merge requests: !110 V1.2.0 changes, !103 Integrated overcooked-ai into cooperative cuisine.
Pipeline #59221 passed
@@ -36,6 +36,18 @@ class SimpleActionSpace(Enum):
def get_env_action(player_id, simple_action, duration):
"""
Args:
player_id: id of the player
simple_action: an action in the form of a SimpleActionSpace
duration: for how long an action should be conducted
Returns: a concrete action
"""
match simple_action:
case SimpleActionSpace.Up:
return Action(
@@ -82,9 +94,6 @@ def get_env_action(player_id, simple_action, duration):
)
layout_path: Path = ROOT_DIR / "reinforcement_learning" / "rl_small.layout"
with open(layout_path, "r") as file:
layout = file.read()
with open(ROOT_DIR / "pygame_2d_vis" / "visualization.yaml", "r") as file:
visualization_config = yaml.safe_load(file)
@@ -94,6 +103,12 @@ visualizer.set_grid_size(40)
def shuffle_counters(env):
"""
Shuffles the counters of an environment
Args:
env: the environment object
"""
sample_counter = []
other_counters = []
for counter in env.counters:
@@ -110,11 +125,10 @@ def shuffle_counters(env):
class StateToObservationConverter:
'''
"""
Abstract definition of a class that gets an environment and outputs a state representation for RL
"""
'''
@abstractmethod
def setup(self, env):
...
@@ -132,25 +146,31 @@ class EnvGymWrapper(Env):
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 10}
def __init__(self, config):
"""
Initializes all necessary variables.
Args:
config: the RL and environment configuration provided by Hydra
"""
super().__init__()
self.randomize_counter_placement = False
self.use_rgb_obs = True # if False uses simple vectorized state
self.use_rgb_obs = False # if False uses simple vectorized state
self.full_vector_state = True
config_env = OmegaConf.to_container(config.environment, resolve=True)
config_item_info = OmegaConf.to_container(config.item_info, resolve=True)
order_generator = config.additional_configs.order_generator
custom_config_path = ROOT_DIR / "reinforcement_learning" / "config" / order_generator
with open(custom_config_path, "r") as file:
custom_classes = yaml.load(file, Loader=yaml.Loader)
for key, value in config_env['hook_callbacks'].items():
value['callback_class'] = custom_classes['callback_class']
config_env["orders"]["order_gen_class"] = custom_classes['order_gen_class']
for val in config_env['hook_callbacks']:
config_env['hook_callbacks'][val]["callback_class"] = instantiate(config_env['hook_callbacks'][val]["callback_class"])
config_env["orders"]["order_gen_class"] = instantiate(config_env["orders"]["order_generator"])
self.config_env = config_env
self.config_item_info = config_item_info
layout_file = config_env["layout_name"]
layout_path: Path = ROOT_DIR / layout_file
with open(layout_path, "r") as file:
self.layout = file.read()
self.env: Environment = Environment(
env_config=deepcopy(config_env),
layout_config=layout,
layout_config=self.layout,
item_info=deepcopy(config_item_info),
as_files=False,
yaml_already_loaded=True,
@@ -197,6 +217,10 @@ class EnvGymWrapper(Env):
self.prev_score = 0
def step(self, action):
"""
Takes one step in the environment and returns the observation, reward,
whether the episode terminated or was truncated, and additional info.
"""
# Work-around to allow a no-op action, which play_gym.py requires
if action == 8:
observation = self.get_observation()
@@ -231,10 +255,14 @@ class EnvGymWrapper(Env):
return observation, reward, terminated, truncated, info
def reset(self, seed=None, options=None):
del visualizer.surface_cache_dict[self.env.env_name]
"""
Resets the environment according to the configs
"""
if self.env.env_name in visualizer.surface_cache_dict:
del visualizer.surface_cache_dict[self.env.env_name]
self.env: Environment = Environment(
env_config=deepcopy(self.config_env),
layout_config=layout,
layout_config=self.layout,
item_info=deepcopy(self.config_item_info),
as_files=False,
env_name=uuid.uuid4().hex,
......
@@ -8,13 +8,20 @@ from cooperative_cuisine.reinforcement_learning.gym_env import StateToObservatio
class BaseStateConverter(StateToObservationConverter):
"""
Converts an environment state to an encoding where each counter/item type has its own unique value
"""
def __init__(self):
self.onehot = False
self.grid_height: int | None = None
self.grid_width: int | None = None
self.counter_list = [
"Empty",
"Counter",
"PlateDispenser",
"TomatoDispenser",
"OnionDispenser",
"ServingWindow",
"PlateReturn",
"Trashcan",
@@ -26,28 +33,48 @@ class BaseStateConverter(StateToObservationConverter):
self.item_list = [
"None",
"Pot",
"PotOne",
"PotTwo",
"PotThree",
"PotDone",
"PotOne_Tomato",
"PotTwo_Tomato",
"PotThree_Tomato",
"PotDone_Tomato",
"PotOne_Onion",
"PotTwo_Onion",
"PotThree_Onion",
"PotDone_Onion",
"Tomato",
"Onion",
"ChoppedTomato",
"Plate",
"PlateTomatoSoup",
"PlateOnionSoup",
"PlateSalad",
"Lettuce",
"PlateChoppedTomato",
"PlateChoppedLettuce",
"ChoppedLettuce",
"ChoppedOnion",
]
self.player = "0"
def setup(self, env):
"""
Set the grid width and height according to the present environment.
"""
self.grid_width, self.grid_height = int(env.kitchen_width), int(
env.kitchen_height)
def convert_state_to_observation(self, env) -> np.ndarray:
"""
Convert the environment state into an integer (non-onehot) encoding
Args:
env: The environment object used
Returns: An encoding for the environment state that is not onehot
"""
grid_base_array = np.zeros(
(
self.grid_width,
@@ -115,18 +142,31 @@ class BaseStateConverter(StateToObservationConverter):
if item.name == "Pot":
if len(item.content_list) > 0:
if item.content_list[0].name == "TomatoSoup":
item_name = "PotDone"
item_name = "PotDone_Tomato"
if item.content_list[0].name == "OnionSoup":
item_name = "PotDone_Onion"
elif len(item.content_list) == 1:
item_name = "PotOne"
if item.content_list[0].name == "Tomato":
item_name = "PotOne_Tomato"
if item.content_list[0].name == "Onion":
item_name = "PotOne_Onion"
elif len(item.content_list) == 2:
item_name = "PotTwo"
if item.content_list[0].name == "Tomato":
item_name = "PotTwo_Tomato"
if item.content_list[0].name == "Onion":
item_name = "PotTwo_Onion"
elif len(item.content_list) == 3:
item_name = "PotThree"
if item.content_list[0].name == "Tomato":
item_name = "PotThree_Tomato"
if item.content_list[0].name == "Onion":
item_name = "PotThree_Onion"
if "Plate" in item.name:
content_list = [i.name for i in item.content_list]
match content_list:
case ["TomatoSoup"]:
item_name = "PlateTomatoSoup"
case ["OnionSoup"]:
item_name = "PlateOnionSoup"
case ["ChoppedTomato"]:
item_name = "PlateChoppedTomato"
case ["ChoppedLettuce"]:
......
@@ -8,15 +8,24 @@ from cooperative_cuisine.reinforcement_learning.gym_env import StateToObservatio
class BaseStateConverterOnehot(StateToObservationConverter):
"""
Converts an environment state to a one-hot encoding
"""
def __init__(self):
"""
Constructor setting basic variables as attributes.
"""
self.onehot = True
self.grid_height = None
self.grid_width = None
self.grid_height: int | None = None
self.grid_width: int | None = None
self.counter_list = [
"Empty",
"Counter",
"PlateDispenser",
"TomatoDispenser",
"OnionDispenser",
"ServingWindow",
"PlateReturn",
"Trashcan",
@@ -28,27 +37,51 @@ class BaseStateConverterOnehot(StateToObservationConverter):
self.item_list = [
"None",
"Pot",
"PotOne",
"PotTwo",
"PotThree",
"PotDone",
"PotOne_Tomato",
"PotTwo_Tomato",
"PotThree_Tomato",
"PotDone_Tomato",
"PotOne_Onion",
"PotTwo_Onion",
"PotThree_Onion",
"PotDone_Onion",
"Tomato",
"Onion",
"ChoppedTomato",
"Plate",
"PlateTomatoSoup",
"PlateOnionSoup",
"PlateSalad",
"Lettuce",
"PlateChoppedTomato",
"PlateChoppedLettuce",
"ChoppedLettuce",
"ChoppedOnion",
]
self.player = "0"
def setup(self, env):
"""
Set the grid width and height according to the present environment
Args:
env: The environment object used
"""
self.grid_width, self.grid_height = int(env.kitchen_width), int(
env.kitchen_height)
def convert_state_to_observation(self, env) -> np.ndarray:
"""
Convert the environment into an onehot encoding
Args:
env: The environment object used
Returns: An onehot encoding for the environment state
"""
grid_base_array = np.zeros(
(
self.grid_width,
@@ -92,7 +125,7 @@ class BaseStateConverterOnehot(StateToObservationConverter):
player_item_one_hot = self.vectorize_item(
env.players[self.player].holding, self.item_list
)
# concatenate the encodings of all entities into one large vector
final = np.concatenate(
(
counters.flatten(),
@@ -116,22 +149,36 @@ class BaseStateConverterOnehot(StateToObservationConverter):
else:
item_name = item.name
# Different pot names encode the pot contents and cooking progress; a cleaner implementation should eventually replace this.
if isinstance(item, CookingEquipment):
if item.name == "Pot":
if len(item.content_list) > 0:
if item.content_list[0].name == "TomatoSoup":
item_name = "PotDone"
item_name = "PotDone_Tomato"
if item.content_list[0].name == "OnionSoup":
item_name = "PotDone_Onion"
elif len(item.content_list) == 1:
item_name = "PotOne"
if item.content_list[0].name == "Tomato":
item_name = "PotOne_Tomato"
if item.content_list[0].name == "Onion":
item_name = "PotOne_Onion"
elif len(item.content_list) == 2:
item_name = "PotTwo"
if item.content_list[0].name == "Tomato":
item_name = "PotTwo_Tomato"
if item.content_list[0].name == "Onion":
item_name = "PotTwo_Onion"
elif len(item.content_list) == 3:
item_name = "PotThree"
if item.content_list[0].name == "Tomato":
item_name = "PotThree_Tomato"
if item.content_list[0].name == "Onion":
item_name = "PotThree_Onion"
if "Plate" in item.name:
content_list = [i.name for i in item.content_list]
match content_list:
case ["TomatoSoup"]:
item_name = "PlateTomatoSoup"
case ["OnionSoup"]:
item_name = "PlateOnionSoup"
case ["ChoppedTomato"]:
item_name = "PlateChoppedTomato"
case ["ChoppedLettuce"]:
......
# Overcooked-AI and Cooperative Cuisine
## Use the Overcooked-AI levels and configs in Cooperative Cuisine
All layouts from Overcooked-AI can be used within Cooperative Cuisine. Dedicated configs are defined and can be loaded via Hydra.
The file overcooked-ai_environment_config.yaml must be chosen as the environment config; under layout_name, any Overcooked-AI layout can be specified.
Additionally, item_info_overcooked-ai.yaml must be used as the item config.
With these configs selected, the layouts and rewards from Overcooked-AI are used; a sketch of composing such a configuration follows below.
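As a rough illustration (not part of this merge request), the following sketch composes such a configuration with Hydra's compose API and wraps it in the gym environment. The config group names (`environment`, `item_info`) and the layout path are assumptions inferred from this merge request and may differ from the actual config layout.

```python
from hydra import compose, initialize

from cooperative_cuisine.reinforcement_learning.gym_env import EnvGymWrapper

# Minimal sketch, assuming the Hydra config groups are named "environment"
# and "item_info" and that layouts are referenced relative to the package root.
with initialize(version_base="1.3", config_path="config"):
    cfg = compose(
        config_name="rl_config",
        overrides=[
            "environment=overcooked-ai_environment_config",  # assumed group name
            "item_info=item_info_overcooked-ai",             # assumed group name
            # any Overcooked-AI layout converted into the Cooperative Cuisine format
            "environment.layout_name=reinforcement_learning/layouts/overcooked_ai_layouts/cramped_room.layout",
        ],
    )

env = EnvGymWrapper(cfg)  # gymnasium-compatible wrapper defined in gym_env.py
obs, info = env.reset()
```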
## How is the connection between Overcooked-AI and Cooperative Cuisine defined?
Cooperative Cuisine is highly modular because Hydra is used as the config manager.
Therefore, the parameters used for Overcooked-AI are simply set in the dedicated config file.
The layout format differs, which is why a mapping is defined that converts an Overcooked-AI layout into a Cooperative Cuisine layout; the general idea is sketched below.
The layout file has to be present in cooperative_cuisine/reinforcement_learning/layouts/overcooked_ai_layouts.
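The conversion itself can be pictured as a character-by-character translation. The sketch below only illustrates this idea: the source characters are common Overcooked-AI grid symbols, but the target characters on the Cooperative Cuisine side are hypothetical placeholders, not the mapping actually used by the code.

```python
# Illustrative sketch of the layout mapping idea; the target characters are
# placeholders, not the real Cooperative Cuisine symbols used by the converter.
OVERCOOKED_AI_TO_COOPERATIVE_CUISINE: dict[str, str] = {
    "X": "#",  # counter (placeholder target symbol)
    "O": "N",  # onion dispenser (placeholder target symbol)
    "T": "T",  # tomato dispenser (placeholder target symbol)
    "D": "P",  # dish/plate dispenser (placeholder target symbol)
    "P": "U",  # pot / cooking station (placeholder target symbol)
    "S": "W",  # serving location (placeholder target symbol)
    " ": "_",  # free floor (placeholder target symbol)
}


def convert_overcooked_ai_layout(layout: str) -> str:
    """Translate an Overcooked-AI layout string line by line, character by character."""
    return "\n".join(
        "".join(OVERCOOKED_AI_TO_COOPERATIVE_CUISINE.get(char, char) for char in row)
        for row in layout.splitlines()
    )
```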
@@ -7,6 +7,9 @@ from gym_env import EnvGymWrapper, SimpleActionSpace
@hydra.main(version_base="1.3", config_path="config", config_name="rl_config")
def main(cfg: DictConfig):
"""
Lets a human control the agent in the RL environment via the keyboard.
"""
env = EnvGymWrapper(cfg)
env.render_mode = "rgb_array"
play(env, keys_to_action={"a": 2, "d": 3, "w": 0, "s": 1, " ": 4, "k": 5}, noop=8)
......
##X#
T__L
U__P
#C$#
import time
from pathlib import Path
import cv2
from stable_baselines3 import DQN, A2C, PPO
@@ -11,15 +12,17 @@ from hydra.utils import instantiate, call
@hydra.main(version_base="1.3", config_path="config", config_name="rl_config")
def main(cfg: DictConfig):
"""
Loads the trained model and lets the user watch an example episode with the corresponding rewards.
"""
additional_config = OmegaConf.to_container(cfg.additional_configs, resolve=True)
model_save_path = additional_config["log_path"] + "/" + additional_config["checkpoint_path"] + "/" + \
additional_config["project_name"] + "_" + OmegaConf.to_container(cfg.model, resolve=True)[
"model_name"]
model_save_path = Path(additional_config["log_path"]) / Path(additional_config["checkpoint_path"]) / Path(
additional_config["project_name"] + "_" + OmegaConf.to_container(cfg.model, resolve=True)["model_name"])
model_class = call(cfg.model.model_type_inference)
model = model_class(model_save_path)
env = EnvGymWrapper(cfg)
#check_env(env)
# check_env(env)
obs, info = env.reset()
print(obs)
while True:
......
from pathlib import Path
from typing import Any
import wandb
from omegaconf import DictConfig, OmegaConf
@@ -17,13 +18,17 @@ from hydra.utils import instantiate
@hydra.main(version_base="1.3", config_path="config", config_name="rl_config")
def main(cfg: DictConfig):
additional_configs = OmegaConf.to_container(cfg.additional_configs, resolve=True)
rl_logs = Path(additional_configs["log_path"])
"""
Trains an agent from scratch and saves the model to the specified path.
All configs are managed with Hydra.
"""
additional_configs: dict[str, Any] = OmegaConf.to_container(cfg.additional_configs, resolve=True)
rl_logs: Path = Path(additional_configs["log_path"])
rl_logs.mkdir(exist_ok=True)
rl_agent_checkpoints = rl_logs / Path(additional_configs["checkpoint_path"])
rl_agent_checkpoints: Path = rl_logs / Path(additional_configs["checkpoint_path"])
rl_agent_checkpoints.mkdir(exist_ok=True)
config = OmegaConf.to_container(cfg.model, resolve=True)
debug = additional_configs["debug_mode"]
config: dict[str, Any] = OmegaConf.to_container(cfg.model, resolve=True)
debug: bool = additional_configs["debug_mode"]
vec_env = additional_configs["vec_env"]
number_envs_parallel = config["number_envs_parallel"]
model_class = instantiate(cfg.model.model_type)
......