Skip to content
Snippets Groups Projects
Commit 642d3c1c authored by Florian Schröder's avatar Florian Schröder
Browse files

Merge branch '142-restructure-reinforcement-learning-files-2' into 'dev'

Integrated overcooked-ai into cooperative cuisine.

See merge request !103
parents ffa38868 e04f5020
No related branches found
No related tags found
2 merge requests!110V1.2.0 changes,!103Integrated overcooked-ai into cooperative cuisine.
Pipeline #59221 passed
Showing
with 700 additions and 72 deletions
##U##
NA_AN
#___#
#P#$#
\ No newline at end of file
#########
N_#$#N#_$
#_A_U_A_#
#___U___#
###P#P###
\ No newline at end of file
###U#
#__AU
P_#_#
NA__#
#N$##
###U#
N_#AU
N_#_#
PA#_#
###$#
###UU###
#A_____#
P_####_$
#_____A#
###NN###
\ No newline at end of file
......@@ -651,6 +651,7 @@ class DeterministicOrderGeneration(OrderGeneration):
self.current_queue[0].start -= diff_to_next
self.next_order_time = self.current_queue[0].start
orders.extend(self.get_orders(passed_time, now, [], []))
log.info(f"Create order for meal {orders}.")
return orders
def parse_timed_orders(self) -> list[ParsedTimedOrder]:
......
"""
## Reinforcement Learning Module Overview
The reinforcement learning module consists of several key functions designed to:
- **Train the agent**
- **Test the agent**
- **Utilize the environment**
### Configurations
All hyperparameters related to the reinforcement learning agent and the environment are configurable via specific configuration files. These configurations are managed with **Hydra**, allowing for easy selection of specific config items or entire config files directly from the command line.
### Layouts
Several layouts are predefined in the `cooperative_cuisine/configs/layouts` directory. The layout path can be selected within the corresponding config file.
Additionally, **Overcooked-AI** layouts can be transformed into the cooperative cuisine format using the `convert_overcooked_ai_layout.py` script. To use this script:
1. Specify the path of the Overcooked-AI layout file as a command-line argument.
2. The script will generate the corresponding layout file and save it in the `configs/layouts/overcooked-ai` directory.
"""
\ No newline at end of file
order_generator: "random_orders.yaml"
# Here the filename of the converter should be given. The converter class needs to be called StateConverter and implement the abstract StateToObservationConverter class
state_converter:
_target_: "cooperative_cuisine.reinforcement_learning.obs_converter.base_converter_onehot.BaseStateConverterOnehot"
_target_: "cooperative_cuisine.reinforcement_learning.obs_converter.base_converter.BaseStateConverter"
log_path: "logs/reinforcement_learning"
checkpoint_path: "rl_agent_checkpoints"
render_mode: "rgb_array"
......
......@@ -11,7 +11,7 @@ game:
undo_dispenser_pickup: true
validate_recipes: false
layout_name: configs/layouts/rl/rl_small.layout
layout_chars:
_: Free
......@@ -51,6 +51,9 @@ layout_chars:
orders:
order_generator:
_target_: "cooperative_cuisine.orders.RandomOrderGeneration"
_partial_: true
meals:
all: false
# if all: false -> only orders for these meals are generated
......@@ -97,38 +100,59 @@ effect_manager: { }
# spreading_duration: [ 5, 10 ]
# fire_burns_ingredients_and_meals: true
hook_callbacks:
# # --------------- Scoring ---------------
orders:
hooks: [ completed_order ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.95
serve_not_ordered_meals:
hooks: [ serve_not_ordered_meal ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.95
trashcan_usages:
hooks: [ trashcan_usage ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: -0.2
item_cut:
hooks: [ cutting_board_100 ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.1
stepped:
hooks: [ post_step ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: -0.01
combine:
hooks: [ drop_off_on_cooking_equipment ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.01
start_interact:
hooks: [ player_start_interaction ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.01
# json_states:
# hooks: [ json_state ]
# record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
......
plates:
clean_plates: 2
dirty_plates: 0
plate_delay: [ 2, 4 ]
return_dirty: False
# range of seconds until the dirty plate arrives.
game:
time_limit_seconds: 300
undo_dispenser_pickup: true
validate_recipes: false
layout_name: configs/layouts/rl/rl_small.layout
layout_chars:
_: Free
hash: Counter # #
A: Agent
pipe: Extinguisher
P: PlateDispenser
C: CuttingBoard
X: Trashcan
$: ServingWindow
S: Sink
+: SinkAddon
at: Plate # @ just a clean plate on a counter
U: Pot # with Stove
Q: Pan # with Stove
O: Peel # with Oven
F: Basket # with DeepFryer
T: Tomato
N: Onion # oNioN
L: Lettuce
K: Potato # Kartoffel
I: Fish # fIIIsh
D: Dough
E: Cheese # chEEEse
G: Sausage # sausaGe
B: Bun
M: Meat
question: Counter # ? mushroom
: Counter
^: Counter
right: Counter
left: Counter
wave: Free # ~ Water
minus: Free # - Ice
dquote: Counter # " wall/truck
p: Counter # second plate return ??
orders:
order_generator:
_target_: "cooperative_cuisine.orders.DeterministicOrderGeneration"
_partial_: true
meals:
all: false
# if all: false -> only orders for these meals are generated
# TODO: what if this list is empty?
list:
- TomatoSoup
- OnionSoup
#- Salad
# - FriedFish
# the class that receives the kwargs. Should be a child class of OrderGeneration in orders.py
order_gen_kwargs:
# structure: [meal_name, start, duration] (start and duration as seconds or timedeltas https://github.com/wroberts/pytimeparse)
timed_orders:
- [ TomatoSoup, 0:00, 0:10 ]
- [ OnionSoup, 0:00, 0:10 ]
- [ TomatoSoup, 0:10, 0:10 ]
- [ TomatoSoup, 0:15, 0:06 ]
never_no_order: False
never_no_order_update_all_remaining: False
serving_not_ordered_meals: null
player_config:
radius: 0.4
speed_units_per_seconds: 1
interaction_range: 1.6
restricted_view: False
view_angle: 95
effect_manager: { }
# FireManager:
# class: !!python/name:cooperative_cuisine.effects.FireEffectManager ''
# kwargs:
# spreading_duration: [ 5, 10 ]
# fire_burns_ingredients_and_meals: true
hook_callbacks:
# # --------------- Scoring ---------------
orders:
hooks: [ completed_order ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.95
serve_not_ordered_meals:
hooks: [ serve_not_ordered_meal ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.95
trashcan_usages:
hooks: [ trashcan_usage ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: -0.2
item_cut:
hooks: [ cutting_board_100 ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.1
stepped:
hooks: [ post_step ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: -0.01
combine:
hooks: [ drop_off_on_cooking_equipment ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.01
start_interact:
hooks: [ player_start_interaction ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.01
# json_states:
# hooks: [ json_state ]
# record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
# record_class_kwargs:
# record_path: USER_LOG_DIR/ENV_NAME/json_states.jsonl
# actions:
# hooks: [ pre_perform_action ]
# record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
# record_class_kwargs:
# record_path: USER_LOG_DIR/ENV_NAME/LOG_RECORD_NAME.jsonl
# random_env_events:
# hooks: [ order_duration_sample, plate_out_of_kitchen_time ]
# record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
# record_class_kwargs:
# record_path: USER_LOG_DIR/ENV_NAME/LOG_RECORD_NAME.jsonl
# add_hook_ref: true
# env_configs:
# hooks: [ env_initialized, item_info_config ]
# record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
# record_class_kwargs:
# record_path: USER_LOG_DIR/ENV_NAME/LOG_RECORD_NAME.jsonl
# add_hook_ref: true
......@@ -11,6 +11,7 @@ game:
undo_dispenser_pickup: true
validate_recipes: false
layout_name: configs/layouts/rl/rl_small.layout
layout_chars:
......@@ -51,6 +52,9 @@ layout_chars:
orders:
order_generator:
_target_: "cooperative_cuisine.orders.RandomOrderGeneration"
_partial_: true
meals:
all: true
# if all: false -> only orders for these meals are generated
......@@ -97,38 +101,58 @@ effect_manager: { }
# spreading_duration: [ 5, 10 ]
# fire_burns_ingredients_and_meals: true
hook_callbacks:
# # --------------- Scoring ---------------
orders:
hooks: [ completed_order ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.1
serve_not_ordered_meals:
hooks: [ serve_not_ordered_meal ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.1
trashcan_usages:
hooks: [ trashcan_usage ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: -0.2
item_cut:
hooks: [ cutting_board_100 ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.0
static_score: 0
stepped:
hooks: [ post_step ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: -0.0
static_score: 0
combine:
hooks: [ drop_off_on_cooking_equipment ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.0
static_score: 0
start_interact:
hooks: [ player_start_interaction ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0.0
static_score: 0
# json_states:
# hooks: [ json_state ]
# record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
......
orders:
meals:
all: true
# if all: false -> only orders for these meals are generated
# TODO: what if this list is empty?
list:
- TomatoSoup
- OnionSoup
- Salad
#order_gen_class: !!python/name:cooperative_cuisine.orders.RandomOrderGeneration ''
# the class that receives the kwargs. Should be a child class of OrderGeneration in orders.py
order_gen_kwargs:
order_duration_random_func:
# how long should the orders be alive
# 'random' library call with getattr, kwargs are passed to the function
func: uniform
kwargs:
a: 40
b: 60
max_orders: 6
# maximum number of active orders at the same time
num_start_meals: 2
# number of orders generated at the start of the environment
sample_on_dur_random_func:
# 'random' library call with getattr, kwargs are passed to the function
func: uniform
kwargs:
a: 10
b: 20
sample_on_serving: false
# Sample the delay for the next order only after a meal was served.
serving_not_ordered_meals: true
# can meals that are not ordered be served / dropped on the serving window
\ No newline at end of file
plates:
clean_plates: 1
dirty_plates: 0
plate_delay: [ 0, 0 ]
return_dirty: False
# range of seconds until the dirty plate arrives.
game:
time_limit_seconds: 300
undo_dispenser_pickup: true
validate_recipes: false
layout_name: configs/layouts/rl/rl_small.layout
layout_chars:
_: Free
hash: Counter # #
A: Agent
pipe: Extinguisher
P: PlateDispenser
C: CuttingBoard
X: Trashcan
$: ServingWindow
S: Sink
+: SinkAddon
at: Plate # @ just a clean plate on a counter
U: Pot # with Stove
Q: Pan # with Stove
O: Peel # with Oven
F: Basket # with DeepFryer
T: Tomato
N: Onion # oNioN
L: Lettuce
K: Potato # Kartoffel
I: Fish # fIIIsh
D: Dough
E: Cheese # chEEEse
G: Sausage # sausaGe
B: Bun
M: Meat
question: Counter # ? mushroom
: Counter
^: Counter
right: Counter
left: Counter
wave: Free # ~ Water
minus: Free # - Ice
dquote: Counter # " wall/truck
p: Counter # second plate return ??
orders:
order_generator:
_target_: "cooperative_cuisine.orders.RandomOrderGeneration"
_partial_: true
meals:
all: false
# if all: false -> only orders for these meals are generated
# TODO: what if this list is empty?
list:
- TomatoSoup
- OnionSoup
- Salad
# the class that receives the kwargs. Should be a child class of OrderGeneration in orders.py
order_gen_kwargs:
order_duration_random_func:
# how long should the orders be alive
# 'random' library call with getattr, kwargs are passed to the function
func: uniform
kwargs:
a: 40
b: 60
max_orders: 6
# maximum number of active orders at the same time
num_start_meals: 2
# number of orders generated at the start of the environment
sample_on_dur_random_func:
# 'random' library call with getattr, kwargs are passed to the function
func: uniform
kwargs:
a: 10
b: 20
sample_on_serving: false
# Sample the delay for the next order only after a meal was served.
serving_not_ordered_meals: true
# can meals that are not ordered be served / dropped on the serving window
player_config:
radius: 0.1
speed_units_per_seconds: 1
interaction_range: 1
restricted_view: False
view_angle: 60
effect_manager: { }
# FireManager:
# class: !!python/name:cooperative_cuisine.effects.FireEffectManager ''
# kwargs:
# spreading_duration: [ 5, 10 ]
# fire_burns_ingredients_and_meals: true
hook_callbacks:
# # --------------- Scoring ---------------
orders:
hooks: [ completed_order ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 5
serve_not_ordered_meals:
hooks: [ serve_not_ordered_meal ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 3
trashcan_usages:
hooks: [ trashcan_usage ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0
item_cut:
hooks: [ cutting_board_100 ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0
stepped:
hooks: [ post_step ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0
combine:
hooks: [ drop_off_on_cooking_equipment ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0
start_interact:
hooks: [ player_start_interaction ]
callback_class:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: 0
# json_states:
# hooks: [ json_state ]
# record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
# record_class_kwargs:
# record_path: USER_LOG_DIR/ENV_NAME/json_states.jsonl
# actions:
# hooks: [ pre_perform_action ]
# record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
# record_class_kwargs:
# record_path: USER_LOG_DIR/ENV_NAME/LOG_RECORD_NAME.jsonl
# random_env_events:
# hooks: [ order_duration_sample, plate_out_of_kitchen_time ]
# record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
# record_class_kwargs:
# record_path: USER_LOG_DIR/ENV_NAME/LOG_RECORD_NAME.jsonl
# add_hook_ref: true
# env_configs:
# hooks: [ env_initialized, item_info_config ]
# record_class: !!python/name:cooperative_cuisine.recording.LogRecorder ''
# record_class_kwargs:
# record_path: USER_LOG_DIR/ENV_NAME/LOG_RECORD_NAME.jsonl
# add_hook_ref: true
CuttingBoard:
type: Equipment
Sink:
type: Equipment
Stove:
type: Equipment
DeepFryer:
type: Equipment
Oven:
type: Equipment
Pot:
type: Equipment
equipment: Stove
Pan:
type: Equipment
equipment: Stove
Basket:
type: Equipment
equipment: DeepFryer
Peel:
type: Equipment
equipment: Oven
DirtyPlate:
type: Equipment
Plate:
type: Equipment
needs: [ DirtyPlate ]
seconds: 2.0
equipment: Sink
# --------------------------------------------------------------------------------
Tomato:
type: Ingredient
Lettuce:
type: Ingredient
Onion:
type: Ingredient
Meat:
type: Ingredient
Bun:
type: Ingredient
Potato:
type: Ingredient
Fish:
type: Ingredient
Dough:
type: Ingredient
Cheese:
type: Ingredient
Sausage:
type: Ingredient
# Chopped things
ChoppedTomato:
type: Ingredient
needs: [ Tomato ]
seconds: 4.0
equipment: CuttingBoard
ChoppedLettuce:
type: Ingredient
needs: [ Lettuce ]
seconds: 3.0
equipment: CuttingBoard
ChoppedOnion:
type: Ingredient
needs: [ Onion ]
seconds: 4.0
equipment: CuttingBoard
RawPatty:
type: Ingredient
needs: [ Meat ]
seconds: 4.0
equipment: CuttingBoard
RawChips:
type: Ingredient
needs: [ Potato ]
seconds: 4.0
equipment: CuttingBoard
ChoppedFish:
type: Ingredient
needs: [ Fish ]
seconds: 4.0
equipment: CuttingBoard
PizzaBase:
type: Ingredient
needs: [ Dough ]
seconds: 4.0
equipment: CuttingBoard
GratedCheese:
type: Ingredient
needs: [ Cheese ]
seconds: 4.0
equipment: CuttingBoard
ChoppedSausage:
type: Ingredient
needs: [ Sausage ]
seconds: 4.0
equipment: CuttingBoard
CookedPatty:
type: Ingredient
seconds: 5.0
needs: [ RawPatty ]
equipment: Pan
# --------------------------------------------------------------------------------
Chips:
type: Meal
seconds: 5.0
needs: [ RawChips ]
equipment: Basket
FriedFish:
type: Meal
seconds: 5.0
needs: [ ChoppedFish ]
equipment: Basket
Burger:
type: Meal
needs: [ Bun, ChoppedLettuce, ChoppedTomato, CookedPatty ]
equipment: ~
Salad:
type: Meal
needs: [ ChoppedLettuce, ChoppedTomato ]
equipment: ~
TomatoSoup:
type: Meal
needs: [Tomato,Tomato, Tomato ]
seconds: 1
equipment: Pot
OnionSoup:
type: Meal
needs: [ Onion, Onion, Onion ]
seconds: 1
equipment: Pot
FishAndChips:
type: Meal
needs: [ FriedFish, Chips ]
equipment: ~
Pizza:
type: Meal
needs: [ PizzaBase, ChoppedTomato, GratedCheese, ChoppedSausage ]
seconds: 7.0
equipment: Peel
# --------------------------------------------------------------------------------
BurntCookedPatty:
type: Waste
seconds: 10.0
needs: [ CookedPatty ]
equipment: Pan
BurntChips:
type: Waste
seconds: 10.0
needs: [ Chips ]
equipment: Basket
BurntFriedFish:
type: Waste
seconds: 10.0
needs: [ FriedFish ]
equipment: Basket
BurntTomatoSoup:
type: Waste
needs: [ TomatoSoup ]
seconds: 20.0
equipment: Pot
BurntOnionSoup:
type: Waste
needs: [ OnionSoup ]
seconds: 20.0
equipment: Pot
BurntPizza:
type: Waste
needs: [ Pizza ]
seconds: 10.0
equipment: Peel
......@@ -8,10 +8,10 @@ model_type_inference:
_partial_: true
_target_: stable_baselines3.PPO.load
total_timesteps: 3_000_000 # Hendric suggests rather around 300_000_000 steps
number_envs_parallel: 64
number_envs_parallel: 16
learning_rate: 0.0003
n_steps: 2048
batch_size: 64
batch_size: 16
n_epochs: 10
gamma: 0.99
gae_lambda: 0.95
......
order_gen_class: !!python/name:cooperative_cuisine.orders.RandomOrderGeneration ''
callback_class: !!python/name:cooperative_cuisine.scores.ScoreViaHooks ''
\ No newline at end of file
defaults:
- environment: environment_config_rl
- item_info: item_info_rl
- environment: overcooked-ai_environment_config
- item_info: item_info_overcooked-ai
- model: PPO
- additional_configs: additional_config_base
\ No newline at end of file
import argparse
from pathlib import Path, PurePath
from cooperative_cuisine import ROOT_DIR
def convert_overcookd_ai_layouts():
    """Convert an Overcooked-AI layout file into a cooperative cuisine layout.

    The input path is taken from the required ``--file`` command-line
    argument. The file must contain a Python dict literal with a ``"grid"``
    entry (a multi-line string); any further entries are optional.

    Every grid character is translated through a fixed mapping and the result
    is written to ``configs/layouts/overcooked-ai/<input file name>`` below
    ``ROOT_DIR``. Each non-grid entry is appended as a ``; key: value`` line,
    with single quotes removed and ``None`` rendered as ``null``.

    Raises:
        KeyError: If the grid contains a character that has no translation.
            The output file is not touched in that case.
        ValueError, SyntaxError: If the input file is not a plain Python
            literal.
    """
    # Function-scope import: only this converter needs ``ast``.
    import ast

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--file", dest="inputfile", help="Input file path", required=True
    )
    args = parser.parse_args()
    filepath = PurePath(args.inputfile)
    print(filepath)

    # Overcooked-AI grid character -> cooperative cuisine layout character.
    conversion_dict = {
        " ": "_",
        "X": "#",
        "O": "N",
        "T": "T",
        "P": "U",
        "D": "P",
        "S": "$",
        "1": "A",
        "2": "A",
    }

    savepath = Path(ROOT_DIR) / "configs" / "layouts" / "overcooked-ai" / filepath.name

    with open(args.inputfile, "r") as f:
        layoutfile = f.read()

    # ``literal_eval`` only accepts literals, so a layout file cannot execute
    # arbitrary code the way it could with ``eval``.
    layout = ast.literal_eval(layoutfile)

    # Convert the whole grid in memory first so that an unsupported character
    # aborts before the output file is created or truncated.
    converted_lines = []
    for line in layout["grid"].split("\n"):
        # Leading whitespace is dropped (as before); note that this also
        # removes leading blank tiles of a row.
        line = line.lstrip()
        try:
            converted_lines.append("".join(conversion_dict[char] for char in line))
        except KeyError as err:
            raise KeyError(
                f"Unsupported layout character {err.args[0]!r} in grid line {line!r}"
            ) from None

    # All remaining entries are kept as "; key: value" lines after the grid.
    additional_info = []
    for key, value in layout.items():
        if key != "grid":
            rendered = str(value).replace("'", "").replace("None", "null")
            additional_info.append(f"; {key}: {rendered}")

    # Make sure the target directory exists before writing.
    savepath.parent.mkdir(parents=True, exist_ok=True)
    with open(savepath, "w") as f:
        f.writelines(f"{line}\n" for line in converted_lines + additional_info)
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    convert_overcookd_ai_layouts()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment