Skip to content
Snippets Groups Projects
Commit 689647b4 authored by Christoph Kowalski's avatar Christoph Kowalski
Browse files

Update the advanced_converter_array

parent 1771d7e4
No related branches found
No related tags found
2 merge requests!110V1.2.0 changes,!109SB3 RL with Hydra
Pipeline #60821 passed
# Give the filename of the converter here. The converter class must be named StateConverter and must implement the abstract StateToObservationConverter class.
state_converter:
_target_: "cooperative_cuisine.reinforcement_learning.obs_converter.advanced_converter_int.AdvancedStateConverterInt"
_target_: "cooperative_cuisine.reinforcement_learning.obs_converter.advanced_converter_array.AdvancedStateConverterArray"
log_path: "logs/reinforcement_learning"
checkpoint_path: "rl_agent_checkpoints"
render_mode: "rgb_array"
......@@ -10,7 +10,7 @@ vec_env: True
sync_tensorboard: True # auto-upload sb3's tensorboard metrics
monitor_gym: True
video_save_path: "logs/reinforcement_learning/videos/"
record_video_trigger: 20_000
record_video_trigger: 40_000
video_length: 300
save_freq: 50_000
save_path_callback: "logs"
......
......@@ -110,7 +110,7 @@ hook_callbacks:
_target_: "cooperative_cuisine.scores.ScoreViaHooks"
_partial_: true
callback_class_kwargs:
static_score: -0.01
static_score: 0
item_cut:
hooks: [ cutting_board_100 ]
callback_class:
......
......@@ -29,6 +29,10 @@ class AdvancedStateConverterArray(StateToObservationConverter):
"Stove",
"CuttingBoard",
"LettuceDispenser",
"PlayerLeft",
"PlayerRight",
"PlayerUp",
"PlayerDown"
]
self.player = "0"
self.item_list = ["None"]
......@@ -58,68 +62,49 @@ class AdvancedStateConverterArray(StateToObservationConverter):
Returns: An encoding for the environment state that is not onehot
"""
grid_base_array = np.zeros(
(
self.grid_width,
self.grid_height,
),
dtype=int,
)
grid = [[[] for y in range(self.grid_height)] for x in range(self.grid_width)]
grid_idxs = [(x, y) for x in range(self.grid_width) for y in range(self.grid_height)]
# counter_items = np.zeros((self.grid_width, self.grid_height), dtype=int)
counter_items = [[[] for x in range(self.grid_width)] for y in
range(self.grid_height)]
counters = np.zeros((self.grid_width, self.grid_height), dtype=int)
counter_items = np.zeros((self.grid_width, self.grid_height), dtype=int)
for counter in env.counters:
grid_idx = np.floor(counter.pos).astype(int)
counter_oh_idx = self.vectorize_counter(
counter, self.counter_list
)
grid_base_array[grid_idx[0], grid_idx[1]] = counter_oh_idx
grid_idxs.remove((int(grid_idx[0]), int(grid_idx[1])))
# from here on the new items vectorization needs to be implemented. However, not much should change here.
counter_item_oh_idx = self.vectorize_item(
counter.occupied_by, self.item_list
)
counter_items[int(grid_idx[0])][int(grid_idx[1])] = (
counter_item_oh_idx
grid[grid_idx[0]][grid_idx[1]] = counter_item_oh_idx
counter_oh_idx = self.vectorize_counter(
counter, self.counter_list
)
counters[grid_idx] = counter_oh_idx
grid_idxs.remove((int(grid_idx[0]), int(grid_idx[1])))
grid[grid_idx[0]][grid_idx[1]]=np.append(grid[grid_idx[0]][grid_idx[1]], counter_oh_idx)
for free_idx in grid_idxs:
grid_base_array[free_idx[0], free_idx[1]] = self.counter_list.index("Empty")
counter_items[free_idx[0]][free_idx[1]] = [[0], [0], [0], [0], [0], [0]]
print(counter_items)
player_pos = env.players[self.player].pos.astype(int)
player_dir = env.players[self.player].facing_direction.astype(int)
player_data = np.concatenate((player_pos, player_dir), axis=0)
player_item_idx = self.vectorize_item(
env.players[self.player].holding, self.item_list
)
player_item = [player_item_idx]
final = np.concatenate(
(
counters.flatten(),
np.array(counter_items).flatten(),
player_data.flatten(),
player_item,
),
axis=0,
)
return final
grid_idxs.remove((int(player_pos[0]), int(player_pos[1])))
if player_dir[0] == -1:
player = "PlayerLeft"
elif player_dir[0] == 1:
player = "PlayerRight"
else:
if player_dir[1] == -1:
player = "PlayerUp"
else:
player = "PlayerDown"
grid[player_pos[0]][player_pos[1]] = player_item_idx
grid[player_pos[0]][player_pos[1]] = np.append(grid[player_pos[0]][player_pos[1]], 100 * self.counter_list.index(player))
for free_idx in grid_idxs:
grid[free_idx[0]][free_idx[1]] = self.vectorize_item(None, self.item_list)
grid[free_idx[0]][free_idx[1]] = np.append(grid[free_idx[0]][free_idx[1]], self.counter_list.index("Empty"))
return np.array(grid)
def vectorize_item(self, item, item_list):
if item is None:
item_name = "None"
elif isinstance(item, deque):
print(item)
if len(item) > 0:
item_name = item[0].name
item = item[0]
......@@ -144,7 +129,6 @@ class AdvancedStateConverterArray(StateToObservationConverter):
containing_items.sort(reverse=True)
for item in containing_items:
encoding.append(item)
print(encoding)
return np.array(encoding)
@staticmethod
......@@ -165,5 +149,4 @@ class AdvancedStateConverterArray(StateToObservationConverter):
counter_oh_idx = counter_list.index("Empty")
if counter_name in counter_list:
counter_oh_idx = counter_list.index(counter_name)
return counter_oh_idx
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment