diff --git a/mu_map/eval/measures.py b/mu_map/eval/measures.py
index f2cf2d3c8017d481a4e842d51b5ee93763ac5804..28661f7b53f1bd18d8a510fd2ba4d122cc99fcf4 100644
--- a/mu_map/eval/measures.py
+++ b/mu_map/eval/measures.py
@@ -1,53 +1,125 @@
 import numpy as np
-import pandas as pd
-import torch
 
-from mu_map.dataset.default import MuMapDataset
-from mu_map.dataset.normalization import MeanNormTransform
-from mu_map.dataset.transform import SequenceTransform, PadCropTranform
-from mu_map.models.unet import UNet
-
-torch.set_grad_enabled(False)
 
 def mse(prediction: np.array, target: np.array):
     se = (prediction - target) ** 2
     mse = se.sum() / se.size
     return mse
 
+
 def nmae(prediction: np.array, target: np.array):
     mae = np.absolute(prediction - target) / prediction.size
     nmae = mae.sum() / (target.max() - target.min())
     return nmae
 
-device = torch.device("cuda:0")
-model = UNet()
-model = model.to(device)
-model.load_state_dict(torch.load("trainings/03_cgan/snapshots/50_generator.pth", map_location=device))
-model = model.eval()
 
-transform_normalization = SequenceTransform(transforms=[MeanNormTransform(), PadCropTranform(dim=3, size=32)])
-dataset = MuMapDataset("data/initial/", transform_normalization=transform_normalization, split_name="validation")
+if __name__ == "__main__":
+    import argparse
+
+    import pandas as pd
+    import torch
+
+    from mu_map.dataset.default import MuMapDataset
+    from mu_map.dataset.normalization import norm_by_str, norm_choices
+    from mu_map.dataset.transform import SequenceTransform, PadCropTranform
+    from mu_map.models.unet import UNet
+
+    parser = argparse.ArgumentParser(
+        description="Compute, print and store measures for a given model",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="cpu",
+        choices=["cpu", "cuda"],
+        help="the device on which the model is evaluated (cpu or cuda)",
+    )
+    parser.add_argument(
+        "--weights",
+        type=str,
+        required=True,
+        help="the model weights which should be scored",
+    )
+    parser.add_argument("--out", type=str, help="write results as a csv file")
+
+    parser.add_argument(
+        "--dataset_dir",
+        type=str,
+        default="data/initial/",
+        help="directory where the dataset is found",
+    )
+    parser.add_argument(
+        "--split",
+        type=str,
+        default="validation",
+        choices=["train", "test", "validation", "all"],
+        help="the split of the dataset to be processed",
+    )
+    parser.add_argument(
+        "--norm",
+        type=str,
+        choices=["none", *norm_choices],
+        default="mean",
+        help="type of normalization applied to the reconstructions",
+    )
+    parser.add_argument(
+        "--size",
+        type=int,
+        default=32,
+        help="pad/crop the third tensor dimension to this value",
+    )
+    args = parser.parse_args()
+
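+    # interpret "all" as "no split": passing split_name=None presumably makes
+    # MuMapDataset load every sample instead of a single split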
+    if args.split == "all":
+        args.split == None
+
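+    # gradients are not needed for evaluation, so disable them globally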
+    torch.set_grad_enabled(False)
+
+    device = torch.device(args.device)
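+    # build the U-Net, load the given weights and switch to evaluation mode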
+    model = UNet()
+    model.load_state_dict(torch.load(args.weights, map_location=device))
+    model = model.to(device).eval()
+
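+    # normalize the reconstructions and pad/crop the third tensor dimension to args.size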
+    transform_normalization = SequenceTransform(
+        transforms=[
+            norm_by_str(args.norm),
+            PadCropTranform(dim=3, size=args.size),
+        ]
+    )
+    dataset = MuMapDataset(
+        "data/initial/",
+        transform_normalization=transform_normalization,
+        split_name=args.split,
+    )
+
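+    # compute every measure for each sample in the dataset and collect the values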
+    measures = {"NMAE": nmae, "MSE": mse}
+    values = {key: [] for key in measures}
+    for i, (recon, mu_map) in enumerate(dataset):
+        print(
+            f"Processing input {i:>{len(str(len(dataset)))}}/{len(dataset)}", end="\r"
+        )
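+        # add a batch dimension and run the reconstruction through the model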
+        prediction = model(recon.unsqueeze(dim=0).to(device))
 
-scores_mse = []
-scores_nmae = []
-for i, (recon, mu_map) in enumerate(dataset):
-    print(f"{i:02d}/{len(dataset)}", end="\r")
-    recon = recon.unsqueeze(dim=0).to(device)
-    prediction = model(recon).squeeze().cpu().numpy()
-    mu_map = mu_map.squeeze().cpu().numpy()
+        prediction = prediction.squeeze().cpu().numpy()
+        mu_map = mu_map.squeeze().cpu().numpy()
 
-    scores_nmae.append(nmae(prediction, mu_map))
-    scores_mse.append(mse(prediction, mu_map))
-scores_mse = np.array(scores_mse)
-scores_nmae = np.array(scores_nmae)
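+        # evaluate each measure on this sample and store the result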
+        for key, measure in measures.items():
+            values[key].append(measure(prediction, mu_map))
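+    # overwrite the progress output with spaces before printing the summary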
+    print(f" " * 100, end="\r")
 
-mse_avg = scores_mse.mean()
-mse_std = np.std(scores_mse)
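+    # convert the collected lists into numpy arrays for aggregation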
+    values = {key: np.array(vals) for key, vals in values.items()}
 
-nmae_avg = scores_nmae.mean()
-nmae_std = np.std(scores_nmae)
 
-print("Scores:")
-print(f" - NMAE: {nmae_avg:.6f}±{nmae_std:.6f}")
-print(f" -  MSE: {mse_avg:.6f}±{mse_std:.6f}")
 
+    print("Scores:")
+    for measure_name, measure_values in values.items():
+        mean = measure_values.mean()
+        std = np.std(measure_values)
+        print(f" - {measure_name}: {mean:.6f}±{std:.6f}")