extends dataset stats printing

41a344dd · Tamino Huxohl · 82ff4266 · 41a344dd
Commit 41a344dd authored 2 years ago by Tamino Huxohl
--- a/mu_map/eval/dataset_stats.py
+++ b/mu_map/eval/dataset_stats.py
@@ -20,32 +20,33 @@ if __name__ == "__main__":
    parser.add_argument("--split_csv", type=str, default="data/second/split.csv")
    args = parser.parse_args()
    print(args)
+    print()
    data = pd.read_csv(args.meta_csv)
    splits = split_csv(data, args.split_csv)
+    _from = data[headers.datetime_acquisition].min().split(" ")[0]
+    _to = data[headers.datetime_acquisition].max().split(" ")[0]
+    print(f"Scans were performed from {_from} to {_to}")
    for split_name, split in splits.items():
        print(f"{split_name[0].upper()}{split_name[1:]}:")
        n_studies = len(split)
-        print(f" -    Studies: {n_studies}")
        n_stress = len(split[split[headers.protocol] == "Stress"])
        n_rest = len(split[split[headers.protocol] == "Rest"])
-        print(f"   -   Stress: {n_stress}")
+        print(f" -    Studies: {n_studies}, Stress/Rest: {n_stress}/{n_rest}")
-        print(f"   -     Rest: {n_rest}")
        n_patients = len(split[headers.patient_id].unique())
        _group = split.groupby(headers.patient_id).count()[headers.id]
        n_studies_one = (_group == 1).sum()
        n_studies_two = (_group == 2).sum()
        n_studies_three = (_group == 3).sum()
-        print(
-            f" -   Patients: {n_patients}  [{n_studies_one}, {n_studies_two}, {n_studies_three}]"
-        )
        _split = split.drop_duplicates(headers.patient_id)
        n_males = len(_split[_split[headers.sex] == "M"])
        n_females = len(_split[_split[headers.sex] == "F"])
-        print(f"   -        M: {n_males}")
+        print(
-        print(f"   -        F: {n_females}")
+            f" -   Patients: {n_patients}, M/F: {n_males}/{n_females}, Number of studies (1, 2, 3): [{n_studies_one}, {n_studies_two}, {n_studies_three}]"
+        )
        age = split[headers.age]
        height = split[headers.size]
@@ -59,12 +60,12 @@ if __name__ == "__main__":
        for stat, label in [
            (age, "Age"),
-            (weight, "Weight"),
            (height, "Height"),
+            (weight, "Weight"),
            (bmi, "BMI"),
        ]:
-            _min = f"{str(stat.min()):>5}"
+            _min = f"{stat.min():.1f}"
-            _max = f"{str(stat.max()):>5}"
+            _max = f"{stat.max():.1f}"
            _mean = f"{stat.mean():.1f}"
            _std = f"{stat.std():.1f}"
-            print(f" - {label:>10}: [{_min}, {_max}] - {_mean:>5}±{_std:>5}")
+            print(f" - {label:>10}: [{_min:>5}, {_max:>5}] - {_mean:>5}±{_std:>5}")