diff --git a/mu_map/eval/dataset_stats.py b/mu_map/eval/dataset_stats.py index 6f10adf89f076233d4220eed9186d76a8ae00cda..b58bef7d1d664c5e2be34391456bf32d077f6d37 100644 --- a/mu_map/eval/dataset_stats.py +++ b/mu_map/eval/dataset_stats.py @@ -20,32 +20,33 @@ if __name__ == "__main__": parser.add_argument("--split_csv", type=str, default="data/second/split.csv") args = parser.parse_args() print(args) + print() data = pd.read_csv(args.meta_csv) splits = split_csv(data, args.split_csv) + _from = data[headers.datetime_acquisition].min().split(" ")[0] + _to = data[headers.datetime_acquisition].max().split(" ")[0] + print(f"Scans were performed from {_from} to {_to}") + for split_name, split in splits.items(): print(f"{split_name[0].upper()}{split_name[1:]}:") n_studies = len(split) - print(f" - Studies: {n_studies}") n_stress = len(split[split[headers.protocol] == "Stress"]) n_rest = len(split[split[headers.protocol] == "Rest"]) - print(f" - Stress: {n_stress}") - print(f" - Rest: {n_rest}") + print(f" - Studies: {n_studies}, Stress/Rest: {n_stress}/{n_rest}") n_patients = len(split[headers.patient_id].unique()) _group = split.groupby(headers.patient_id).count()[headers.id] n_studies_one = (_group == 1).sum() n_studies_two = (_group == 2).sum() n_studies_three = (_group == 3).sum() - print( - f" - Patients: {n_patients} [{n_studies_one}, {n_studies_two}, {n_studies_three}]" - ) _split = split.drop_duplicates(headers.patient_id) n_males = len(_split[_split[headers.sex] == "M"]) n_females = len(_split[_split[headers.sex] == "F"]) - print(f" - M: {n_males}") - print(f" - F: {n_females}") + print( + f" - Patients: {n_patients}, M/F: {n_males}/{n_females}, Number of studies (1, 2, 3): [{n_studies_one}, {n_studies_two}, {n_studies_three}]" + ) age = split[headers.age] height = split[headers.size] @@ -59,12 +60,12 @@ if __name__ == "__main__": for stat, label in [ (age, "Age"), - (weight, "Weight"), (height, "Height"), + (weight, "Weight"), (bmi, "BMI"), ]: - _min = f"{str(stat.min()):>5}" - _max = f"{str(stat.max()):>5}" + _min = f"{stat.min():.1f}" + _max = f"{stat.max():.1f}" _mean = f"{stat.mean():.1f}" _std = f"{stat.std():.1f}" - print(f" - {label:>10}: [{_min}, {_max}] - {_mean:>5}±{_std:>5}") + print(f" - {label:>10}: [{_min:>5}, {_max:>5}] - {_mean:>5}±{_std:>5}")