diff --git a/.gitignore b/.gitignore index 0f1491604b25f5d4035c0fb7c7e77027fa1a3c7e..39cbdb1f2d9aff0596b746d3d3d764df2d61e6a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +anonymous.md venv/ **__pycache__/ /data/ diff --git a/mu_map/data/dicom_util.py b/mu_map/data/dicom_util.py new file mode 100644 index 0000000000000000000000000000000000000000..a6278577f36aacab0ef2064349d13cf0ed9e0b1d --- /dev/null +++ b/mu_map/data/dicom_util.py @@ -0,0 +1,50 @@ +from datetime import datetime +from enum import Enum + +import pydicom + + +class DICOMTime(Enum): + Study = 1 + Series = 2 + Acquisition = 3 + Content = 4 + + def date_field(self): + return f"{self.name}Date" + + def time_field(self): + return f"{self.name}Time" + + def to_datetime(self, dicom: pydicom.dataset.FileDataset) -> datetime: + _date = dicom[self.date_field()].value + _time = dicom[self.time_field()].value + return datetime( + year=int(_date[0:4]), + month=int(_date[4:6]), + day=int(_date[6:8]), + hour=int(_time[0:2]), + minute=int(_time[2:4]), + second=int(_time[4:6]), + # microsecond=int(_time.split(".")[1]), + ) + + +def parse_age(patient_age: str) -> int: + """ + Parse an age string as defined in the DICOM standard into an integer representing the age in years. + + :param patient_age: age string as defined in the DICOM standard + :return: the age in years as a number + """ + assert ( + type(patient_age) == str + ), f"patient age needs to be a string and not {type(patient_age)}" + assert ( + len(patient_age) == 4 + ), f"patient age [{patient_age}] has to be four characters long" + _num, _format = patient_age[:3], patient_age[3] + assert ( + _format == "Y" + ), f"currently, only patient ages in years [Y] is supported, not [{_format}]" + return int(_num) diff --git a/mu_map/data/prepare.py b/mu_map/data/prepare.py index e123746ce3ced395cf78bf61fddcfb283fc224f1..e1735e2704b2b899c4373b1fd4fb9a39df29ba00 100644 --- a/mu_map/data/prepare.py +++ b/mu_map/data/prepare.py @@ -2,23 +2,19 @@ import argparse from datetime import datetime, timedelta from enum import Enum import os -from typing import List, Dict +from typing import List, Dict, Callable import numpy as np import pandas as pd import pydicom +from mu_map.data.dicom_util import DICOMTime, parse_age from mu_map.logging import add_logging_args, get_logger_by_args STUDY_DESCRIPTION = "µ-map_study" -class MyocardialProtocol(Enum): - Stress = 1 - Rest = 2 - - headers = argparse.Namespace() headers.id = "id" headers.patient_id = "patient_id" @@ -55,89 +51,54 @@ headers.file_recon_nac_nsc = "file_recon_nac_nsc" headers.file_mu_map = "file_mu_map" -def parse_series_time(dicom_image: pydicom.dataset.FileDataset) -> datetime: + +def get_protocol(projection: pydicom.dataset.FileDataset) -> str: """ - Parse the date and time of a DICOM series object into a datetime object. + Get the protocol (stress, rest) of a projection image by checking if + it is part of the series description. - :param dicom_image: the dicom file to parse the series date and time from - :return: an according python datetime object. + :param projection: pydicom image of the projection + :returns: the protocol as a string (Stress or Rest) """ - _date = dicom_image.SeriesDate - _time = dicom_image.SeriesTime - return datetime( - year=int(_date[0:4]), - month=int(_date[4:6]), - day=int(_date[6:8]), - hour=int(_time[0:2]), - minute=int(_time[2:4]), - second=int(_time[4:6]), - microsecond=int(_time.split(".")[1]), - ) + if "stress" in projection.SeriesDescription.lower(): + return "Stress" + if "rest" in projection.SeriesDescription.lower(): + return "Rest" -def parse_age(patient_age: str) -> int: - """ - Parse and age string as defined in the DICOM standard into an integer representing the age in years. + raise ValueError(f"Unkown protocol in projection {projection.SeriesDescription}") - :param patient_age: age string as defined in the DICOM standard - :return: the age in years as a number - """ - assert ( - type(patient_age) == str - ), f"patient age needs to be a string and not {type(patient_age)}" - assert ( - len(patient_age) == 4 - ), f"patient age [{patient_age}] has to be four characters long" - _num, _format = patient_age[:3], patient_age[3] - assert ( - _format == "Y" - ), f"currently, only patient ages in years [Y] is supported, not [{_format}]" - return int(_num) - - -def get_projections( - dicom_images: List[pydicom.dataset.FileDataset], protocol: MyocardialProtocol -) -> pydicom.dataset.FileDataset: + +def find_projections(dicom_images: List[pydicom.dataset.FileDataset]) -> pydicom.dataset.FileDataset: """ - Extract the SPECT projection from a list of DICOM images belonging to a myocardial scintigraphy study given a study protocol. + Find all projections in a list of DICOM images belonging to a study. :param dicom_images: list of DICOM images of a study - :param protocol: the protocol for which the projection images should be extracted :return: the extracted DICOM image """ _filter = filter(lambda image: "TOMO" in image.ImageType, dicom_images) - _filter = filter(lambda image: protocol.name in image.SeriesDescription, _filter) - dicom_images = list(_filter) + dicom_images = [] + for dicom_image in _filter: + # filter for allows series descriptions + if dicom_image.SeriesDescription not in ["Stress", "Rest", "Stress_2"]: + logger.warning(f"Skip projection with unkown protocol [{dicom_image.SeriesDescription}]") + # print(f" - {DICOMTime.Study.to_datetime(di)}, {DICOMTime.Series.to_datetime(di)}, {DICOMTime.Content.to_datetime(di)}, {DICOMTime.Acquisition.to_datetime(di)}") + continue + dicom_images.append(dicom_image) if len(dicom_images) == 0: - raise ValueError(f"No projection for protocol {protocol.name} available") + raise ValueError(f"No projections available") return dicom_images - -def get_reconstructions( - dicom_images: List[pydicom.dataset.FileDataset], - protocol: MyocardialProtocol, - scatter_corrected: bool, - attenuation_corrected: bool, -) -> pydicom.dataset.FileDataset: +def is_recon_type(scatter_corrected: bool, attenuation_corrected: bool) -> Callable[pydicom.dataset.FileDataset, bool]: """ - Extract a SPECT reconstruction from a list of DICOM images belonging to a myocardial scintigraphy study given a study protocol. - The corrected flag can be used to either extract an attenuation corrected or a non-attenuation corrected image. - If there are multiple images, they are sorted by acquisition date and the newest is returned. + Get a filter function for reconstructions that are (non-)scatter and/or (non-)attenuation corrected. - :param dicom_images: list of DICOM images of a study - :param protocol: the protocol for which the projection images should be extracted - :param attenuation_corrected: extract an attenuation or non-attenuation corrected image - :param scatter_corrected: extract the image to which scatter correction was applied - :return: the extracted DICOM image + :param scatter_corrected: if the filter should only return true for scatter corrected reconstructions + :param attenuation_corrected: if the filter should only return true for attenuation corrected reconstructions + :returns: a filter function """ - _filter = filter(lambda image: "RECON TOMO" in image.ImageType, dicom_images) - _filter = filter(lambda image: protocol.name in image.SeriesDescription, _filter) - _filter = filter( - lambda image: STUDY_DESCRIPTION in image.SeriesDescription, _filter - ) - if scatter_corrected and attenuation_corrected: filter_str = " SC - AC " elif not scatter_corrected and attenuation_corrected: @@ -146,50 +107,143 @@ def get_reconstructions( filter_str = " SC - NoAC " elif not scatter_corrected and not attenuation_corrected: filter_str = " NoSC - NoAC " - _filter = filter(lambda image: filter_str in image.SeriesDescription, _filter) - # for SPECT reconstructions created in clinical studies this value exists and is set to 'APEX_TO_BASE' - # for the reconstructions with attenuation maps it does not exist - # _filter = filter( - # lambda image: not hasattr(image, "SliceProgressionDirection"), _filter - # ) + return lambda dicom: filter_str in dicom.SeriesDescription + + +def find_reconstruction(dicom_images: List[pydicom.dataset.FileDataset], projection: pydicom.dataset.FileDataset, scatter_corrected: bool, attenuation_corrected: bool) -> List[pydicom.dataset.FileDataset]: + """ + Find a reconstruction in a list of dicom images of a study belonging to a projection. + + :param dicom_images: the list of dicom images belonging to the study + :param projection: the dicom image of the projection + :param scatter_corrected: if it should be searched fo a scatter corrected reconstruction + :param attenuation_corrected: if it should be searched fo a attenuation corrected reconstruction + :returns: the according reconstruction + """ + protocol = get_protocol(projection) + + _filter = filter(lambda image: "RECON TOMO" in image.ImageType, dicom_images) + _filter = filter(lambda image: protocol in image.SeriesDescription, _filter) + _filter = filter(lambda image: STUDY_DESCRIPTION in image.SeriesDescription, _filter) + _filter = filter(lambda image: "CT" not in image.SeriesDescription, _filter) # remove µ-maps + _filter = filter(is_recon_type(scatter_corrected, attenuation_corrected), _filter) + + # _filter = list(_filter) + # print("DEBUG Reconstructions: ") + # for r in _filter: + # try: + # print(f" - {r.SeriesDescription:>50} at {DICOMTime.Study.to_datetime(r)}, {DICOMTime.Series.to_datetime(r)}, {DICOMTime.Content.to_datetime(r)}, {DICOMTime.Acquisition.to_datetime(r)}") + # except Exception as e: + # print(f"Error {e}") + + _filter = filter(lambda image: DICOMTime.Acquisition.to_datetime(image) == DICOMTime.Acquisition.to_datetime(projection), _filter) dicom_images = list(_filter) if len(dicom_images) == 0: - raise ValueError( - f"'{filter_str}' Reconstruction for protocol {protocol.name} is not available" - ) + raise ValueError(f"No reconstruction with SC={scatter_corrected}, AC={attenuation_corrected} available") + + # sort oldest to be first + if len(dicom_images) > 1: + logger.warning(f"Multiple reconstructions ({len(dicom_images)}) with SC={scatter_corrected}, AC={attenuation_corrected} for projection {projection.SeriesDescription} of patient {projection.PatientID}") + dicom_images.sort(key=lambda image: DICOMTime.Series.to_datetime(image), reverse=True) + return dicom_images[0] - return dicom_images -def get_attenuation_maps( - dicom_images: List[pydicom.dataset.FileDataset], protocol: MyocardialProtocol +MAX_TIME_DIFF_S = 30 +def find_attenuation_map( + dicom_images: List[pydicom.dataset.FileDataset], projection: pydicom.dataset.FileDataset, reconstructions: List[pydicom.dataset.FileDataset], ) -> pydicom.dataset.FileDataset: """ - Extract an attenuation map from a list of DICOM images belonging to a myocardial scintigraphy study given a study protocol. - If there are multiple attenuation maps, they are sorted by acquisition date and the newest is returned. + Find a reconstruction in a list of dicom images of a study belonging to a projection and reconstructions. - :param dicom_images: list of DICOM images of a study - :param protocol: the protocol for which the projection images should be extracted - :return: the extracted DICOM image + :param dicom_images: the list of dicom images belonging to the study + :param projection: the dicom image of the projection + :param reconstructions: dicom images of reconstructions belonging to the projection + :returns: the according attenuation map """ + protocol = get_protocol(projection) + recon_times = list(map(lambda recon: DICOMTime.Series.to_datetime(recon), reconstructions)) + _filter = filter(lambda image: "RECON TOMO" in image.ImageType, dicom_images) - _filter = filter(lambda image: protocol.name in image.SeriesDescription, _filter) - _filter = filter( - lambda image: STUDY_DESCRIPTION in image.SeriesDescription, _filter - ) + _filter = filter(lambda image: protocol in image.SeriesDescription, _filter) + _filter = filter(lambda image: STUDY_DESCRIPTION in image.SeriesDescription, _filter) _filter = filter(lambda image: " µ-map]" in image.SeriesDescription, _filter) + _filter = filter(lambda image: any(map(lambda recon_time: (DICOMTime.Series.to_datetime(image) - recon_time).seconds < MAX_TIME_DIFF_S, recon_times)), _filter) dicom_images = list(_filter) if len(dicom_images) == 0: - raise ValueError( - f"Attenuation map for protocol {protocol.name} is not available" + raise ValueError(f"No Attenuation map available") + + # sort oldest to be first + if len(dicom_images) > 1: + logger.warning(f"Multiple attenuation maps ({len(dicom_images)}) for projection {projection.SeriesDescription} of patient {projection.PatientID}") + dicom_images.sort(key=lambda image: DICOMTime.Series.to_datetime(image), reverse=True) + + return dicom_images[0] + + +def get_relevant_images(patient: pydicom.dataset.FileDataset, dicom_dir: str) -> List[pydicom.dataset.FileDataset]: + """ + Get all relevant images of a patient. + + :param patient: pydicom dataset of a patient + :param dicom_dir: the directory of the DICOM files + :return: all relevant dicom images + """ + # get all myocardial scintigraphy studies + studies = list( + filter( + lambda child: child.DirectoryRecordType == "STUDY", + # and child.StudyDescription == "Myokardszintigraphie", # filter is disabled because there is a study without this description and only such studies are exported anyway + patient.children, ) + ) + # extract all dicom images + dicom_images = [] + for study in studies: + series = list( + filter( + lambda child: child.DirectoryRecordType == "SERIES", + study.children, + ) + ) + for _series in series: + images = list( + filter( + lambda child: child.DirectoryRecordType == "IMAGE", + _series.children, + ) + ) + + # all SPECT data is stored as a single 3D array which means that it is a series with a single image + # this is not the case for CTs, which are skipped here + if len(images) != 1: + continue + + images = list( + map( + lambda image: pydicom.dcmread( + os.path.join( + dicom_dir_by_patient[patient.PatientID], + *image.ReferencedFileID, + ), + stop_before_pixels=True, + ), + images, + ) + ) + + if len(images) == 0: + continue + + dicom_images.append(images[0]) return dicom_images + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Prepare a dataset from DICOM directories", @@ -261,312 +315,144 @@ if __name__ == "__main__": data = pd.DataFrame(dict([(key, []) for key in vars(headers).keys()])) for i, patient in enumerate(patients, start=1): - logger.debug(f"Process patient {str(i):>3}/{len(patients)}:") - - # get all myocardial scintigraphy studies - studies = list( - filter( - lambda child: child.DirectoryRecordType == "STUDY", - # and child.StudyDescription == "Myokardszintigraphie", # filter is disabled because there is a study without this description and only such studies are exported anyway - patient.children, + logger.debug(f"Process patient {str(i):>3}/{len(patients)} - {patient.PatientName.given_name}, {patient.PatientName.family_name}:") + + dicom_images = get_relevant_images(patient, dicom_dir_by_patient[patient.PatientID]) + + projections = find_projections(dicom_images) + logger.debug(f"- Found {len(projections)}: projections with protocols {list(map(lambda p: get_protocol(p), projections))}") + + for projection in projections: + protocol = get_protocol(projection) + + reconstructions = [] + recon_headers = [headers.file_recon_nac_nsc, headers.file_recon_nac_sc, headers.file_recon_ac_nsc, headers.file_recon_ac_sc] + recon_postfixes = ["recon_nac_nsc", "recon_nac_sc", "recon_ac_nsc", "recon_ac_sc"] + for ac, sc in [(False, False), (False, True), (True, False), (True, True)]: + recon = find_reconstruction(dicom_images, projection, scatter_corrected=sc, attenuation_corrected=ac) + reconstructions.append(recon) + mu_map = find_attenuation_map(dicom_images, projection, reconstructions) + + + # for i, projection in enumerate(projections): + # _time = DICOMTime.Series.to_datetime(projection) + # print(f" - Projection: {projection.SeriesDescription:>10} at {DICOMTime.Study.to_datetime(projection)}, {DICOMTime.Series.to_datetime(projection)}, {DICOMTime.Content.to_datetime(projection)}, {DICOMTime.Acquisition.to_datetime(projection)}") + # recons = [] + # for sc, ac in [(False, False), (False, True), (True, False), (True, True)]: + # r = find_reconstruction(dicom_images, projection, scatter_corrected=sc, attenuation_corrected=ac) + # print(f" - {r.SeriesDescription:>50} at {DICOMTime.Study.to_datetime(r)}, {DICOMTime.Series.to_datetime(r)}, {DICOMTime.Content.to_datetime(r)}, {DICOMTime.Acquisition.to_datetime(r)}") + # recons.append(r) + # mu_map = find_attenuation_map(dicom_images, projection, recons) + # print(f" - {mu_map.SeriesDescription:>50} at {DICOMTime.Study.to_datetime(mu_map)}, {DICOMTime.Series.to_datetime(mu_map)}, {DICOMTime.Content.to_datetime(mu_map)}") + # print(f" -") + + + # extract pixel spacings and assert that they are equal for all reconstruction images + _map_lists = map( + lambda image: [*image.PixelSpacing, image.SliceThickness], + [*reconstructions, mu_map], ) - ) - - # extract all dicom images - dicom_images = [] - for study in studies: - series = list( - filter( - lambda child: child.DirectoryRecordType == "SERIES", - study.children, - ) + _map_lists = map( + lambda pixel_spacing: list(map(float, pixel_spacing)), + _map_lists, ) - for _series in series: - images = list( - filter( - lambda child: child.DirectoryRecordType == "IMAGE", - _series.children, - ) - ) - - # all SPECT data is stored as a single 3D array which means that it is a series with a single image - # this is not the case for CTs, which are skipped here - if len(images) != 1: - continue - - images = list( - map( - lambda image: pydicom.dcmread( - os.path.join( - dicom_dir_by_patient[patient.PatientID], - *image.ReferencedFileID, - ), - stop_before_pixels=True, - ), - images, - ) + _map_ndarrays = map( + lambda pixel_spacing: np.array(pixel_spacing), _map_lists + ) + pixel_spacings = list(_map_ndarrays) + _equal = all( + map( + lambda pixel_spacing: ( + pixel_spacing == pixel_spacings[0] + ).all(), + pixel_spacings, ) - - if len(images) == 0: - continue - - dicom_images.append(images[0]) - - for protocol in MyocardialProtocol: - if ( - len( - data[ - (data[headers.patient_id] == int(patient.PatientID)) - & (data[headers.protocol] == protocol.name) - ] - ) - > 0 - ): - logger.info( - f"Skip {patient.PatientID}:{protocol.name} since it is already contained in the dataset" - ) - continue - - extractions = [ - { - "function": get_projections, - "kwargs": {}, - "reconstruction": False, - "prefix": "projection", - "header": headers.file_projection, - }, - { - "function": get_reconstructions, - "kwargs": { - "attenuation_corrected": True, - "scatter_corrected": True, - }, - "reconstruction": True, - "prefix": "recon_ac_sc", - "header": headers.file_recon_ac_sc, - }, - { - "function": get_reconstructions, - "kwargs": { - "attenuation_corrected": True, - "scatter_corrected": False, - }, - "reconstruction": True, - "prefix": "recon_ac_nsc", - "header": headers.file_recon_ac_nsc, - }, - { - "function": get_reconstructions, - "kwargs": { - "attenuation_corrected": False, - "scatter_corrected": True, - }, - "reconstruction": True, - "prefix": "recon_nac_sc", - "header": headers.file_recon_nac_sc, - }, - { - "function": get_reconstructions, - "kwargs": { - "attenuation_corrected": False, - "scatter_corrected": False, - }, - "reconstruction": True, - "prefix": "recon_nac_nsc", - "header": headers.file_recon_nac_nsc, - }, - { - "function": get_attenuation_maps, - "kwargs": {}, - "reconstruction": True, - "prefix": "mu_map", - "header": headers.file_mu_map, - }, - ] - - try: - for extrac in extractions: - _images = extrac["function"]( - dicom_images, protocol=protocol, **extrac["kwargs"] - ) - _images.sort(key=parse_series_time) - extrac["images"] = _images - except ValueError as e: - logger.info( - f"Skip {patient.PatientID}:{protocol.name} because {e}" - ) - continue - - num_images = min( - list(map(lambda extrac: len(extrac["images"]), extractions)) + ) + assert ( + _equal + ), f"Not all pixel spacings of the reconstructions are equal: {pixel_spacings}" + pixel_spacing = pixel_spacings[0] + + # use the shape with the fewest slices, all other images will be aligned to that + _map_lists = map( + lambda image: [ + image.Rows, + image.Columns, + image.NumberOfSlices, + ], + [*reconstructions, mu_map], + ) + _map_lists = map( + lambda shape: list(map(int, shape)), _map_lists + ) + _map_ndarrays = map(lambda shape: np.array(shape), _map_lists) + shapes = list(_map_ndarrays) + shapes.sort(key=lambda shape: shape[2]) + shape = shapes[0] + + # extract and sort energy windows + energy_windows = ( + projection.EnergyWindowInformationSequence + ) + energy_windows = map( + lambda ew: ew.EnergyWindowRangeSequence[0], energy_windows + ) + energy_windows = map( + lambda ew: ( + float(ew.EnergyWindowLowerLimit), + float(ew.EnergyWindowUpperLimit), + ), + energy_windows, + ) + energy_windows = list(energy_windows) + energy_windows.sort(key=lambda ew: ew[0], reverse=True) + + row = { + headers.id: _id, + headers.patient_id: projection.PatientID, + headers.age: parse_age(projection.PatientAge), + headers.weight: float(projection.PatientWeight), + headers.size: float(projection.PatientSize), + headers.protocol: protocol, + headers.datetime_acquisition: DICOMTime.Series.to_datetime(projection), + headers.datetime_reconstruction: DICOMTime.Series.to_datetime(reconstructions[0]), + headers.pixel_spacing_x: pixel_spacing[0], + headers.pixel_spacing_y: pixel_spacing[1], + headers.pixel_spacing_z: pixel_spacing[2], + headers.shape_x: shape[0], + headers.shape_y: shape[1], + headers.shape_z: shape[2], + headers.radiopharmaceutical: projection.RadiopharmaceuticalInformationSequence[0].Radiopharmaceutical, + headers.radionuclide_dose: projection.RadiopharmaceuticalInformationSequence[0].RadionuclideTotalDose, + headers.radionuclide_code: projection.RadiopharmaceuticalInformationSequence[0].RadionuclideCodeSequence[0].CodeValue, + headers.radionuclide_meaning: projection.RadiopharmaceuticalInformationSequence[0].RadionuclideCodeSequence[0].CodeMeaning, + headers.energy_window_peak_lower: energy_windows[0][0], + headers.energy_window_peak_upper: energy_windows[0][1], + headers.energy_window_scatter_lower: energy_windows[1][0], + headers.energy_window_scatter_upper: energy_windows[1][1], + headers.detector_count: len(projection.DetectorInformationSequence), + headers.collimator_type: projection.DetectorInformationSequence[0].CollimatorType, + headers.rotation_start: float(projection.RotationInformationSequence[0].StartAngle), + headers.rotation_step: float(projection.RotationInformationSequence[0].AngularStep), + headers.rotation_scan_arc: float(projection.RotationInformationSequence[0].ScanArc), + } + + _filename_base = f"{_id:04d}-{protocol.lower()}" + _ext = "dcm" + img_headers = [headers.file_projection, *recon_headers, headers.file_mu_map] + img_postfixes = ["projection", *recon_postfixes, "mu_map"] + images = [projection, *reconstructions, mu_map] + for header, image, postfix in zip(img_headers, images, img_postfixes): + _image = pydicom.dcmread(image.filename) + filename = f"{_filename_base}-{postfix}.{_ext}" + pydicom.dcmwrite( + os.path.join(args.images_dir, filename), _image ) + row[header] = filename - # ATTENTION: this is a special filter for mu maps which could have been saved from previous test runs of the workflow - # this filter only keeps the most recent ones - if num_images < len(extractions[-1]["images"]): - _len = len(extractions[-1]["images"]) - extractions[-1]["images"] = extractions[-1]["images"][ - (_len - num_images) : - ] - - for j in range(num_images): - _recon_images = filter( - lambda extrac: extrac["reconstruction"], extractions - ) - recon_images = list( - map(lambda extrac: extrac["images"][j], _recon_images) - ) - - # extract date times and assert that they are equal for all reconstruction images - datetimes = list(map(parse_series_time, recon_images)) - _datetimes = sorted(datetimes, reverse=True) - _datetimes_delta = list( - map(lambda dt: _datetimes[0] - dt, _datetimes) - ) - # note: somehow the images receive slightly different timestamps, maybe this depends on time to save and computation time - # thus, a 10 minute interval is allowed here - _equal = all( - map(lambda dt: dt < timedelta(minutes=10), _datetimes_delta) - ) - assert ( - _equal - ), f"Not all dates and times of the reconstructions are equal: {datetimes}" - - # extract pixel spacings and assert that they are equal for all reconstruction images - _map_lists = map( - lambda image: [*image.PixelSpacing, image.SliceThickness], - recon_images, - ) - _map_lists = map( - lambda pixel_spacing: list(map(float, pixel_spacing)), - _map_lists, - ) - _map_ndarrays = map( - lambda pixel_spacing: np.array(pixel_spacing), _map_lists - ) - pixel_spacings = list(_map_ndarrays) - _equal = all( - map( - lambda pixel_spacing: ( - pixel_spacing == pixel_spacings[0] - ).all(), - pixel_spacings, - ) - ) - assert ( - _equal - ), f"Not all pixel spacings of the reconstructions are equal: {pixel_spacings}" - pixel_spacing = pixel_spacings[0] - - # use the shape with the fewest slices, all other images will be aligned to that - _map_lists = map( - lambda image: [ - image.Rows, - image.Columns, - image.NumberOfSlices, - ], - recon_images, - ) - _map_lists = map( - lambda shape: list(map(int, shape)), _map_lists - ) - _map_ndarrays = map(lambda shape: np.array(shape), _map_lists) - shapes = list(_map_ndarrays) - shapes.sort(key=lambda shape: shape[2]) - shape = shapes[0] - - projection_image = extractions[0]["images"][j] - # extract and sort energy windows - energy_windows = ( - projection_image.EnergyWindowInformationSequence - ) - energy_windows = map( - lambda ew: ew.EnergyWindowRangeSequence[0], energy_windows - ) - energy_windows = map( - lambda ew: ( - float(ew.EnergyWindowLowerLimit), - float(ew.EnergyWindowUpperLimit), - ), - energy_windows, - ) - energy_windows = list(energy_windows) - energy_windows.sort(key=lambda ew: ew[0], reverse=True) - - row = { - headers.id: _id, - headers.patient_id: projection_image.PatientID, - headers.age: parse_age(projection_image.PatientAge), - headers.weight: float(projection_image.PatientWeight), - headers.size: float(projection_image.PatientSize), - headers.protocol: protocol.name, - headers.datetime_acquisition: parse_series_time( - projection_image - ), - headers.datetime_reconstruction: datetimes[0], - headers.pixel_spacing_x: pixel_spacing[0], - headers.pixel_spacing_y: pixel_spacing[1], - headers.pixel_spacing_z: pixel_spacing[2], - headers.shape_x: shape[0], - headers.shape_y: shape[1], - headers.shape_z: shape[2], - headers.radiopharmaceutical: projection_image.RadiopharmaceuticalInformationSequence[ - 0 - ].Radiopharmaceutical, - headers.radionuclide_dose: projection_image.RadiopharmaceuticalInformationSequence[ - 0 - ].RadionuclideTotalDose, - headers.radionuclide_code: projection_image.RadiopharmaceuticalInformationSequence[ - 0 - ] - .RadionuclideCodeSequence[0] - .CodeValue, - headers.radionuclide_meaning: projection_image.RadiopharmaceuticalInformationSequence[ - 0 - ] - .RadionuclideCodeSequence[0] - .CodeMeaning, - headers.energy_window_peak_lower: energy_windows[0][0], - headers.energy_window_peak_upper: energy_windows[0][1], - headers.energy_window_scatter_lower: energy_windows[1][0], - headers.energy_window_scatter_upper: energy_windows[1][1], - headers.detector_count: len( - projection_image.DetectorInformationSequence - ), - headers.collimator_type: projection_image.DetectorInformationSequence[ - 0 - ].CollimatorType, - headers.rotation_start: float( - projection_image.RotationInformationSequence[ - 0 - ].StartAngle - ), - headers.rotation_step: float( - projection_image.RotationInformationSequence[ - 0 - ].AngularStep - ), - headers.rotation_scan_arc: float( - projection_image.RotationInformationSequence[0].ScanArc - ), - } - - _filename_base = f"{_id:04d}-{protocol.name.lower()}" - _ext = "dcm" - _images = list( - map(lambda extrac: extrac["images"][j], extractions) - ) - for _image, extrac in zip(_images, extractions): - image = pydicom.dcmread(_image.filename) - filename = f"{_filename_base}-{extrac['prefix']}.{_ext}" - pydicom.dcmwrite( - os.path.join(args.images_dir, filename), image - ) - row[extrac["header"]] = filename - - _id += 1 - row = pd.DataFrame(row, index=[0]) - data = pd.concat((data, row), ignore_index=True) + _id += 1 + row = pd.DataFrame(row, index=[0]) + data = pd.concat((data, row), ignore_index=True) data.to_csv(args.meta_csv, index=False) except Exception as e: