dicom.py

"""
Utility functions to deal with DICOM images.
"""
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
import os
import random
from typing import Optional, Tuple

import numpy as np
import pydicom

from mu_map.types import Spacing


"""
Since DICOM images only allow images stored in short integer format,
the Siemens scanner software multiplies values by a factor before storing
so that no precision is lost.
The scale can be found in this private DICOM tag.
"""
DCM_TAG_PIXEL_SCALE_FACTOR = 0x00331038
DCM_TAG_DETECTOR_INFORMATION_SEQUENCE = 0x00540022
DCM_TAG_IMAGE_ORIENTATION_PATIENT = 0x00200037

"""
Maximum value that can be stored in an unsigned integer with 16 bits.
"""
UINT16_MAX = 2**16 - 1
"""
DICOM images always contain UIDs to indicate their uniqueness.
Thus, when a DICOM image is updated, UIDs have to be changed for
which the following prefix is used.
"""
UID_PREFIX = "1.2.826.0.1.3680043.2.521."
"""
Default extension of dicom files.
"""
EXTENSION_DICOM = ".dcm"


@dataclass
class DICOM:
    """
    DICOM dataclass wrapping the pydicom Filedataset (header) and the
    image as a numpy array into an object.

    This is useful because the pixel array in the pydicom Filedataset
    often needs to be scaled.
    """

    header: pydicom.dataset.FileDataset
    image: np.ndarray

    def __iter__(self):
        return iter((self.header, self.image))

    def update_image(self, image: np.ndarray, spacing: Optional[Spacing] = None):
        """
        See `update_dcm_image`.
        """
        self.image = image
        update_dcm_image(self.header, image, spacing=spacing)

    def change_uid(self):
        """
        See `change_uid`.
        """
        change_uid(self.header)

    def get_rotation_matrix(self) -> np.array:
        """
        See `get_rotation_matrix`.
        """
        return get_rotation_matrix(self.header)

    def write(self, filename: str):
        """
        Write this DICOM image to a filename.

        It is verified that the filename extension is `.dcm`.

        Parameters
        ----------
        filename: str
            the filename of the written DICOM file
        """
        filename = os.path.splitext(filename)[0] + EXTENSION_DICOM
        pydicom.dcmwrite(filename, self.header)


class DICOMTime(Enum):
    """
    Class for parsing dates and times defined in a DICOM header
    into a python datetime type. It maps the four datetime fields
    [Study, Series, Acquisition, Content] of the DICOM header into
    an enumeration type.

    Usage: DICOMTime.Series.to_datetime(dcm)
    """

    Study = 1
    Series = 2
    Acquisition = 3
    Content = 4

    def date_field(self) -> str:
        """
        Get the name of the date field according to this DICOMTime type.
        """
        return f"{self.name}Date"

    def time_field(self) -> str:
        """
        Get the name of the time field according to this DICOMTime type.
        """
        return f"{self.name}Time"

    def to_datetime(self, dcm_header: pydicom.dataset.FileDataset) -> datetime:
        """
        Get the datetime according to this DICOMTime type.
        """
        try:
            _date = dcm_header[self.date_field()].value
            _time = dcm_header[self.time_field()].value
        except:
            return "               None"

        return datetime(
            year=int(_date[0:4]),
            month=int(_date[4:6]),
            day=int(_date[6:8]),
            hour=int(_time[0:2]),
            minute=int(_time[2:4]),
            second=int(_time[4:6]),
        )


def parse_age(patient_age: str) -> int:
    """
    Parse an age string as defined in the DICOM standard into an integer representing the age in years.

    Parameters
    ----------
    patient_age: str
        age string as defined in the DICOM standard

    Returns
    -------
    int
        the age in years as a number
    """
    assert (
        type(patient_age) == str
    ), f"patient age needs to be a string and not {type(patient_age)}"
    assert (
        len(patient_age) == 4
    ), f"patient age [{patient_age}] has to be four characters long"
    _num, _format = patient_age[:3], patient_age[3]
    assert (
        _format == "Y"
    ), f"currently, only patient ages in years [Y] is supported, not [{_format}]"
    return int(_num)


def load_dcm(filename: str, direction: float = 0.0) -> DICOM:
    """
    Load a DICOM image as a header and a numpy array.

    Perform image rescaling if the header contains a slope or intercept tag.

    Parameters
    ----------
    filename: str
        filename of the DICOM image
    direction: float
        other than 0 changes the stacking order of slices to the desired direction

    Returns
    -------
    DICOM
        the DICOM image
    """
    header = pydicom.dcmread(filename)
    image = header.pixel_array
    image = get_slope(header) * image + get_intercept(header)

    dcm = DICOM(header, image)
    dcm = to_direction(dcm, direction=direction)
    return dcm


def get_slope(dcm_header: pydicom.dataset.FileDataset) -> float:
    """
    Get the rescale slope of a DICOM image.

    This is a wrapper which returns the DCM_TAG_PIXEL_SCALE_FACTOR
    used by Siemens scanners if available. Else it returns the RescaleSlope
    value or 1.0.

    Parameters
    ----------
    dcm_header: pydicom.dataset.FileDataset

    Returns
    -------
    float
    """
    if DCM_TAG_PIXEL_SCALE_FACTOR in dcm_header:
        return 1.0 / dcm_header[DCM_TAG_PIXEL_SCALE_FACTOR].value

    try:
        return dcm_header.RescaleSlope
    except:
        return 1.0


def set_slope(dcm_header: pydicom.dataset.FileDataset, slope: float):
    """
    Set rescale slope of a DICOM image.

    Similar to `get_slope` this function is a wrapper to set the value as the correct tag.
    If the header has the Siemens scale factor tag it is used and if the header has the
    RescaleSlope tag it is used.

    Parameters
    ----------
    dcm_header: pydicom.dataset.FileDataset
    slope: float
    """
    if DCM_TAG_PIXEL_SCALE_FACTOR in dcm_header:
        dcm_header[DCM_TAG_PIXEL_SCALE_FACTOR].value = float(round(1.0 / slope))
        return

    # do not set the rescale slope if the header is missing the field and it is 1 anyway
    if slope == 1.0 and not hasattr(dcm_header, "RescaleSlope"):
        return

    dcm_header.RescaleSlope = slope


def get_intercept(dcm_header: pydicom.dataset.FileDataset) -> float:
    """
    Get the rescale intercept of a DICOM image.

    This is a wrapper which returns RescaleSlope or 0.0 if the tag
    is not available.

    Parameters
    ----------
    dcm_header: pydicom.dataset.FileDataset

    Returns
    -------
    float
    """
    try:
        _intercept = dcm_header.RescaleIntercept
        return dcm_header.RescaleIntercept
    except AttributeError:
        return 0.0


def set_intercept(dcm_header: pydicom.dataset.FileDataset, intercept: float):
    """
    Set the rescale intercept of a DICOM image.

    Does nothing if the intercept is 0 and the DICOM image does not
    already have the tag

    Parameters
    ----------
    dcm_header: pydicom.dataset.FileDataset
    intercept: float
    """
    if intercept == 0.0 and not hasattr(dcm_header, "RescaleIntercept"):
        return

    dcm_header.RescaleIntercept = intercept


def set_rescale(
    dcm_header: pydicom.dataset.FileDataset, intercept: float, slope: float
):
    """
    Call `set_intercept` and `set_slope`.

    Parameters
    ----------
    dcm_header: pydicom.dataset.FileDataset
    intercept: float
    slope: float
    """
    set_intercept(dcm_header, intercept)
    set_slope(dcm_header, slope)


def compute_rescale(image: np.ndarray) -> Tuple[np.ndarray, float, float]:
    """
    Compute the rescale intercept and slope to store a numpy array
    as np.uint16 as the DICOM pixel array.

    The intercept is the to the minimal image value if it is negative.
    The slope is only set if precision is lost by converting to an
    np.uint16 image. Then, the slope is computed in a way that choses
    to cover the most range of the np.uint16 space.

    Parameters
    ----------
    image: np.ndarray

    Returns
    -------
    np.ndarray
        the rescaled image as np.uint16
    float
        the intercept used for rescaling
    float
        the slope used for rescaling
    """
    intercept = 0.0 if image.min() >= 0 else image.min()

    _image = image - intercept
    if np.allclose(_image, _image.astype(np.uint16)):
        slope = 1.0
    else:
        _max = _image.max()
        slope = 1e-5
        while _max / slope > UINT16_MAX:
            slope *= 10
    _image = (_image / slope).astype(np.uint16)

    return _image, intercept, slope


def load_dcm_img(
    filename: str,
    direction: float = 0.0,
) -> np.ndarray:
    """
    Load a DICOM image as a numpy array and apply normalization of the Siemens SPECT/CT
    Scanner.

    See `load_dcm`. This function does the same but only returns the image

    Parameters
    ----------
    filename: str
        filename of the DICOM image
    direction: float
        other than 0 changes the stacking order of slices to the desired direction

    Returns
    -------
    np.ndarray
        the image scaled and loaded into a numpy array
    """
    return load_dcm(filename, direction=direction).image


def to_direction(dcm: DICOM, direction: float = 1) -> DICOM:
    """
    Change the stacking direction of slices in a DICOM image.
    The direction is specified by the sign of the [Spacing Between Slices](https://dicom.innolitics.com/ciods/nm-image/nm-reconstruction/00180088) parameter.

    Parameters
    ----------
    dcm: DICOM header and an image
    direction: float
        the desired stacking direction as a float (>0 positive, <0 negative, =0 stay as it is)

    Returns
    -------
    DICOM:
        DICOM with image and header updated
    """
    try:
        spacing_between_slices = float(dcm.header.SpacingBetweenSlices)
    except AttributeError:
        return dcm

    _header = dcm.header.copy()
    _image = dcm.image.copy()
    if spacing_between_slices * direction < 0:
        _header.DetectorInformationSequence[0].ImagePositionPatient[2] *= (
            dcm.image.shape[0] * spacing_between_slices
        )
        _header.SpacingBetweenSlices = -spacing_between_slices
        _image = _image[::-1].copy()
    return DICOM(_header, _image)


def update_dcm_image(
    dcm_header: pydicom.dataset.FileDataset, image: np.ndarray, spacing: Optional[Spacing] = None
) -> pydicom.dataset.FileDataset:
    """
    Update the image data in a DICOM header.

    This function scales the image, converts it to unsigned integers with 16 bits and
    updates the pixel data in the DICOM header. Additionally, other related tags in the
    DICOM header, such as image dimensions and maximum pixel values, are updated accordingly.

    Note that this function modifies the given DICOM header. If you want to keep the old
    one, you should copy it first.

    Parameters
    ----------
    dcm: DICOM
        the DICOM header to be updated
    image: np.ndarray
        the image stored with the DICOM header
    spacing: Spacing, optional
        the spacing of the new image

    Returns
    -------
    DICOM
        the updated DICOM header
    """
    image, intercept, slope = compute_rescale(image)
    dcm_header.PixelData = image.tobytes()
    set_rescale(dcm_header, intercept, slope)

    dcm_header.NumberOfFrames = image.shape[0]
    dcm_header.NumberOfSlices = image.shape[0]
    dcm_header.SliceVector = list(range(1, image.shape[0] + 1))
    dcm_header.Columns = image.shape[1]
    dcm_header.Rows = image.shape[2]
    dcm_header.WindowWidth = image.max()
    dcm_header.WindowCenter = image.max() / 2
    dcm_header.LargestImagePixelValue = image.max()
    dcm_header["LargestImagePixelValue"].VR = "US"

    if spacing is not None:
        dcm_header.PixelSpacing = list(spacing[:2])
        dcm_header.SliceThickness = spacing[2]

    return dcm_header


def change_uid(dcm_header: pydicom.dataset.FileDataset) -> pydicom.dataset.FileDataset:
    """
    Change the UIDs (SeriesInstance and SOPInstance) in a DICOM header so that
    it becomes its own unique file.

    Note that this method does not guarantee that the UIDs are fully unique.
    It just generated a new random UID with a specified prefix, which hopefully is unique.

    Parameters
    ----------
    dcm: DICOM
        the DICOM header to be udpated
    """
    dcm_header.SeriesInstanceUID = pydicom.uid.generate_uid(prefix=UID_PREFIX)
    dcm_header.SOPInstanceUID = pydicom.uid.generate_uid(prefix=UID_PREFIX)
    dcm_header.file_meta.MediaStorageSOPInstanceUID = dcm_header.SOPInstanceUID
    return dcm_header


def get_rotation_matrix(dcm_header: pydicom.dataset.FileDataset) -> np.array:
    """
    Get a rotation matrix from a DICOM header.

    Parameters
    ----------
    dcm_header: pydicom.dataset.FileDataset
        the DICOM header

    Returns
    -------
    np.array
        a 3x3 rotation matrix
    """
    if (
        DCM_TAG_DETECTOR_INFORMATION_SEQUENCE not in dcm_header
        or len(dcm_header[DCM_TAG_DETECTOR_INFORMATION_SEQUENCE].value) == 0
    ):
        raise ValueError("DICOM header does not contain DetectorInformationSequence")

    detector_info = dcm_header[DCM_TAG_DETECTOR_INFORMATION_SEQUENCE].value[0]

    if DCM_TAG_IMAGE_ORIENTATION_PATIENT not in detector_info:
        raise ValueError("DICOM header does not contain ImageOrientationPatient")

    ori = detector_info[DCM_TAG_IMAGE_ORIENTATION_PATIENT].value

    x_dir = np.array(ori[:3])
    y_dir = np.array(ori[3:])
    z_dir = np.cross(x_dir, y_dir)

    dirs = [x_dir, y_dir, z_dir]
    dirs = list(map(lambda v: v / np.linalg.norm(v), dirs))
    return np.array(dirs).T


def create_empty_dicom() -> DICOM:
    """
    Create a new empty DICOM header with default values and an empty image.

    Returns
    -------
    DICOM
    """
    meta = pydicom.dataset.FileMetaDataset()
    meta.FileMetaInformationVersion = b"\x00\x01"
    meta.MediaStorageSOPClassUID = pydicom.uid.NuclearMedicineImageStorage
    meta.MediaStorageSOPInstanceUID = pydicom.uid.generate_uid(prefix=UID_PREFIX)
    meta.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian
    meta.ImplementationClassUID = "1.2.40.0.13.1.3"
    meta.ImplementationVersionName = "dcm4che-null"

    dcm_header = pydicom.dataset.FileDataset(
        filename_or_obj=None, dataset={}, file_meta=meta, preamble=b"\x00" * 128
    )
    dcm_header.SpecificCharacterSet = "ISO_IR 100"
    dcm_header.SOPInstanceUID = meta.MediaStorageSOPInstanceUID
    dcm_header.BitsAllocated = 16
    dcm_header.BitsStored = 16
    dcm_header.HighBit = 15
    dcm_header.PhotometricInterpretation = "MONOCHROME2"
    dcm_header.PixelRepresentation = 0
    dcm_header.SamplesPerPixel = 1
    dcm_header.PixelData = None
    dcm_header["PixelData"].VR = "OW"
    dcm_header.set_original_encoding(True, True, "ISO_IR 100")

    return DICOM(dcm_header, image=None)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Dump the header of a DICOM image",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "file", type=str, help="the DICOM file of which the header is dumped"
    )
    args = parser.parse_args()

    dcm = pydicom.dcmread(args.file)
    print(dcm)