ENH: Validate PET data objects' attributes at instantiation

jhlegarreta · jhlegarreta · commit 37ed54c7a7de · 2025-11-23T19:33:56.000-05:00
Validate PET data objects' attributes at instantiation: ensures that the
attributes are present and match the expected dimensionalities.

Refactor the PET attributes so that only the required (`frame_time` and
`uptake`) and optional (`frame_duration`) parameters are accepted by the
constructor. The `midframe` and the `total_duration` attributes can be
computed from the required parameters, so exclude them from `__init__`.

Although `uptake` can also be computed from the PET frame data, the
rationale behind requiring it is similar to the one for the DWI class
`bzero`: users will be able to compute the `uptake` using their
preferred strategy and provide it to the constructor. For the `from_nii`
function, if a callable is provided, it will be used to compute the
value; otherwise a default strategy is used to compute it.

Refactor the `from_nii` function so that the required parameters are
present when instantiating the PET instance. Increase consistency with
the `dmri` data module `from_nii` counterpart function.

Use the `get_data` utils function in `from_nii` to handle automatically
the data type when loading the PET data.

Refactor the PET data creation fixture in `conftest.py` to accept the
required/optional arguments and to return the necessary data.

Refactor the tests accordingly and increase consistency with the `dmri`
data module testing helper functions. Reduces cognitive load and
maintenance burden.

Add additional object instantiation equality checks: check that objects
instantiated through reading NIfTI files equal objects instantiated
directly.
diff --git a/src/nifreeze/data/pet.py b/src/nifreeze/data/pet.py
@@ -38,20 +38,202 @@
 from nitransforms.resampling import apply
 from typing_extensions import Self
 
-from nifreeze.data.base import BaseDataset, _cmp, _data_repr
-from nifreeze.utils.ndimage import load_api
+from nifreeze.data.base import BaseDataset, _cmp, _data_repr, _has_ndim
+from nifreeze.utils.ndimage import get_data, load_api
+
+ARRAY_ATTRIBUTE_ABSENCE_ERROR_MSG = "PET '{attribute}' may not be None"
+"""PET initialization array attribute absence error message."""
+
+ARRAY_ATTRIBUTE_OBJECT_ERROR_MSG = (
+    "PET '{attribute}' must be a numeric homogeneous array-like object."
+)
+"""PET initialization array attribute object error message."""
+
+ARRAY_ATTRIBUTE_NDIM_ERROR_MSG = "PET '{attribute}' must be a 1D numpy array."
+"""PET initialization array attribute ndim error message."""
+
+ATTRIBUTE_VOLUME_DIMENSIONALITY_MISMATCH_ERROR = """\
+PET '{attribute}' length does not match number of frames: \
+expected {n_frames} values, found {attr_len}."""
+"""PET attribute shape mismatch error message."""
+
+
+def format_array_like(value: Any, attr: attrs.Attribute) -> np.ndarray:
+    """Validates that ``value`` can be converted to a :obj:`~numpy.ndarray`
+
+    This function is intended for use as an attrs-style formatter.
+
+    Parameters
+    ----------
+    value : :obj:`Any`
+        The value to format.
+    attr : :obj:`~attrs.Attribute`
+        The attribute being initialized; ``attr.name`` is used in the error message.
+
+    Returns
+    -------
+    formatted : :obj:`~numpy.ndarray`
+        The formatted value.
+
+    Raises
+    ------
+    :exc:`TypeError`
+        If the input cannot be converted to a float :obj:`~numpy.ndarray`.
+    :exc:`ValueError`
+        If the value is ``None``.
+    """
+
+    if value is None:
+        raise ValueError(ARRAY_ATTRIBUTE_ABSENCE_ERROR_MSG.format(attribute=attr.name))
+
+    try:
+        formatted = np.asarray(value, dtype=float)
+    except (TypeError, ValueError) as exc:
+        # Conversion failed (e.g. nested ragged objects, non-numeric)
+        raise TypeError(ARRAY_ATTRIBUTE_OBJECT_ERROR_MSG.format(attribute=attr.name)) from exc
+
+    return formatted
+
+
+def validate_1d_array(inst: PET, attr: attrs.Attribute, value: Any) -> None:
+    """Strict validator to ensure an attribute is a 1D NumPy array.
+
+    Enforces that ``value`` has exactly one dimension (``value.ndim == 1``).
+
+    This function is intended for use as an attrs-style validator.
+
+    Parameters
+    ----------
+    inst : :obj:`~nifreeze.data.pet.PET`
+        The instance being validated (unused; present for validator signature).
+    attr : :obj:`~attrs.Attribute`
+        The attribute being validated; ``attr.name`` is used in the error message.
+    value : :obj:`Any`
+        The value to validate.
+
+    Raises
+    ------
+    :exc:`ValueError`
+        If the value is not 1D.
+    """
+
+    if not _has_ndim(value, 1):
+        raise ValueError(ARRAY_ATTRIBUTE_NDIM_ERROR_MSG.format(attribute=attr.name))
 
 
 @attrs.define(slots=True)
 class PET(BaseDataset[np.ndarray]):
-    """Data representation structure for PET data."""
+    """Data representation structure for PET data.
+
+    Relevant temporal attributes, namely the per-frame duration and midframe
+    times and the total duration, are computed at initialization.
+
+    Let :math:`K` be the number of frames. For each frame :math:`k`, we define
+    the frame duration :math:`d_k` as the difference between consecutive midframe
+    times:
+
+    .. math::
+       d_k = t^{\\mathrm{end}}_k - t^{\\mathrm{start}}_k
+
+    In this implementation, the last interval is duplicated to match the
+    appropriate dimensionality.
+
+    The total duration :math:`D` of the acquisition is a scalar computed as the
+    sum of the frame durations:
+
+    .. math::
+       D = \\sum_{k=1}^{K} d_k
+         = \\sum_{k=1}^{K} \\left(t^{\\mathrm{end}}_k - t^{\\mathrm{start}}_k\\right)
+
+    One commonly useful scalar time is the overall mid-frame time (the midpoint
+    of the whole acquisition). If :math:`t_{\\mathrm{first}}` denotes the start
+    time of the first frame, the overall midframe (the midpoint of the whole
+    acquisition) is
 
-    midframe: np.ndarray = attrs.field(default=None, repr=_data_repr, eq=attrs.cmp_using(eq=_cmp))
+    .. math::
+       \\mathrm{midframe} = t_{\\mathrm{first}} + \\frac{D}{2}
+
+    Per-frame midpoints :math:`m_k` are exposed for convenience:
+
+    .. math::
+       m_k = t^{\\mathrm{start}}_k + \\frac{d_k}{2}
+
+    Users can provide their own frame duration data to be used instead of the
+    default values computed as shown above. See :meth:`~nifreeze.data.pet._compute_frame_duration`.
+
+    """
+
+    frame_time: np.ndarray = attrs.field(
+        default=None,
+        repr=_data_repr,
+        eq=attrs.cmp_using(eq=_cmp),
+        converter=attrs.Converter(format_array_like, takes_field=True),
+        validator=validate_1d_array,
+    )
+    """A (N,) numpy array specifying the timing of each sample or frame."""
+    uptake: np.ndarray = attrs.field(
+        default=None,
+        repr=_data_repr,
+        eq=attrs.cmp_using(eq=_cmp),
+        converter=attrs.Converter(format_array_like, takes_field=True),
+        validator=validate_1d_array,
+    )
+    """A (N,) numpy array specifying the uptake value of each sample or frame."""
+    frame_duration: np.ndarray | None = attrs.field(
+        default=None, repr=_data_repr, eq=attrs.cmp_using(eq=_cmp)
+    )
+    """A (N,) numpy array specifying the frame duration."""
+    midframe: np.ndarray = attrs.field(
+        default=None, repr=_data_repr, init=False, eq=attrs.cmp_using(eq=_cmp)
+    )
     """A (N,) numpy array specifying the midpoint timing of each sample or frame."""
-    total_duration: float = attrs.field(default=None, repr=True)
+    total_duration: float = attrs.field(default=None, repr=True, init=False)
     """A float representing the total duration of the dataset."""
-    uptake: np.ndarray = attrs.field(default=None, repr=_data_repr, eq=attrs.cmp_using(eq=_cmp))
-    """A (N,) numpy array specifying the uptake value of each sample or frame."""
+
+    def __attrs_post_init__(self) -> None:
+        """Enforce presence and basic consistency of PET data fields at
+        instantiation time.
+
+        Specifically, the length of the frame_time and uptake attributes must
+        match the last dimension of the data (number of frames).
+
+        Computes the values for the private attributes.
+        """
+        n_frames = int(self.dataobj.shape[-1])
+
+        if len(self.frame_time) != n_frames:
+            raise ValueError(
+                ATTRIBUTE_VOLUME_DIMENSIONALITY_MISMATCH_ERROR.format(
+                    attribute=attrs.fields_dict(self.__class__)["frame_time"].name,
+                    n_frames=n_frames,
+                    attr_len=len(self.frame_time),
+                )
+            )
+
+        if len(self.uptake) != n_frames:
+            raise ValueError(
+                ATTRIBUTE_VOLUME_DIMENSIONALITY_MISMATCH_ERROR.format(
+                    attribute=attrs.fields_dict(self.__class__)["uptake"].name,
+                    n_frames=n_frames,
+                    attr_len=len(self.uptake),
+                )
+            )
+
+        # Compute temporal attributes
+
+        # Convert to a float32 numpy array and zero out the earliest time
+        frame_time_arr = np.array(self.frame_time, dtype=np.float32)
+        frame_time_arr -= frame_time_arr[0]
+
+        # If the user did not provide frame duration values, compute them
+        if self.frame_duration is not None:
+            self.frame_duration = np.array(self.frame_duration, dtype=np.float32)
+        else:
+            self.frame_duration = _compute_frame_duration(frame_time_arr)
+
+        # Compute total duration and shift midframe to the midpoint
+        self.total_duration = float(frame_time_arr[-1] + self.frame_duration[-1])
+        self.midframe = frame_time_arr + 0.5 * self.frame_duration
 
     def _getextra(self, idx: int | slice | tuple | np.ndarray) -> tuple[np.ndarray]:
         return (self.midframe[idx],)
@@ -222,6 +404,8 @@ def from_nii(
     frame_time: np.ndarray | list[float],
     brainmask_file: Path | str | None = None,
     frame_duration: np.ndarray | list[float] | None = None,
+    uptake: np.ndarray | list[float] | None = None,
+    uptake_stat_func: Callable[..., np.ndarray] = np.sum,
 ) -> PET:
     """
     Load PET data from NIfTI, creating a PET object with appropriate metadata.
@@ -236,9 +420,12 @@ def from_nii(
         A brainmask NIfTI file. If provided, will be loaded and
         stored in the returned dataset.
     frame_duration : :obj:`numpy.ndarray` or :obj:`list` of :obj:`float`, optional
-        The duration of each frame.
-        If ``None``, it is derived by the difference of consecutive frame times,
-        defaulting the last frame to match the second-last.
+        The duration of each frame. If ``None``, its computation is deferred to
+        the :obj:`~nifreeze.data.pet.PET` object initialization.
+    uptake : :obj:`numpy.ndarray` or :obj:`list` of :obj:`float`, optional
+        Uptake values. If provided, ``uptake_stat_func`` will be ignored.
+    uptake_stat_func : :obj:`Callable`, optional
+        The statistic function to compute the uptake value.
 
     Returns
     -------
@@ -253,37 +440,34 @@ def from_nii(
     """
 
     filename = Path(filename)
-    # Load from NIfTI
-    img = load_api(filename, SpatialImage)
-    data = img.get_fdata(dtype=np.float32)
-    pet_obj = PET(
-        dataobj=data,
-        affine=img.affine,
-    )
-
-    pet_obj.uptake = _compute_uptake_statistic(data, stat_func=np.sum)
 
-    # Convert to a float32 numpy array and zero out the earliest time
-    frame_time_arr = np.array(frame_time, dtype=np.float32)
-    frame_time_arr -= frame_time_arr[0]
-    pet_obj.midframe = frame_time_arr
+    # 1) Load a NIfTI
+    img = load_api(filename, SpatialImage)
+    fulldata = get_data(img)
 
-    # If the user doesn't provide frame_duration, we derive it:
-    if frame_duration is None:
-        durations = _compute_frame_duration(pet_obj.midframe)
+    # 2) Determine uptake value
+    if uptake is not None:
+        pass
     else:
-        durations = np.array(frame_duration, dtype=np.float32)
+        uptake = _compute_uptake_statistic(fulldata, stat_func=uptake_stat_func)
 
-    # Set total_duration and shift frame_time to the midpoint
-    pet_obj.total_duration = float(frame_time_arr[-1] + durations[-1])
-    pet_obj.midframe = frame_time_arr + 0.5 * durations
+    uptake = np.asarray(uptake)
 
-    # If a brain mask is provided, load and attach
+    # 3) If a brainmask_file was provided, load it
+    brainmask_data = None
     if brainmask_file is not None:
         mask_img = load_api(brainmask_file, SpatialImage)
-        pet_obj.brainmask = np.asanyarray(mask_img.dataobj, dtype=bool)
+        brainmask_data = np.asanyarray(mask_img.dataobj, dtype=bool)
 
-    return pet_obj
+    # 4) Create and return the PET instance
+    return PET(
+        dataobj=fulldata,
+        affine=img.affine,
+        brainmask=brainmask_data,
+        frame_time=np.asarray(frame_time),
+        frame_duration=frame_duration if frame_duration is None else np.asarray(frame_duration),
+        uptake=uptake,
+    )
 
 
 def _compute_frame_duration(midframe: np.ndarray) -> np.ndarray:
@@ -308,7 +492,7 @@ def _compute_frame_duration(midframe: np.ndarray) -> np.ndarray:
     return durations
 
 
-def _compute_uptake_statistic(data: np.ndarray, stat_func: Callable = np.sum):
+def _compute_uptake_statistic(data: np.ndarray, stat_func: Callable[..., np.ndarray] = np.sum):
     """Compute a statistic over all voxels for each frame on a PET sequence.
 
     Assumes the last dimension corresponds to the number of frames in the
diff --git a/test/conftest.py b/test/conftest.py
@@ -323,10 +323,11 @@ def setup_random_pet_data(request):
 
     n_frames = 5
     vol_size = (4, 4, 4)
-    midframe = np.arange(n_frames, dtype=np.float32) + 1
-    total_duration = float(n_frames + 1)
+    frame_time = np.arange(n_frames, dtype=np.float32) + 1
+    uptake_stat_func = np.sum
+    frame_duration = None
     if marker:
-        n_frames, vol_size, midframe, total_duration = marker.args
+        n_frames, vol_size, frame_time, uptake_stat_func, frame_duration = marker.args
 
     rng = request.node.rng
 
@@ -335,10 +336,13 @@ def setup_random_pet_data(request):
     )
     brainmask_dataobj = rng.choice([True, False], size=vol_size).astype(bool)
 
+    uptake = uptake_stat_func(pet_dataobj.reshape(-1, pet_dataobj.shape[-1]), axis=0)
+
     return (
         pet_dataobj,
         affine,
         brainmask_dataobj,
-        midframe,
-        total_duration,
+        frame_time,
+        uptake,
+        frame_duration,
     )
diff --git a/test/test_data_pet.py b/test/test_data_pet.py