Skip to content

Commit f8c71b3

Browse files
authored
🐛 Store scan parameters as strings instead of bytestrings (#2291)
2 parents 9c652b1 + 1e323dd commit f8c71b3

File tree

5 files changed

+110
-25
lines changed

5 files changed

+110
-25
lines changed

CPAC/utils/bids_utils.py

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (C) 2016-2024 C-PAC Developers
1+
# Copyright (C) 2016-2025 C-PAC Developers
22

33
# This file is part of C-PAC.
44

@@ -324,7 +324,8 @@ def bids_retrieve_params(bids_config_dict, f_dict, dbg=False):
324324

325325
for k, v in params.items():
326326
if isinstance(v, str):
327-
params[k] = v.encode("ascii", errors="ignore")
327+
# Force all strings to be ASCII-compatible UTF-8 (by removing all non-ASCII characters)
328+
params[k] = v.encode("ascii", errors="ignore").decode("ascii")
328329

329330
return params
330331

@@ -802,12 +803,10 @@ def bids_gen_cpac_sublist(
802803
return sublist
803804

804805

805-
def collect_bids_files_configs(bids_dir, aws_input_creds=""):
806-
"""
807-
:param bids_dir:
808-
:param aws_input_creds:
809-
:return:
810-
"""
806+
def collect_bids_files_configs(
807+
bids_dir: str, aws_input_creds: Optional[str] = ""
808+
) -> tuple[list[str], dict[str, Any]]:
809+
"""Collect NIfTI file paths and JSON configurations from a BIDS directory."""
811810
file_paths = []
812811
config_dict = {}
813812

@@ -853,7 +852,7 @@ def collect_bids_files_configs(bids_dir, aws_input_creds=""):
853852
except Exception as e:
854853
msg = (
855854
f"Error retrieving {s3_obj.key.replace(prefix, '')}"
856-
f" ({e.message})"
855+
f" ({getattr(e, 'message', str(e))})"
857856
)
858857
raise SpecifiedBotoCoreError(msg) from e
859858
elif "nii" in str(s3_obj.key):
@@ -862,7 +861,7 @@ def collect_bids_files_configs(bids_dir, aws_input_creds=""):
862861
)
863862

864863
else:
865-
for root, dirs, files in os.walk(bids_dir, topdown=False, followlinks=True):
864+
for root, _dirs, files in os.walk(bids_dir, topdown=False, followlinks=True):
866865
if files:
867866
for f in files:
868867
for suf in suffixes:
@@ -1086,26 +1085,26 @@ def cl_strip_brackets(arg_list):
10861085

10871086

10881087
def create_cpac_data_config(
1089-
bids_dir,
1090-
participant_labels=None,
1091-
aws_input_creds=None,
1092-
skip_bids_validator=False,
1093-
only_one_anat=True,
1094-
):
1088+
bids_dir: str,
1089+
participant_labels: Optional[list[str]] = None,
1090+
aws_input_creds: Optional[str] = None,
1091+
skip_bids_validator: bool = False,
1092+
only_one_anat: bool = True,
1093+
) -> list:
10951094
"""
10961095
Create a C-PAC data config YAML file from a BIDS directory.
10971096
10981097
Parameters
10991098
----------
1100-
bids_dir : str
1099+
bids_dir
11011100
1102-
participant_labels : list or None
1101+
participant_labels
11031102
11041103
aws_input_creds
11051104
1106-
skip_bids_validator : bool
1105+
skip_bids_validator
11071106
1108-
only_one_anat : bool
1107+
only_one_anat
11091108
The "anat" key for a subject expects a string value, but we
11101109
can temporarily store a list instead by passing True here if
11111110
we will be filtering that list down to a single string later
@@ -1129,8 +1128,10 @@ def create_cpac_data_config(
11291128
]
11301129

11311130
if not file_paths:
1132-
UTLOGGER.error("Did not find data for %s", ", ".join(participant_labels))
1133-
sys.exit(1)
1131+
if participant_labels:
1132+
UTLOGGER.error("Did not find data for %s", ", ".join(participant_labels))
1133+
msg = f"Did not find data in {bids_dir}"
1134+
raise FileNotFoundError(msg)
11341135

11351136
raise_error = not skip_bids_validator
11361137

@@ -1145,7 +1146,8 @@ def create_cpac_data_config(
11451146

11461147
if not sub_list:
11471148
UTLOGGER.error("Did not find data in %s", bids_dir)
1148-
sys.exit(1)
1149+
msg = f"Did not find data in {bids_dir}"
1150+
raise FileNotFoundError(msg)
11491151

11501152
return sub_list
11511153

CPAC/utils/datasource.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,9 +514,14 @@ def match_epi_fmaps(
514514
with open(scan_params, "r") as f:
515515
scan_params = json.load(f)
516516
if "PhaseEncodingDirection" in scan_params:
517+
from CPAC.utils.datatypes import PHASE_ENCODING_DIRECTIONS
518+
517519
epi_pedir: str | bytes = scan_params["PhaseEncodingDirection"]
518520
if isinstance(epi_pedir, bytes):
519521
epi_pedir = epi_pedir.decode("utf-8")
522+
assert (
523+
epi_pedir in PHASE_ENCODING_DIRECTIONS
524+
), f"PhaseEncodingDirection {epi_pedir} not valid (should be one of {PHASE_ENCODING_DIRECTIONS})"
520525
if epi_pedir == bold_pedir:
521526
same_pe_epi = epi_scan
522527
elif epi_pedir[0] == bold_pedir[0]:

CPAC/utils/datatypes.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,8 @@ def __init__(self, *args, **kwargs):
6868
else:
6969
args = ([args[0]],)
7070
list.__init__(self, *args, **kwargs)
71+
72+
73+
PHASE_ENCODING_DIRECTIONS: list[str] = [
74+
f"{i}{sign}" for i in ["i", "j", "k"] for sign in ["", "-"]
75+
]

CPAC/utils/interfaces/datasink.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131
# Prior to release 0.12, Nipype was licensed under a BSD license.
3232

33-
# Modifications Copyright (C) 2019-2024 C-PAC Developers
33+
# Modifications Copyright (C) 2019-2025 C-PAC Developers
3434

3535
# This file is part of C-PAC.
3636
"""Interface that allow interaction with data.
@@ -139,6 +139,7 @@ def _fetch_bucket(self, bucket_name):
139139
# Import packages
140140
try:
141141
import boto3
142+
import boto3.session
142143
import botocore
143144
except ImportError:
144145
err_msg = "Boto3 package is not installed - install boto3 and try again."

CPAC/utils/tests/test_bids_utils.py

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (C) 2021-2024 C-PAC Developers
1+
# Copyright (C) 2021-2025 C-PAC Developers
22

33
# This file is part of C-PAC.
44

@@ -17,9 +17,15 @@
1717
"""Tests for bids_utils."""
1818

1919
from importlib import resources
20+
from itertools import permutations
2021
import os
22+
from pathlib import Path
2123
from subprocess import run
24+
from warnings import warn
2225

26+
import boto3
27+
from botocore import UNSIGNED
28+
from botocore.client import Config
2329
import pytest
2430
import yaml
2531

@@ -33,6 +39,7 @@
3339
load_yaml_config,
3440
sub_list_filter_by_labels,
3541
)
42+
from CPAC.utils.datatypes import PHASE_ENCODING_DIRECTIONS
3643
from CPAC.utils.monitoring.custom_logging import getLogger
3744

3845
logger = getLogger("CPAC.utils.tests")
@@ -194,3 +201,68 @@ def test_sub_list_filter_by_labels(t1w_label, bold_label, participant_label):
194201
)
195202
else:
196203
assert all(len(sub.get("func")) in [0, 5] for sub in sub_list)
204+
205+
206+
@pytest.mark.parametrize("participant_labels", [["NDARAB348EWR"]])
207+
def test_scan_parameter_type(tmp_path: Path, participant_labels: list[str]) -> None:
208+
"""Test that scan parameter types are correctly interpreted."""
209+
bids_dir = _gather_scan_parameter_test_data(tmp_path, participant_labels)
210+
data_config = create_cpac_data_config(bids_dir, participant_labels)
211+
assert len(data_config) == 1
212+
if "fmap" in data_config[0]:
213+
assert (
214+
data_config[0]["fmap"]["epi_PA"]["scan_parameters"][
215+
"PhaseEncodingDirection"
216+
]
217+
in PHASE_ENCODING_DIRECTIONS
218+
)
219+
220+
221+
def _gather_scan_parameter_test_data(
222+
root_dir: Path, participant_labels: list[str]
223+
) -> str:
224+
"""Create a test BIDS dataset with structure for the given subject.
225+
226+
Downloads JSON files from S3 and creates empty placeholder files for imaging data.
227+
"""
228+
s3_bucket = "fcp-indi"
229+
bids_dir = root_dir / "data"
230+
for _participant in participant_labels:
231+
participant = (
232+
f"sub-{_participant}"
233+
if not _participant.startswith("sub-")
234+
else _participant
235+
)
236+
s3_prefix = f"data/Projects/HBN/MRI/Site-CBIC/{participant}"
237+
s3_client = boto3.client("s3", config=Config(signature_version=UNSIGNED))
238+
files = {
239+
"anat": [
240+
f"{participant}_acq-HCP_run-01_T1w",
241+
],
242+
"fmap": [
243+
f"{participant}_dir-{direction}_acq-{acq}_epi"
244+
for direction in [
245+
"".join(direction) for direction in permutations(["A", "P"], 2)
246+
]
247+
for acq in ["dwi", "fMRI"]
248+
],
249+
"func": [
250+
f"{participant}_task-movieDM_bold",
251+
],
252+
}
253+
for modality, file_list in files.items():
254+
modality_dir = bids_dir / participant / modality
255+
modality_dir.mkdir(parents=True, exist_ok=True)
256+
for file_base in file_list:
257+
# Download JSON files from S3
258+
json_file = modality_dir / f"{file_base}.json"
259+
s3_key = f"{s3_prefix}/{modality}/{file_base}.json"
260+
try:
261+
s3_client.download_file(s3_bucket, s3_key, str(json_file))
262+
except Exception as e:
263+
# If download fails, create empty JSON
264+
warn("Failed to download %s: %s" % (s3_key, e))
265+
json_file.write_text("{}")
266+
nii_file = modality_dir / f"{file_base}.nii.gz"
267+
nii_file.touch()
268+
return str(bids_dir)

0 commit comments

Comments
 (0)