
Commit 649dbbd

fix: increase max numpy header size (#149)

* fix(dataset): increase max numpy header size when loading a dataset; prevents load failure for some datasets with large headers
* fix(dataset): Python 3.8 compat
1 parent 4ab2d79 · commit 649dbbd

File tree

1 file changed: +6 -2 lines

cryosparc/dataset/__init__.py

Lines changed: 6 additions & 2 deletions

@@ -104,6 +104,10 @@
 }
 MAGIC_PREFIX_FORMATS = {v: k for k, v in FORMAT_MAGIC_PREFIXES.items()}  # inverse dict
 
+_NUMPY_MAJOR_MINOR_VERSION = tuple(map(int, n.__version__.split(".")[:2]))  # e.g., "1.23.4" -> (1, 23)
+_NUMPY_LOAD_KWARGS: Dict[str, Any] = {"max_header_size": 1024**3} if _NUMPY_MAJOR_MINOR_VERSION >= (1, 24) else {}
+"""Numpy >= 1.24 load function require max_header_size, which is 10000 by default and too small for some datasets."""
+
 
 class Dataset(Streamable, MutableMapping[str, Column], Generic[R]):
     """
@@ -650,7 +654,7 @@ def _load_numpy(
             mmap_mode, f = None, file
             chunk_size = 2**60  # huge enough number so you don't use chunks
 
-        indata = n.load(f, mmap_mode=mmap_mode, allow_pickle=False)
+        indata = n.load(f, mmap_mode=mmap_mode, allow_pickle=False, **_NUMPY_LOAD_KWARGS)
         size = len(indata)
         descr = filter_descr(indata.dtype.descr, keep_prefixes=prefixes, keep_fields=fields)
         dset = cls.allocate(size, descr)
@@ -664,7 +668,7 @@ def _load_numpy(
         if mmap_mode and offset < size:
             # reset mmap to avoid excessive memory usage
             del indata
-            indata = n.load(f, mmap_mode=mmap_mode, allow_pickle=False)
+            indata = n.load(f, mmap_mode=mmap_mode, allow_pickle=False, **_NUMPY_LOAD_KWARGS)
 
         if cstrs:
             dset.to_cstrs()
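
For context, here is a minimal standalone sketch of the failure this commit fixes. It is not part of the commit, and the dtype below is made up for illustration: a structured dtype with many fields produces a .npy header larger than numpy's 10000-byte default limit, so on numpy >= 1.24 numpy.load refuses it with a ValueError unless a larger max_header_size is passed, which is what the gated _NUMPY_LOAD_KWARGS provides.

    # Sketch only (assumes numpy >= 1.24, where max_header_size exists).
    import io
    import numpy as np

    # Hypothetical structured dtype with enough fields to push the .npy
    # header past numpy's default 10000-byte limit (roughly 16 KB here).
    descr = [(f"field_{i:04d}/value", "<f8") for i in range(600)]
    arr = np.zeros(2, dtype=descr)

    buf = io.BytesIO()
    np.save(buf, arr)

    buf.seek(0)
    try:
        np.load(buf, allow_pickle=False)  # default max_header_size=10000
    except ValueError as e:
        print("load failed:", e)

    buf.seek(0)
    # Raising the limit (the commit uses 1024**3) lets the load succeed.
    loaded = np.load(buf, allow_pickle=False, max_header_size=1024**3)
    assert loaded.dtype == arr.dtype

The kwargs dict is version-gated because numpy < 1.24 does not accept max_header_size at all, and the Dict[str, Any] annotation (rather than the built-in dict[str, Any]) is the Python 3.8 compatibility fix from the second bullet: subscripting built-in dict in a module-level annotation raises TypeError before Python 3.9.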
