Skip to content

Commit 5ba2065

Browse files
committed
compress_encode: Generate Zip store and chunk store
1 parent a79e036 commit 5ba2065

File tree

1 file changed

+60
-2
lines changed

1 file changed

+60
-2
lines changed

server/scripts/compress_encode.py

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,61 @@
66
import zarr
77
from pathlib import Path
88
import numpy as np
9+
import zipfile
10+
import json
11+
12+
def zip_zchunkstore(zip_file, url=None):
    """Returns a reference description for ReferenceFileSystem from an
    uncompressed Zarr zip file store.

    https://github.com/intake/fsspec-reference-maker

    Parameters
    ----------

    zip_file: str
        Path to the zip file.
    url: str, optional
        URL where the zip file will be served. Defaults to zip_file.

    Returns
    -------

    JSON-serializable reference description: a dict keyed by archive member
    name. Zarr JSON metadata members ('.zattrs', '.zgroup', '.zmetadata',
    '.zarray') map to their decoded text; every other member maps to
    ``[url, offset, size]``, the byte range of its data inside the zip file.

    Raises
    ------

    RuntimeError
        If any member of the archive is stored with compression.
    zipfile.BadZipFile
        If a member's local file header cannot be read.
    """
    import struct

    zarr_json_files = ('.zattrs', '.zgroup', '.zmetadata', '.zarray')
    data_url = zip_file if url is None else url

    # Fixed-size part of a zip local file header (30 bytes). The last two
    # fields are the lengths of the file name and of the extra field that
    # sit between this header and the member's data.
    local_header = struct.Struct('<4s5H3L2H')
    local_header_magic = b'PK\x03\x04'

    zchunkstore = {}
    with zipfile.ZipFile(zip_file) as zf, open(zip_file, 'rb') as raw:
        members = zf.infolist()

        # ZipFile.compression only reflects the constructor argument, so the
        # compression method has to be checked on each member.
        if any(info.compress_type != zipfile.ZIP_STORED for info in members):
            raise RuntimeError("Compressed zip's are not supported.")

        for info in members:
            if info.filename.endswith(zarr_json_files):
                content = zipfile.Path(zf, at=info.filename).read_text(encoding='utf-8')
                zchunkstore[info.filename] = content
                continue

            # Read the lengths from the local header itself: its extra field
            # may differ from the central directory's, so the data offset
            # cannot be derived from the file name length alone.
            raw.seek(info.header_offset)
            header = raw.read(local_header.size)
            if len(header) != local_header.size or not header.startswith(local_header_magic):
                raise zipfile.BadZipFile(
                    'Bad local file header for %r' % info.filename)
            name_length, extra_length = local_header.unpack(header)[-2:]
            offset = info.header_offset + local_header.size + name_length + extra_length
            zchunkstore[info.filename] = [data_url, offset, info.compress_size]

    return zchunkstore
54+
955

1056
def compress_encode(input_filepath,
1157
output_directory,
1258
multiscale=True,
1359
chunk_size=64,
1460
cname='zstd',
1561
clevel=5,
16-
shuffle=True):
62+
shuffle=True,
63+
zip_chunk_store=True):
1764
image = itk.imread(input_filepath)
1865
image_da = itk.xarray_from_image(image)
1966
dataset_name = str(Path(input_filepath))
@@ -68,6 +115,15 @@ def compress_encode(input_filepath,
68115
# Also consolidate the metadata on the pyramid scales so they can be used independently
69116
zarr.consolidate_metadata(store)
70117

118+
if zip_chunk_store:
119+
store = zarr.DirectoryStore(store_name)
120+
zip_store_path = str(Path(output_directory)) + '.zip'
121+
with zarr.storage.ZipStore(zip_store_path, mode='w', compression=0) as zip_store:
122+
zarr.copy_store(store, zip_store)
123+
zchunkstore = zip_zchunkstore(zip_store_path)
124+
with open(zip_store_path + '.zchunkstore', 'w') as fp:
125+
json.dump(zchunkstore, fp)
126+
71127

72128
if __name__ == '__main__':
73129
parser = argparse.ArgumentParser('Convert and encode a medical image file in a compressed Zarr directory store.')
@@ -79,6 +135,7 @@ def compress_encode(input_filepath,
79135
parser.add_argument('--cname', default='zstd', help='Base compression codec.')
80136
parser.add_argument('--clevel', default=5, type=int, help='Compression level.')
81137
parser.add_argument('--no-multi-scale', action='store_true', help='Do not generate a multi-scale pyramid.')
138+
parser.add_argument('--no-zip-chunk-store', action='store_true', help='Do not generate a zip file and corresponding chunk store.')
82139

83140
args = parser.parse_args()
84141

@@ -88,4 +145,5 @@ def compress_encode(input_filepath,
88145
chunk_size=args.chunk_size,
89146
cname=args.cname,
90147
clevel=args.clevel,
91-
shuffle=not args.no_shuffle)
148+
shuffle=not args.no_shuffle,
149+
zip_chunk_store=not args.no_zip_chunk_store)

0 commit comments

Comments
 (0)