Skip to content

Commit d60b25d

Browse files
refactor: Use pathlib over os (#196)
* Use pathlib.Path over os.path in all instances of the codebase.
1 parent cb5dcf2 commit d60b25d

File tree

4 files changed: 33 additions (+33) and 44 deletions (-44)

setup.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from __future__ import annotations
33

44
import os
5+
from pathlib import Path
56

67
import setuptools.command.sdist
78
from setuptools import setup
@@ -12,7 +13,7 @@
1213
data_files = {n for n in os.listdir(datafile) if any(n.endswith(ex) for ex in data_ex)}
1314

1415
if data_files:
15-
with open(os.path.join(datafile, "file_list.txt"), "w") as f:
16+
with open(Path(datafile) / "file_list.txt", "w") as f:
1617
for d in sorted(data_files):
1718
print(d.split("/")[-1], file=f)
1819

src/skhep_testdata/remote_files.py

Lines changed: 21 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,15 @@
11
from __future__ import annotations
22

3-
import errno
43
import logging
5-
import os
64
import tarfile
75
from importlib import resources
6+
from pathlib import Path
87
from typing import ClassVar
98
from urllib.request import urlretrieve
109

1110
import yaml
1211

13-
_default_data_dir = os.path.realpath(os.path.dirname(__file__))
12+
_default_data_dir = Path(__file__).resolve().parent
1413

1514

1615
class RemoteDatasetList:
@@ -33,9 +32,7 @@ def load_remote_configs(cls, file_to_load: str | None = None) -> None:
3332
return
3433

3534
if file_to_load is None:
36-
dataset_yml = resources.files("skhep_testdata").joinpath(
37-
"remote_datasets.yml"
38-
)
35+
dataset_yml = resources.files("skhep_testdata") / "remote_datasets.yml"
3936
with dataset_yml.open() as infile:
4037
datasets = yaml.load(infile, Loader=yaml.SafeLoader)
4138
else:
@@ -49,7 +46,7 @@ def load_remote_configs(cls, file_to_load: str | None = None) -> None:
4946
config["files"] = files
5047
config["dataset_name"] = dataset
5148
for filename in files:
52-
scoped_name = os.path.join(dataset, filename)
49+
scoped_name = str(Path(dataset) / filename)
5350
cls._all_files[scoped_name] = config
5451

5552
@classmethod
@@ -59,40 +56,34 @@ def is_known(cls, filename: str) -> bool:
5956

6057

6158
def make_all_dirs(path: str) -> None:
62-
try:
63-
os.makedirs(path)
64-
except OSError as exc:
65-
if exc.errno == errno.EEXIST and os.path.isdir(path):
66-
pass
67-
else:
68-
raise
59+
Path(path).mkdir(parents=True, exist_ok=True)
6960

7061

7162
def fetch_remote_dataset(
7263
dataset_name: str, files: dict[str, str], url: str, data_dir: str
7364
) -> None:
74-
dataset_dir = os.path.join(data_dir, dataset_name)
65+
dataset_dir = Path(data_dir) / dataset_name
7566

76-
writefile = os.path.join(dataset_dir, os.path.basename(url))
77-
if os.path.exists(writefile):
67+
writefile = dataset_dir / Path(url).name
68+
if writefile.exists():
7869
return
7970

80-
make_all_dirs(dataset_dir)
71+
make_all_dirs(str(dataset_dir))
8172
logging.warning("Downloading %s", url)
82-
urlretrieve(url, writefile)
73+
urlretrieve(url, str(writefile))
8374

8475
if tarfile.is_tarfile(writefile):
8576
logging.warning("Extracting %s", writefile)
8677
with tarfile.open(writefile) as tar:
8778
members = [tar.getmember(f) for f in files.values()]
88-
tar.extractall(dataset_dir, members)
79+
tar.extractall(str(dataset_dir), members)
8980

9081
for outfile, infile in files.items():
91-
full_in = os.path.join(dataset_dir, infile)
92-
full_out = os.path.join(dataset_dir, outfile)
93-
os.rename(full_in, full_out)
82+
full_in = dataset_dir / infile
83+
full_out = dataset_dir / outfile
84+
full_in.rename(full_out)
9485

95-
if not os.path.exists(writefile):
86+
if not writefile.exists():
9687
msg = "Problem obtaining remote dataset : %s"
9788
raise RuntimeError(msg % dataset_name)
9889

@@ -102,20 +93,20 @@ def is_known_remote(filename: str) -> bool:
10293

10394

10495
def remote_file(
105-
filename: str, data_dir: str = _default_data_dir, raise_missing: bool = False
96+
filename: str, data_dir: str | Path = _default_data_dir, raise_missing: bool = False
10697
) -> str:
10798
config = RemoteDatasetList.get_config_for_file(filename)
10899
if not config and raise_missing:
109100
msg = f"Unknown {filename} cannot be found"
110101
raise RuntimeError(msg)
111102

112-
path = os.path.join(data_dir, filename)
113-
if not os.path.isfile(path):
114-
config["data_dir"] = data_dir
103+
path = Path(data_dir) / filename
104+
if not path.is_file():
105+
config["data_dir"] = str(data_dir)
115106
fetch_remote_dataset(**config) # type: ignore[arg-type]
116107

117-
if not os.path.isfile(path) and raise_missing:
108+
if not path.is_file() and raise_missing:
118109
msg = f"{filename} cannot be found"
119110
raise RuntimeError(msg)
120111

121-
return path
112+
return str(path)

tests/test_local_files.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,22 @@
11
from __future__ import annotations
22

3-
import os
3+
from pathlib import Path
44

55
import pytest
66
import requests
77

88
import skhep_testdata as skhtd
99

10-
data_dir = os.path.dirname(skhtd.__file__)
11-
data_dir = os.path.join(data_dir, "data")
10+
data_dir = Path(skhtd.__file__).parent / "data"
1211

1312

1413
def test_data_path():
15-
assert os.path.exists(skhtd.data_path("uproot-Zmumu.root"))
14+
assert Path(skhtd.data_path("uproot-Zmumu.root")).exists()
1615

1716

1817
def test_data_path_missing():
1918
path = skhtd.data_path("doesnt-exist.root", raise_missing=False)
20-
assert path == os.path.join(data_dir, "doesnt-exist.root")
19+
assert path == str(data_dir / "doesnt-exist.root")
2120

2221
with pytest.raises(IOError):
2322
skhtd.data_path("doesnt-exist.root")
@@ -36,10 +35,10 @@ def test_delegate_to_remote(monkeypatch, tmpdir):
3635
def dummy_remote_file(filename, data_dir=None, raise_missing=False):
3736
if not data_dir:
3837
data_dir = str(tmpdir)
39-
return os.path.join(data_dir, filename)
38+
return str(Path(data_dir) / filename)
4039

4140
monkeypatch.setattr(skhtd.remote_files, "remote_file", dummy_remote_file)
4241
monkeypatch.setattr(skhtd.remote_files, "is_known_remote", lambda _: True)
4342

44-
path = skhtd.data_path(os.path.join("dataset", "a_remote_file.root"))
43+
path = skhtd.data_path(str(Path("dataset") / "a_remote_file.root"))
4544
assert path == str(tmpdir / "dataset" / "a_remote_file.root")

tests/test_remote_files.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,14 @@
11
from __future__ import annotations
22

3-
import os
3+
from pathlib import Path
44

55
import skhep_testdata as skhtd
66

7-
_remote_dataset_cfg = os.path.join(
8-
os.path.dirname(__file__), "test_remote_datasets.yml"
9-
)
7+
_remote_dataset_cfg = str(Path(__file__).parent / "test_remote_datasets.yml")
108
skhtd.remote_files.RemoteDatasetList.load_remote_configs(_remote_dataset_cfg)
119

12-
good_file_1 = os.path.join("dataset_1", "file_1.root")
13-
bad_file_1 = os.path.join("bad_dataset_1", "file_1.root")
10+
good_file_1 = str(Path("dataset_1") / "file_1.root")
11+
bad_file_1 = str(Path("bad_dataset_1") / "file_1.root")
1412

1513

1614
def test_is_known_remote():

0 commit comments

Comments
 (0)