11from __future__ import annotations
22
3- import errno
43import logging
5- import os
64import tarfile
75from importlib import resources
6+ from pathlib import Path
87from typing import ClassVar
98from urllib .request import urlretrieve
109
1110import yaml
1211
# Directory containing this module, with symlinks resolved. Used as the
# default ``data_dir`` for ``remote_file``.
_default_data_dir = Path(__file__).resolve().parent
1413
1514
1615class RemoteDatasetList :
@@ -33,9 +32,7 @@ def load_remote_configs(cls, file_to_load: str | None = None) -> None:
3332 return
3433
3534 if file_to_load is None :
36- dataset_yml = resources .files ("skhep_testdata" ).joinpath (
37- "remote_datasets.yml"
38- )
35+ dataset_yml = resources .files ("skhep_testdata" ) / "remote_datasets.yml"
3936 with dataset_yml .open () as infile :
4037 datasets = yaml .load (infile , Loader = yaml .SafeLoader )
4138 else :
@@ -49,7 +46,7 @@ def load_remote_configs(cls, file_to_load: str | None = None) -> None:
4946 config ["files" ] = files
5047 config ["dataset_name" ] = dataset
5148 for filename in files :
52- scoped_name = os . path . join ( dataset , filename )
49+ scoped_name = str ( Path ( dataset ) / filename )
5350 cls ._all_files [scoped_name ] = config
5451
5552 @classmethod
@@ -59,40 +56,34 @@ def is_known(cls, filename: str) -> bool:
5956
6057
6158def make_all_dirs (path : str ) -> None :
62- try :
63- os .makedirs (path )
64- except OSError as exc :
65- if exc .errno == errno .EEXIST and os .path .isdir (path ):
66- pass
67- else :
68- raise
59+ Path (path ).mkdir (parents = True , exist_ok = True )
6960
7061
def fetch_remote_dataset(
    dataset_name: str, files: dict[str, str], url: str, data_dir: str
) -> None:
    """Download a dataset into ``data_dir/dataset_name`` and unpack it.

    The download is stored under the last path component of ``url``. If that
    file already exists the function returns immediately, so the downloaded
    file doubles as the "already fetched" marker. When the download is a tar
    archive, the members named by the values of ``files`` are extracted and
    each is renamed to its corresponding key.

    Args:
        dataset_name: Sub-directory of ``data_dir`` that holds the dataset.
        files: Mapping of final file name -> member name inside the archive.
        url: Location to download from.
        data_dir: Base directory under which datasets are stored.

    Raises:
        RuntimeError: If the downloaded file is missing after the download.
        KeyError: If a requested member is not present in the archive
            (raised by ``TarFile.getmember``).
    """
    dataset_dir = Path(data_dir) / dataset_name

    writefile = dataset_dir / Path(url).name
    if writefile.exists():
        return

    dataset_dir.mkdir(parents=True, exist_ok=True)
    logging.warning("Downloading %s", url)
    try:
        urlretrieve(url, str(writefile))
    except BaseException:
        # A partially written file would satisfy the existence check above on
        # the next call and be mistaken for a complete download; remove it
        # before propagating the failure (including KeyboardInterrupt).
        writefile.unlink(missing_ok=True)
        raise

    if tarfile.is_tarfile(writefile):
        logging.warning("Extracting %s", writefile)
        with tarfile.open(writefile) as tar:
            members = [tar.getmember(f) for f in files.values()]
            if hasattr(tarfile, "data_filter"):
                # The archive comes from the network: use the "data"
                # extraction filter where this interpreter provides it.
                tar.extractall(str(dataset_dir), members, filter="data")
            else:
                tar.extractall(str(dataset_dir), members)

        # Give each extracted member its configured final name.
        for outfile, infile in files.items():
            full_in = dataset_dir / infile
            full_out = dataset_dir / outfile
            full_in.rename(full_out)

    if not writefile.exists():
        msg = "Problem obtaining remote dataset : %s"
        raise RuntimeError(msg % dataset_name)
9889
@@ -102,20 +93,20 @@ def is_known_remote(filename: str) -> bool:
10293
10394
def remote_file(
    filename: str, data_dir: str | Path = _default_data_dir, raise_missing: bool = False
) -> str:
    """Return the local path for ``filename``, fetching its dataset if needed.

    The configuration for ``filename`` is looked up through
    ``RemoteDatasetList.get_config_for_file``. If a configuration is found and
    the file is not yet present under ``data_dir``, the dataset is fetched
    with ``fetch_remote_dataset``.

    Args:
        filename: Name of the file, joined onto ``data_dir``.
        data_dir: Base directory in which datasets are stored.
        raise_missing: When true, raise instead of returning a path to a file
            that is unknown or still missing after the fetch attempt.

    Returns:
        ``data_dir / filename`` as a string. With ``raise_missing`` false the
        returned path may not exist.

    Raises:
        RuntimeError: If ``raise_missing`` is true and the file is unknown or
            could not be obtained.
    """
    config = RemoteDatasetList.get_config_for_file(filename)
    if not config and raise_missing:
        msg = f"Unknown {filename} cannot be found"
        raise RuntimeError(msg)

    path = Path(data_dir) / filename
    # Only attempt a fetch when a configuration exists: with no config and
    # raise_missing false, the caller simply gets the (missing) path back
    # rather than a TypeError from indexing or unpacking an empty config.
    if config and not path.is_file():
        # Pass data_dir via a copy so the dict returned by the lookup (which
        # may be the shared registry entry) is not modified.
        fetch_kwargs = {**config, "data_dir": str(data_dir)}
        fetch_remote_dataset(**fetch_kwargs)  # type: ignore[arg-type]

    if not path.is_file() and raise_missing:
        msg = f"{filename} cannot be found"
        raise RuntimeError(msg)

    return str(path)
0 commit comments