Commit 0d2dbb0

Merge pull request #1 from swirrl-api/patch-1
Update subsetnc.py
2 parents: ec317bb + afabb69
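
In summary, the patch: strips whitespace from the input paths with f.strip() instead of f.rstrip(); unwraps the xarray time components with .data (and .data[0] for the [-1:] slices) before passing them to datetime.datetime; flips the time-overlap test so the file's start and end are compared against the requested period; wraps the file-derived date parts in str() before building the output filename; drops two stray trailing semicolons; and deletes the commented-out file-merging block at the end of the script. The remaining -/+ pairs below appear to be whitespace-only (indentation) changes.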

1 file changed (+105 −114)

subsetnc.py

Lines changed: 105 additions & 114 deletions
@@ -27,7 +27,7 @@
     day_end = period_end_time[6:8]
     endtime = year_end+"-"+month_end+"-"+day_end
     end = datetime.datetime(int(year_end), int(month_end), int(day_end))
-
+
 else :
     period_start_time = -1
     period_end_time = -1
@@ -36,127 +36,118 @@
 allfiles=f.readlines()

 if not( os.path.isdir('results')):
-    os.mkdir('results')
+    os.mkdir('results')

 files = []
 for f in allfiles:

-    f = f.rstrip()
-    print(f)
-    fb = f.rsplit('/', 1)[-1]
+    f = f.strip()
+    print(f)
+    fb = f.rsplit('/', 1)[-1]

-    process = True
+    process = True

-    if period_start_time == -1 :
-        dset = xr.open_dataset(f, mask_and_scale=False, decode_coords=True)
-        fbs = fb.strip('.nc')
-        outf = fbs + "_subset.nc"
-    else :
-        try:
-            dset = xr.open_dataset(f, chunks={'time': '100MB'}, mask_and_scale=False, decode_coords=True, decode_times=True, use_cftime=True)
-        except:
-            dset = xr.open_dataset(f, mask_and_scale=False, decode_coords=True, decode_times=True, use_cftime=True)
-        year_startf = dset.time.dt.year[0]
-        month_startf = dset.time.dt.month[0]
-        day_startf = dset.time.dt.day[0]
-        startf = datetime.datetime(year_startf, month_startf, day_startf)
-        year_endf = dset.time.dt.year[-1:]
-        month_endf = dset.time.dt.month[-1:]
-        day_endf = dset.time.dt.day[-1:]
-        endf = datetime.datetime(year_endf, month_endf, day_endf)
-        if not ((start >= startf and start <= endf) or (end >= startf and end <= endf)) :
-            process = False
-        else :
-            if start > startf :
-                file_start = start
-                fstart_year = year_start
-                fstart_month = month_start
-                fstart_day = day_start
-            else :
-                file_start = startf
-                fstart_year = year_startf
-                fstart_month = month_startf
-                fstart_day = day_startf
-            if end < endf :
-                file_end = end
-                fend_year = year_end
-                fend_month = month_end
-                fend_day = day_end
-            else :
-                file_end = endf
-                fend_year = year_endf
-                fend_month = month_endf
-                fend_day = day_endf
-            fbs = fb.strip('.nc')
-            outf = fbs + "_subset_" + fstart_year + fstart_month + fstart_day + "-" + fend_year + fend_month + fend_day + ".nc"
-        try:
-            del dset.attrs['_NCProperties']
-        except:
-            pass
+    if period_start_time == -1 :
+        dset = xr.open_dataset(f, mask_and_scale=False, decode_coords=True)
+        fbs = fb.strip('.nc')
+        outf = fbs + "_subset.nc"
+    else :
+        try:
+            dset = xr.open_dataset(f, chunks={'time': '100MB'}, mask_and_scale=False, decode_coords=True, decode_times=True, use_cftime=True)
+        except:
+            dset = xr.open_dataset(f, mask_and_scale=False, decode_coords=True, decode_times=True, use_cftime=True)
+        year_startf = dset.time.dt.year[0].data
+        month_startf = dset.time.dt.month[0].data
+        day_startf = dset.time.dt.day[0].data
+        startf = datetime.datetime(year_startf, month_startf, day_startf)
+        year_endf = dset.time.dt.year[-1:].data[0]
+        month_endf = dset.time.dt.month[-1:].data[0]
+        day_endf = dset.time.dt.day[-1:].data[0]
+        endf = datetime.datetime(year_endf, month_endf, day_endf)
+        if not ((startf >= start and startf <= end) or (endf >= start and endf <= end)) :
+            process = False
+        else :
+            if start > startf :
+                file_start = start
+                fstart_year = year_start
+                fstart_month = month_start
+                fstart_day = day_start
+            else :
+                file_start = startf
+                fstart_year = str(year_startf)
+                fstart_month = str(month_startf)
+                fstart_day = str(day_startf)
+            if end < endf :
+                file_end = end
+                fend_year = year_end
+                fend_month = month_end
+                fend_day = day_end
+            else :
+                file_end = endf
+                fend_year = str(year_endf)
+                fend_month = str(month_endf)
+                fend_day = str(day_endf)
+            fbs = fb.strip('.nc')
+            outf = fbs + "_subset_" + fstart_year + fstart_month + fstart_day + "-" + fend_year + fend_month + fend_day + ".nc"
+        try:
+            del dset.attrs['_NCProperties']
+        except:
+            pass

-    if process == True :
-        if minlon > maxlon or minlon < 0:
-            if period_start_time == -1 :
-                dset = dset.sel(lat=slice(minlat,maxlat))
-            else :
-                dset = dset.sel(time=slice(starttime,endtime), lat=slice(minlat,maxlat))
-        else:
-            if period_start_time == -1 :
-                dset = dset.sel(lon=slice(minlon,maxlon), lat=slice(minlat,maxlat))
-            else :
-                dset = dset.sel(time=slice(starttime,endtime), lon=slice(minlon,maxlon), lat=slice(minlat,maxlat))
+    if process == True :
+        if minlon > maxlon or minlon < 0:
+            if period_start_time == -1 :
+                dset = dset.sel(lat=slice(minlat,maxlat))
+            else :
+                dset = dset.sel(time=slice(starttime,endtime), lat=slice(minlat,maxlat))
+        else:
+            if period_start_time == -1 :
+                dset = dset.sel(lon=slice(minlon,maxlon), lat=slice(minlat,maxlat))
+            else :
+                dset = dset.sel(time=slice(starttime,endtime), lon=slice(minlon,maxlon), lat=slice(minlat,maxlat))

-        print("Saving to: "+"results/"+outf)
-        dims = dset.dims
-        dimsf = {k: v for k, v in dims.items() if k.startswith('lat') or k.startswith('lon') or k.startswith('time')}
-        enc = dict(dimsf)
-        enc = dict.fromkeys(enc, {'_FillValue': None})
+        print("Saving to: "+"results/"+outf)
+        dims = dset.dims
+        dimsf = {k: v for k, v in dims.items() if k.startswith('lat') or k.startswith('lon') or k.startswith('time')}
+        enc = dict(dimsf)
+        enc = dict.fromkeys(enc, {'_FillValue': None})

-        if period_start_time == -1 :
-            dset.to_netcdf(path="results/"+outf, mode='w', format='NETCDF4', engine='netcdf4', encoding=enc)
-        else:
-            files.append("results/"+outf)
-            dset.to_netcdf(path="results/"+outf, mode='w', format='NETCDF4', unlimited_dims='time', engine='netcdf4', encoding=enc)
-            tunits = dset.time.encoding['units']
-    else :
-        print("Not processing file because time range is outside time period requested.")
-
-    dset.close()
-    del dset
+        if period_start_time == -1 :
+            dset.to_netcdf(path="results/"+outf, mode='w', format='NETCDF4', engine='netcdf4', encoding=enc)
+        else:
+            files.append("results/"+outf)
+            dset.to_netcdf(path="results/"+outf, mode='w', format='NETCDF4', unlimited_dims='time', engine='netcdf4', encoding=enc)
+            tunits = dset.time.encoding['units']
+    else :
+        print("Not processing file because time range is outside time period requested.")

-    # Reorder longitudes if needed, and subset longitudes in that specific case differently (need to do it on local file for reasonable performance)
-    if process == True :
-        if minlon > maxlon or minlon < 0:
-            print("Subsetting for non-contiguous longitude")
-            if period_start_time == -1 :
-                dsetl = xr.open_dataset("results/"+outf, mask_and_scale=False, decode_coords=True)
-            else :
-                try:
-                    dsetl = xr.open_dataset("results/"+outf, chunks={'time': '100MB'}, mask_and_scale=False, decode_coords=True, decode_times=True, use_cftime=True);
-                except:
-                    dsetl = xr.open_dataset("results/"+outf, mask_and_scale=False, decode_coords=True, decode_times=True, use_cftime=True);
-            saveattrs = dsetl.lon.attrs
-            dsetl = dsetl.assign_coords(lon=(((dsetl.lon + 180) % 360) - 180)).roll(lon=(dsetl.dims['lon'] // 2), roll_coords=True)
-            if minlon >= 180:
-                minlon = minlon - 360
-            if maxlon >= 180:
-                maxlon = maxlon - 360
-            dsetl = dsetl.sel(lon=slice(minlon,maxlon))
-            dsetl.lon.attrs = saveattrs
-            if period_start_time == -1 :
-                dsetl.to_netcdf(path="results/tmp"+outf, mode='w', format='NETCDF4', engine='netcdf4', encoding=enc)
-            else :
-                dsetl.time.encoding['units'] = tunits
-                dsetl.to_netcdf(path="results/tmp"+outf, mode='w', format='NETCDF4', unlimited_dims='time', engine='netcdf4', encoding=enc)
-            dsetl.close()
-            del dsetl
-            os.rename("results/tmp"+outf, "results/"+outf)
+    dset.close()
+    del dset

-# Combine all files into one
-#try:
-#    dsmerged = xr.open_mfdataset(files, chunks={'time': '100MB'}, mask_and_scale=False, decode_coords=True, combine='by_coords')
-#except:
-#    dsmerged = xr.open_mfdataset(files, mask_and_scale=False, decode_coords=True, combine='by_coords')
-#print("Merging files into: "+"results/"+outfilenc)
-#print(files)
-#dsmerged.to_netcdf(path="results/"+outfilenc, mode='w', format='NETCDF4', unlimited_dims='time')
+    # Reorder longitudes if needed, and subset longitudes in that specific case differently (need to do it on local file for reasonable performance)
+    if process == True :
+        if minlon > maxlon or minlon < 0:
+            print("Subsetting for non-contiguous longitude")
+            if period_start_time == -1 :
+                dsetl = xr.open_dataset("results/"+outf, mask_and_scale=False, decode_coords=True)
+            else :
+                try:
+                    dsetl = xr.open_dataset("results/"+outf, chunks={'time': '100MB'}, mask_and_scale=False, decode_coords=True, decode_times=True, use_cftime=True)
+                except:
+                    dsetl = xr.open_dataset("results/"+outf, mask_and_scale=False, decode_coords=True, decode_times=True, use_cftime=True)
+            saveattrs = dsetl.lon.attrs
+            dsetl = dsetl.assign_coords(lon=(((dsetl.lon + 180) % 360) - 180)).roll(lon=(dsetl.dims['lon'] // 2), roll_coords=True)
+            if minlon >= 180:
+                minlon = minlon - 360
+            if maxlon >= 180:
+                maxlon = maxlon - 360
+            dsetl = dsetl.sel(lon=slice(minlon,maxlon))
+            dsetl.lon.attrs = saveattrs
+            if period_start_time == -1 :
+                dsetl.to_netcdf(path="results/tmp"+outf, mode='w', format='NETCDF4', engine='netcdf4', encoding=enc)
+            else :
+                dsetl.time.encoding['units'] = tunits
+                dsetl.to_netcdf(path="results/tmp"+outf, mode='w', format='NETCDF4', unlimited_dims='time', engine='netcdf4', encoding=enc)
+            dsetl.close()
+            del dsetl
+            os.rename("results/tmp"+outf, "results/"+outf)
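A note on the .data change above: dset.time.dt.year[0] is a zero-dimensional DataArray and dset.time.dt.year[-1:] is a length-1 DataArray, not plain integers, so the patch unwraps them with .data and .data[0] before handing them to datetime.datetime. A minimal sketch of the pattern, using a made-up toy dataset in place of the opened NetCDF file:

import datetime
import numpy as np
import pandas as pd
import xarray as xr

# Toy dataset standing in for the opened NetCDF file (illustrative only)
times = pd.date_range("2000-01-15", periods=4, freq="D")
dset = xr.Dataset({"tas": ("time", np.zeros(4))}, coords={"time": times})

first_year = dset.time.dt.year[0].data      # unwraps the 0-d DataArray to a NumPy value
last_year = dset.time.dt.year[-1:].data[0]  # unwraps the length-1 slice the same way

startf = datetime.datetime(int(first_year), 1, 15)
endf = datetime.datetime(int(last_year), 1, 18)
print(startf, endf)                         # 2000-01-15 00:00:00 2000-01-18 00:00:00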

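The non-contiguous longitude branch kept above (minlon > maxlon or minlon < 0) relies on a standard remap-and-roll trick: convert a 0..360 longitude axis to -180..180 with ((lon + 180) % 360) - 180, roll the arrays by half the longitude length so the coordinate increases monotonically again, and then take an ordinary slice across what used to be the 0/360 seam. A self-contained sketch on a made-up 45-degree grid (names and values here are illustrative, not from the repository):

import numpy as np
import xarray as xr

# Made-up global field on a 0..360 longitude axis (illustrative only)
ds = xr.Dataset({"tas": ("lon", np.arange(8.0))},
                coords={"lon": np.arange(0, 360, 45)})

# Remap 0..360 -> -180..180, then roll by half the axis so lon is monotonic
ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180))
ds = ds.roll(lon=ds.sizes["lon"] // 2, roll_coords=True)

# A window crossing the seam, e.g. 300E..60E, becomes an ordinary slice
minlon, maxlon = 300, 60
if minlon >= 180:
    minlon = minlon - 360   # 300 -> -60
if maxlon >= 180:
    maxlon = maxlon - 360
sub = ds.sel(lon=slice(minlon, maxlon))
print(sub.lon.values)       # prints [-45   0  45] on this coarse grid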