Skip to content

Commit 33ce95e

Browse files
dcherian and claude authored
Optimize padding for coarsening. (#10921)
Co-authored-by: Claude <[email protected]>
1 parent 7948820 commit 33ce95e

File tree

3 files changed

+55
-5
lines changed

3 files changed

+55
-5
lines changed

asv_bench/benchmarks/coarsen.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import numpy as np
2+
3+
import xarray as xr
4+
5+
from . import randn
6+
7+
# Sizes chosen to test padding optimization (compare each against `window` below)
8+
nx_padded = 4003 # Not divisible by window (10) - requires padding
9+
ny_padded = 4007 # Not divisible by window (10) - requires padding
10+
11+
nx_exact = 4000 # Divisible by window (10) - no padding needed
12+
ny_exact = 4000 # Divisible by window (10) - no padding needed
13+
14+
window = 10 # Coarsening window passed for both "x" and "y" in the benchmarks
15+
16+
17+
class Coarsen:
    """Benchmarks for 2D ``coarsen(..., boundary="pad").mean()``.

    ``setup`` builds two random DataArrays with dims ``("x", "y")``:

    * ``da_padded`` -- sized ``(nx_padded, ny_padded)``; per the module-level
      constants these sizes are not multiples of ``window``.
    * ``da_exact`` -- sized ``(nx_exact, ny_exact)``; per the module-level
      constants these sizes are exact multiples of ``window``.

    Each array is exercised by one ``time_*`` and one ``peakmem_*`` benchmark.
    """

    @staticmethod
    def _random_grid(nx, ny):
        """Return a random ``(nx, ny)`` DataArray with integer x/y coordinates."""
        values = randn((nx, ny))
        return xr.DataArray(
            values,
            dims=("x", "y"),
            coords={"x": np.arange(nx), "y": np.arange(ny)},
        )

    def setup(self, *args, **kwargs):
        """Create the benchmark inputs (padded case first, then exact case)."""
        # Case 1: Requires padding on both dimensions
        self.da_padded = self._random_grid(nx_padded, ny_padded)

        # Case 2: No padding required
        self.da_exact = self._random_grid(nx_exact, ny_exact)

    def time_coarsen_with_padding(self):
        """Time coarsening a 2D array whose two dimensions both need padding."""
        coarsened = self.da_padded.coarsen(x=window, y=window, boundary="pad")
        coarsened.mean()

    def time_coarsen_no_padding(self):
        """Time coarsening a 2D array whose sizes are exact multiples (no padding)."""
        coarsened = self.da_exact.coarsen(x=window, y=window, boundary="pad")
        coarsened.mean()

    def peakmem_coarsen_with_padding(self):
        """Peak memory when coarsening with padding on both dimensions."""
        coarsened = self.da_padded.coarsen(x=window, y=window, boundary="pad")
        coarsened.mean()

    def peakmem_coarsen_no_padding(self):
        """Peak memory when coarsening without padding."""
        coarsened = self.da_exact.coarsen(x=window, y=window, boundary="pad")
        coarsened.mean()

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ Performance
5151
- Speed up and reduce memory usage of :py:func:`concat`. Magnitude of improvement scales
5252
with size of the concatenation dimension. By `Deepak Cherian <https://github.com/dcherian>`_.
5353
:issue:`10864` :pull:`10866`.
54+
- Speed up and reduce memory usage when coarsening along multiple dimensions.
55+
By `Deepak Cherian <https://github.com/dcherian>`_. :pull:`10921`.
5456

5557
Documentation
5658
~~~~~~~~~~~~~

xarray/core/variable.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2280,6 +2280,7 @@ def coarsen_reshape(self, windows, boundary, side):
22802280
)
22812281

22822282
variable = self
2283+
pad_widths = {}
22832284
for d, window in windows.items():
22842285
# trim or pad the object
22852286
size = variable.shape[self._get_axis_num(d)]
@@ -2300,16 +2301,16 @@ def coarsen_reshape(self, windows, boundary, side):
23002301
pad = window * n - size
23012302
if pad < 0:
23022303
pad += window
2303-
if side[d] == "left":
2304-
pad_width = {d: (0, pad)}
2305-
else:
2306-
pad_width = {d: (pad, 0)}
2307-
variable = variable.pad(pad_width, mode="constant")
2304+
elif pad == 0:
2305+
continue
2306+
pad_widths[d] = (0, pad) if side[d] == "left" else (pad, 0)
23082307
else:
23092308
raise TypeError(
23102309
f"{boundary[d]} is invalid for boundary. Valid option is 'exact', "
23112310
"'trim' and 'pad'"
23122311
)
2312+
if pad_widths:
2313+
variable = variable.pad(pad_widths, mode="constant")
23132314

23142315
shape = []
23152316
axes = []

0 commit comments

Comments
 (0)