diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index adc064840de..9d41cf32e3c 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -104,13 +104,14 @@ def count( Data variables: foo int64 8B 5 """ - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -187,13 +188,14 @@ def all( Data variables: foo bool 1B False """ - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -270,13 +272,14 @@ def any( Data variables: foo bool 1B True """ - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -363,7 +366,7 @@ def max( Data variables: foo float64 8B nan """ - return self.reduce( + out = self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, @@ -371,6 +374,7 @@ def max( keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -457,7 +461,7 @@ def min( Data variables: foo float64 8B nan """ - return self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, @@ -465,6 +469,7 @@ def min( keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -555,7 +560,7 @@ def mean( Data variables: foo float64 8B nan """ - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, @@ -563,6 +568,7 @@ def mean( keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -669,7 +675,7 @@ def prod( Data variables: foo float64 8B 0.0 """ - return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -678,6 +684,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -784,7 +791,7 @@ def sum( Data variables: foo float64 8B 8.0 """ - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -793,6 +800,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -896,7 +904,7 @@ def std( Data variables: foo float64 8B 1.14 """ - return self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -905,6 +913,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -1008,7 +1017,7 @@ def var( Data variables: foo float64 8B 1.3 """ - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -1017,6 +1026,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -1107,7 +1117,7 @@ def median( Data variables: foo float64 8B nan """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, @@ -1115,6 +1125,7 @@ def median( keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -1198,7 +1209,9 @@ def cumsum( Group: / Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Group: / Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Group: / Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) Group: / Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.cumsum() - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.cumsum(skipna=False) - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.cumprod() - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.cumprod(skipna=False) - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Size: 8B array(5) """ - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -2669,12 +2712,13 @@ def all( Size: 1B array(False) """ - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -2735,12 +2779,13 @@ def any( Size: 1B array(True) """ - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -2813,13 +2858,14 @@ def max( Size: 8B array(nan) """ - return self.reduce( + out = self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -2892,13 +2938,14 @@ def min( Size: 8B array(nan) """ - return self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -2971,13 +3018,14 @@ def mean( Size: 8B array(nan) """ - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -3067,7 +3115,7 @@ def prod( Size: 8B array(0.) """ - return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -3075,6 +3123,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -3164,7 +3213,7 @@ def sum( Size: 8B array(8.) """ - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -3172,6 +3221,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -3258,7 +3308,7 @@ def std( Size: 8B array(1.14017543) """ - return self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -3266,6 +3316,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -3352,7 +3403,7 @@ def var( Size: 8B array(1.3) """ - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -3360,6 +3411,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -3436,13 +3488,14 @@ def median( Size: 8B array(nan) """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -3530,13 +3583,14 @@ def cumsum( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 labels (time) Dataset: raise NotImplementedError() + def _flox_scan( + self, + dim: Dims, + *, + func: str, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Dataset: + raise NotImplementedError() + def count( self, dim: Dims = None, @@ -3743,13 +3809,14 @@ def count( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -3839,13 +3906,14 @@ def all( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -3935,13 +4003,14 @@ def any( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -4048,7 +4117,7 @@ def max( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, @@ -4056,6 +4125,7 @@ def max( keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -4162,7 +4232,7 @@ def min( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, @@ -4170,6 +4240,7 @@ def min( keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -4276,7 +4347,7 @@ def mean( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, @@ -4284,6 +4355,7 @@ def mean( keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -4410,7 +4482,7 @@ def prod( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -4419,6 +4491,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -4545,7 +4618,7 @@ def sum( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -4554,6 +4627,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -4677,7 +4751,7 @@ def std( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -4686,6 +4760,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -4809,7 +4884,7 @@ def var( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -4818,6 +4893,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -4911,7 +4987,7 @@ def median( Data variables: da (labels) float64 24B nan 2.0 1.5 """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, @@ -4919,6 +4995,7 @@ def median( keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -5000,29 +5077,50 @@ def cumsum( da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan >>> ds.groupby("labels").cumsum() - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").cumsum(skipna=False) - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").cumprod() - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.groupby("labels").cumprod(skipna=False) - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Dataset: raise NotImplementedError() + def _flox_scan( + self, + dim: Dims, + *, + func: str, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Dataset: + raise NotImplementedError() + def count( self, dim: Dims = None, @@ -5239,13 +5353,14 @@ def count( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -5335,13 +5450,14 @@ def all( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -5431,13 +5547,14 @@ def any( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -5544,7 +5661,7 @@ def max( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, @@ -5552,6 +5669,7 @@ def max( keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -5658,7 +5776,7 @@ def min( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, @@ -5666,6 +5784,7 @@ def min( keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -5772,7 +5891,7 @@ def mean( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, @@ -5780,6 +5899,7 @@ def mean( keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -5906,7 +6026,7 @@ def prod( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -5915,6 +6035,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -6041,7 +6162,7 @@ def sum( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -6050,6 +6171,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -6173,7 +6295,7 @@ def std( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -6182,6 +6304,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -6305,7 +6428,7 @@ def var( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -6314,6 +6437,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -6407,7 +6531,7 @@ def median( Data variables: da (time) float64 24B 1.0 2.0 nan """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, @@ -6415,6 +6539,7 @@ def median( keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -6496,29 +6621,50 @@ def cumsum( da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan >>> ds.resample(time="3ME").cumsum() - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.resample(time="3ME").cumsum(skipna=False) - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").cumprod() - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").cumprod(skipna=False) - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) DataArray: raise NotImplementedError() + def _flox_scan( + self, + dim: Dims, + *, + func: str, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> DataArray: + raise NotImplementedError() + def count( self, dim: Dims = None, @@ -6729,12 +6891,13 @@ def count( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -6818,12 +6981,13 @@ def all( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -6907,12 +7071,13 @@ def any( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -7011,13 +7176,14 @@ def max( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -7116,13 +7282,14 @@ def min( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -7221,13 +7388,14 @@ def mean( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -7344,7 +7512,7 @@ def prod( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -7352,6 +7520,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -7468,7 +7637,7 @@ def sum( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -7476,6 +7645,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -7589,7 +7759,7 @@ def std( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -7597,6 +7767,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -7710,7 +7881,7 @@ def var( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -7718,6 +7889,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -7804,13 +7976,14 @@ def median( Coordinates: * labels (labels) object 24B 'a' 'b' 'c' """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -7904,13 +8077,29 @@ def cumsum( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 labels (time) DataArray: raise NotImplementedError() + def _flox_scan( + self, + dim: Dims, + *, + func: str, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> DataArray: + raise NotImplementedError() + def count( self, dim: Dims = None, @@ -8117,12 +8318,13 @@ def count( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -8206,12 +8408,13 @@ def all( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -8295,12 +8498,13 @@ def any( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -8399,13 +8603,14 @@ def max( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -8504,13 +8709,14 @@ def min( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -8609,13 +8815,14 @@ def mean( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -8732,7 +8939,7 @@ def prod( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -8740,6 +8947,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -8856,7 +9064,7 @@ def sum( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -8864,6 +9072,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -8977,7 +9186,7 @@ def std( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -8985,6 +9194,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -9098,7 +9308,7 @@ def var( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -9106,6 +9316,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -9192,13 +9403,14 @@ def median( Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -9280,8 +9492,8 @@ def cumsum( Size: 48B array([1., 2., 5., 5., 2., 2.]) Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) Size: 48B array([ 1., 2., 5., 5., 2., nan]) Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) Size: 48B array([1., 2., 6., 0., 2., 2.]) Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) Size: 48B array([ 1., 2., 6., 0., 2., nan]) Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) tuple[Hashable, ...]: + parsed_dim: tuple[Hashable, ...] 
+        if isinstance(dim, str):
+            parsed_dim = (dim,)
+        elif dim is None:
+            parsed_dim_list = list()
+            # preserve order
+            for dim_ in itertools.chain(
+                *(grouper.codes.dims for grouper in self.groupers)
+            ):
+                if dim_ not in parsed_dim_list:
+                    parsed_dim_list.append(dim_)
+            parsed_dim = tuple(parsed_dim_list)
+        elif dim is ...:
+            parsed_dim = tuple(self._original_obj.dims)
+        else:
+            parsed_dim = tuple(dim)
+
+        # Do this so we raise the same error message whether flox is present or not.
+        # Better to control it here than in flox.
+        for grouper in self.groupers:
+            if any(
+                d not in grouper.codes.dims and d not in self._original_obj.dims
+                for d in parsed_dim
+            ):
+                # TODO: Not a helpful error; it's a sanity check that dim actually exists
+                # in either self.groupers or self._original_obj
+                raise ValueError(f"cannot reduce over dimensions {dim}.")
+
+        return parsed_dim
+
     def _flox_reduce(
         self,
         dim: Dims,
@@ -1088,30 +1120,7 @@ def _flox_reduce(
             # set explicitly to avoid unnecessarily accumulating count
             kwargs["min_count"] = 0
 
-        parsed_dim: tuple[Hashable, ...]
-        if isinstance(dim, str):
-            parsed_dim = (dim,)
-        elif dim is None:
-            parsed_dim_list = list()
-            # preserve order
-            for dim_ in itertools.chain(
-                *(grouper.codes.dims for grouper in self.groupers)
-            ):
-                if dim_ not in parsed_dim_list:
-                    parsed_dim_list.append(dim_)
-            parsed_dim = tuple(parsed_dim_list)
-        elif dim is ...:
-            parsed_dim = tuple(obj.dims)
-        else:
-            parsed_dim = tuple(dim)
-
-        # Do this so we raise the same error message whether flox is present or not.
-        # Better to control it here than in flox.
-        for grouper in self.groupers:
-            if any(
-                d not in grouper.codes.dims and d not in obj.dims for d in parsed_dim
-            ):
-                raise ValueError(f"cannot reduce over dimensions {dim}.")
+        parsed_dim = self._parse_dim(dim)
 
         has_missing_groups = (
             self.encoded.unique_coord.size != self.encoded.full_index.size
@@ -1202,6 +1211,50 @@ def _flox_reduce(
 
         return result
 
+    def _flox_scan(
+        self,
+        dim: Dims,
+        *,
+        func: str,
+        skipna: bool | None = None,
+        keep_attrs: bool | None = None,
+        **kwargs: Any,
+    ) -> T_Xarray:
+        from flox import groupby_scan
+
+        parsed_dim = self._parse_dim(dim)
+        obj = self._original_obj.transpose(..., *parsed_dim)
+        axis = range(-len(parsed_dim), 0)
+        codes = tuple(g.codes for g in self.groupers)
+
+        def wrapper(array, *by, func: str, skipna: bool | None, **kwargs):
+            if skipna or (skipna is None and array.dtype.kind in "cfO"):
+                if "nan" not in func:
+                    func = f"nan{func}"
+
+            return groupby_scan(array, *by, func=func, **kwargs)
+
+        actual = apply_ufunc(
+            wrapper,
+            obj,
+            *codes,
+            dask="allowed",
+            keep_attrs=(
+                _get_keep_attrs(default=True) if keep_attrs is None else keep_attrs
+            ),
+            kwargs=dict(
+                func=func,
+                skipna=skipna,
+                expected_groups=None,  # TODO: Should be same as _flox_reduce?
+                axis=axis,
+                dtype=kwargs.get("dtype"),
+                method=kwargs.get("method"),
+                engine=kwargs.get("engine"),
+            ),
+        )
+
+        return actual
+
     def fillna(self, value: Any) -> T_Xarray:
         """Fill missing values in this object by group.
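For orientation, the scan path above boils down to one flox call per variable. A standalone sketch of the same computation, not part of the patch; it assumes flox >= 0.10.5 (the minimum the generator below sets for cumsum) and uses illustrative data mirroring the new tests:

    import numpy as np
    from flox import groupby_scan

    array = np.array([7.0, 2.0, 0.0, 1.0, 2.0, np.nan])
    codes = np.array([0, 0, 1, 1, 2, 2])  # integer group codes, one per element

    # With skipna in effect, the wrapper above maps "cumsum" -> "nancumsum".
    out = groupby_scan(array, codes, func="nancumsum", axis=-1)
    # out: [7. 9. 0. 1. 2. 2.] -- the expected_array used in test_groupby_scans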
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 6dce32aeb5c..d5ef9a28c05 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -6073,13 +6073,13 @@ def test_reduce_cumsum(self) -> None:
         assert_identical(expected, data.cumsum())
 
     @pytest.mark.parametrize(
-        "reduct, expected",
+        "reduct",
         [
-            ("dim1", ["dim2", "dim3", "time", "dim1"]),
-            ("dim2", ["dim3", "time", "dim1", "dim2"]),
-            ("dim3", ["dim2", "time", "dim1", "dim3"]),
-            ("time", ["dim2", "dim3", "dim1"]),
+            ("dim1",),
+            ("dim2",),
+            ("dim3",),
+            ("time",),
         ],
     )
     @pytest.mark.parametrize("func", ["cumsum", "cumprod"])
-    def test_reduce_cumsum_test_dims(self, reduct, expected, func) -> None:
+    def test_reduce_cumsum_test_dims(self, reduct, func) -> None:
@@ -6090,9 +6090,10 @@ def test_reduce_cumsum_test_dims(self, reduct, expected, func) -> None:
         ):
             getattr(data, func)(dim="bad_dim")
 
-        # ensure dimensions are correct
+        # ensure dimensions are retained:
         actual = getattr(data, func)(dim=reduct).dims
-        assert list(actual) == expected
+        expected = data.dims
+        assert set(actual) == set(expected)
 
     def test_reduce_non_numeric(self) -> None:
         data1 = create_test_data(seed=44, use_extension_array=True)
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index 47ea2fcd2b0..9253c733f66 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -2551,54 +2551,86 @@ def func(arg1, arg2, arg3=0.0):
     assert_identical(expected, actual)
 
 
-def test_groupby_cumsum() -> None:
-    ds = xr.Dataset(
-        {"foo": (("x",), [7, 3, 1, 1, 1, 1, 1])},
-        coords={"x": [0, 1, 2, 3, 4, 5, 6], "group_id": ("x", [0, 0, 1, 1, 2, 2, 2])},
-    )
-    actual = ds.groupby("group_id").cumsum(dim="x")
-    expected = xr.Dataset(
-        {
-            "foo": (("x",), [7, 10, 1, 2, 1, 2, 3]),
-        },
-        coords={
-            "x": [0, 1, 2, 3, 4, 5, 6],
-            "group_id": ds.group_id,
-        },
-    )
-    # TODO: Remove drop_vars when GH6528 is fixed
-    # when Dataset.cumsum propagates indexes, and the group variable?
-    assert_identical(expected.drop_vars(["x", "group_id"]), actual)
-
-    actual = ds.foo.groupby("group_id").cumsum(dim="x")
-    expected.coords["group_id"] = ds.group_id
-    expected.coords["x"] = np.arange(7)
-    assert_identical(expected.foo, actual)
-
+@pytest.mark.parametrize(
+    "method, expected_array, use_flox, use_dask",
+    [
+        ("cumsum", [7.0, 9.0, 0.0, 1.0, 2.0, 2.0], True, True),
+        ("cumsum", [7.0, 9.0, 0.0, 1.0, 2.0, 2.0], True, False),
+        ("cumsum", [7.0, 9.0, 0.0, 1.0, 2.0, 2.0], False, True),
+        ("cumsum", [7.0, 9.0, 0.0, 1.0, 2.0, 2.0], False, False),
+        pytest.param(
+            "cumprod",
+            [7.0, 14.0, 0.0, 0.0, 2.0, 2.0],
+            True,
+            True,
+            marks=pytest.mark.skip(
+                reason="TODO: Groupby with cumprod is currently not supported with flox"
+            ),
+        ),
+        pytest.param(
+            "cumprod",
+            [7.0, 14.0, 0.0, 0.0, 2.0, 2.0],
+            True,
+            False,
+            marks=pytest.mark.skip(
+                reason="TODO: Groupby with cumprod is currently not supported with flox"
+            ),
+        ),
+        ("cumprod", [7.0, 14.0, 0.0, 0.0, 2.0, 2.0], False, True),
+        ("cumprod", [7.0, 14.0, 0.0, 0.0, 2.0, 2.0], False, False),
+    ],
+)
+def test_groupby_scans(
+    method: Literal["cumsum", "cumprod"],
+    expected_array: list[float],
+    use_flox: bool,
+    use_dask: bool,
+    use_lazy_group_idx: bool = False,
+) -> None:
+    if use_dask and not has_dask:
+        pytest.skip("requires dask")
 
-def test_groupby_cumprod() -> None:
+    # Test Dataset groupby:
     ds = xr.Dataset(
-        {"foo": (("x",), [7, 3, 0, 1, 1, 2, 1])},
-        coords={"x": [0, 1, 2, 3, 4, 5, 6], "group_id": ("x", [0, 0, 1, 1, 2, 2, 2])},
+        {"foo": (("x",), [7, 2, 0, 1, 2, np.nan])},
+        coords={"x": [0, 1, 2, 3, 4, 5], "group_idx": ("x", [0, 0, 1, 1, 2, 2])},
     )
-    actual = ds.groupby("group_id").cumprod(dim="x")
+    with xr.set_options(use_flox=use_flox):
+        if use_dask:
+            ds = ds.chunk()
+            if use_lazy_group_idx:
+                grouper = xr.groupers.UniqueGrouper(labels=[0, 1, 2])
+                actual = getattr(ds.groupby(group_idx=grouper), method)(dim="x")
+            else:
+                grouper = ds.group_idx.compute()
+                actual = getattr(ds.groupby(grouper), method)(dim="x")
+        else:
+            actual = getattr(ds.groupby("group_idx"), method)(dim="x")
+
     expected = xr.Dataset(
         {
-            "foo": (("x",), [7, 21, 0, 0, 1, 2, 2]),
+            "foo": (("x",), expected_array),
         },
         coords={
-            "x": [0, 1, 2, 3, 4, 5, 6],
-            "group_id": ds.group_id,
+            "x": ds.x,
+            "group_idx": ds.group_idx,
         },
     )
-    # TODO: Remove drop_vars when GH6528 is fixed
-    # when Dataset.cumsum propagates indexes, and the group variable?
-    assert_identical(expected.drop_vars(["x", "group_id"]), actual)
+    assert_identical(expected, actual.compute())
 
-    actual = ds.foo.groupby("group_id").cumprod(dim="x")
-    expected.coords["group_id"] = ds.group_id
-    expected.coords["x"] = np.arange(7)
-    assert_identical(expected.foo, actual)
+    # Test DataArray groupby:
+    with xr.set_options(use_flox=use_flox):
+        if use_dask:
+            ds = ds.chunk()
+            if use_lazy_group_idx:
+                grouper = xr.groupers.UniqueGrouper(labels=[0, 1, 2])
+                actual = getattr(ds.foo.groupby(group_idx=grouper), method)(dim="x")
+            else:
+                grouper = ds.group_idx.compute()
+                actual = getattr(ds.foo.groupby(grouper), method)(dim="x")
+        else:
+            actual = getattr(ds.foo.groupby("group_idx"), method)(dim="x")
+    assert_identical(expected.foo.compute(), actual.compute())
 
 
 @pytest.mark.parametrize(
@@ -2608,7 +2640,7 @@ def test_groupby_cumprod() -> None:
         ("cumprod", [1.0, 2.0, 6.0, 6.0, 2.0, 2.0]),
     ],
 )
-def test_resample_cumsum(method: str, expected_array: list[float]) -> None:
+def test_resample_scans(method: str, expected_array: list[float]) -> None:
     ds = xr.Dataset(
         {"foo": ("time", [1, 2, 3, 1, 2, np.nan])},
         coords={
@@ -2622,13 +2654,11 @@ def test_resample_cumsum(method: str, expected_array: list[float]) -> None:
             "time": xr.date_range("01-01-2001", freq="ME", periods=6, use_cftime=False),
         },
     )
-    # TODO: Remove drop_vars when GH6528 is fixed
-    # when Dataset.cumsum propagates indexes, and the group variable?
-    assert_identical(expected.drop_vars(["time"]), actual)
+    assert_identical(expected, actual)
 
     actual = getattr(ds.foo.resample(time="3ME"), method)(dim="time")
     expected.coords["time"] = ds.time
-    assert_identical(expected.drop_vars(["time"]).foo, actual)
+    assert_identical(expected.foo, actual)
 
 
 def test_groupby_binary_op_regression() -> None:
diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py
index e386b96f63d..4661e0b8033 100644
--- a/xarray/util/generate_aggregations.py
+++ b/xarray/util/generate_aggregations.py
@@ -4,8 +4,10 @@
 Usage:
     python xarray/util/generate_aggregations.py
-    pytest --doctest-modules xarray/{core,namedarray}/_aggregations.py --accept || true
-    pytest --doctest-modules xarray/{core,namedarray}/_aggregations.py
+    pytest --doctest-modules xarray/core/_aggregations.py --accept
+    pytest --doctest-modules xarray/core/_aggregations.py
+    pytest --doctest-modules xarray/namedarray/_aggregations.py --accept
+    pytest --doctest-modules xarray/namedarray/_aggregations.py
 
 This requires [pytest-accept](https://github.com/max-sixty/pytest-accept).
 The second run of pytest is deliberate, since the first will return an error
@@ -15,7 +17,7 @@
 
 import textwrap
 from dataclasses import dataclass, field
-from typing import NamedTuple
+from typing import Literal, NamedTuple
 
 MODULE_PREAMBLE = '''\
 """Mixin classes with reduction operations."""
@@ -107,6 +109,17 @@ def _flox_reduce(
         self,
         dim: Dims,
         **kwargs: Any,
+    ) -> {obj}:
+        raise NotImplementedError()
+
+    def _flox_scan(
+        self,
+        dim: Dims,
+        *,
+        func: str,
+        skipna: bool | None = None,
+        keep_attrs: bool | None = None,
+        **kwargs: Any,
     ) -> {obj}:
         raise NotImplementedError()"""
@@ -131,6 +144,17 @@ def _flox_reduce(
         self,
         dim: Dims,
         **kwargs: Any,
+    ) -> {obj}:
+        raise NotImplementedError()
+
+    def _flox_scan(
+        self,
+        dim: Dims,
+        *,
+        func: str,
+        skipna: bool | None = None,
+        keep_attrs: bool | None = None,
+        **kwargs: Any,
     ) -> {obj}:
         raise NotImplementedError()"""
@@ -284,6 +308,7 @@ def __init__(
         see_also_methods=(),
         min_flox_version=None,
         additional_notes="",
+        aggregation_type: Literal["reduce", "scan"] = "reduce",
     ):
         self.name = name
         self.extra_kwargs = extra_kwargs
@@ -292,6 +317,7 @@ def __init__(
         self.see_also_methods = see_also_methods
         self.min_flox_version = min_flox_version
         self.additional_notes = additional_notes
+        self.aggregation_type = aggregation_type
         if bool_reduce:
             self.array_method = f"array_{name}"
             self.np_example_array = (
@@ -444,7 +470,7 @@ def generate_code(self, method, has_keep_attrs):
 
         # median isn't enabled yet, because it would break if a single group was present in multiple
         # chunks. The non-flox code path will just rechunk every group to a single chunk and execute the median
-        method_is_not_flox_supported = method.name in ("median", "cumsum", "cumprod")
+        method_is_not_flox_supported = method.name in ("median", "cumprod")
         if method_is_not_flox_supported:
             indent = 12
         else:
@@ -455,14 +481,21 @@ def generate_code(self, method, has_keep_attrs):
         else:
             extra_kwargs = ""
 
+        if method.aggregation_type == "scan":
+            # Scans retain dimensions; reattach the coords that reduce() drops.
+ out_finalized = "out.assign_coords(self._obj.coords)" + else: + out_finalized = "out" + if method_is_not_flox_supported: return f"""\ - return self.reduce( + out = self.reduce( duck_array_ops.{method.array_method}, dim=dim,{extra_kwargs} keep_attrs=keep_attrs, **kwargs, - )""" + ) + return {out_finalized}""" min_version_check = f""" and module_available("flox", minversion="{method.min_flox_version}")""" @@ -476,7 +509,7 @@ def generate_code(self, method, has_keep_attrs): + f""" and contains_only_chunked_or_numpy(self._obj) ): - return self._flox_reduce( + return self._flox_{method.aggregation_type}( func="{method.name}", dim=dim,{extra_kwargs} # fill_value=fill_value, @@ -484,12 +517,13 @@ def generate_code(self, method, has_keep_attrs): **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.{method.array_method}, dim=dim,{extra_kwargs} keep_attrs=keep_attrs, **kwargs, - )""" + ) + return {out_finalized}""" ) @@ -507,12 +541,20 @@ def generate_code(self, method, has_keep_attrs): keep_attrs = ( "\n" + 12 * " " + "keep_attrs=keep_attrs," if has_keep_attrs else "" ) + + if method.aggregation_type == "scan" and self.datastructure.name == "Dataset": + # Scans retain dimensions, datasets drops them somehow: + out_finalized = "out.assign_coords(self._obj.coords)" + else: + out_finalized = "out" + return f"""\ - return self.reduce( + out = self.reduce( duck_array_ops.{method.array_method}, dim=dim,{extra_kwargs}{keep_attrs} **kwargs, - )""" + ) + return {out_finalized}""" AGGREGATION_METHODS = ( @@ -530,13 +572,15 @@ def generate_code(self, method, has_keep_attrs): Method( "median", extra_kwargs=(skipna,), numeric_only=True, min_flox_version="0.9.2" ), - # Cumulatives: + # Scans: Method( "cumsum", extra_kwargs=(skipna,), numeric_only=True, see_also_methods=("cumulative",), additional_notes=_CUM_NOTES, + min_flox_version="0.10.5", + aggregation_type="scan", ), Method( "cumprod", @@ -544,6 +588,7 @@ def generate_code(self, method, has_keep_attrs): numeric_only=True, see_also_methods=("cumulative",), additional_notes=_CUM_NOTES, + aggregation_type="scan", ), ) @@ -700,8 +745,11 @@ def write_methods(filepath, generators, preamble): from pathlib import Path p = Path(os.getcwd()) + + filepath = p.parent / "xarray" / "xarray" / "core" / "_aggregations.py" + # filepath = p.parent / "core" / "_aggregations.py" # Run from script location write_methods( - filepath=p.parent / "xarray" / "xarray" / "core" / "_aggregations.py", + filepath=filepath, generators=[ DATATREE_GENERATOR, DATASET_GENERATOR, @@ -713,9 +761,12 @@ def write_methods(filepath, generators, preamble): ], preamble=MODULE_PREAMBLE, ) + + # NamedArray: + filepath = p.parent / "xarray" / "xarray" / "namedarray" / "_aggregations.py" + # filepath = p.parent / "namedarray" / "_aggregations.py" # Run from script location write_methods( - filepath=p.parent / "xarray" / "xarray" / "namedarray" / "_aggregations.py", + filepath=filepath, generators=[NAMED_ARRAY_GENERATOR], preamble=NAMED_ARRAY_MODULE_PREAMBLE, ) - # filepath = p.parent / "core" / "_aggregations.py" # Run from script location