# Directional finite difference for GPU vectors.
#
# `img` is viewed as an array of size `shape` and `res` as an array of size
# `shape .- Tuple(di)`. Every entry of the reshaped `res` is overwritten with
# `img[idx] - img[idx + di]`.
#
# Returns the reshaped `res` (not the flat vector that was passed in).
function LinearOperatorCollection.grad!(
    res::vecT, img::vecT, shape::NTuple{N,Int64}, di::CartesianIndex{N}
) where {vecT<:AbstractGPUVector,N}
    res = reshape(res, shape .- Tuple(di))
    backend = get_backend(res)

    # Nothing is launched when the result has no elements.
    if !isempty(res)
        kernel = grad_kernel!(backend)
        # One work item per entry of `res`; since `res` has size `shape .- Tuple(di)`,
        # `idx + di` is at most `shape` in every dimension of the reshaped `img`.
        kernel(res, reshape(img, shape), di; ndrange = size(res))
    end

    return res
end

# Kernel behind `grad!`: writes `img[idx] - img[idx + di]` into `res[idx]` for the
# Cartesian index of the current work item.
# Defined at module scope (rather than nested inside `grad!`) to match
# `grad_t_kernel_1!` / `grad_t_kernel_2!`.
@kernel cpu = false inbounds = true function grad_kernel!(res, img, di)
    idx = @index(Global, Cartesian)
    res[idx] = img[idx] - img[idx + di]
end
1618
# Adjoint of the directional gradients
# Adjoint of the directional finite difference along a single offset `di`.
#
# `res` is viewed as an array of size `shape` and `g` as an array of size
# `shape .- Tuple(di)`. `res` is zeroed first; then, for every Cartesian index `idx`
# of the reshaped `g`, `g[idx]` is added at `res[idx]` and subtracted at `res[idx + di]`.
#
# Returns the flat vector `res` that was passed in.
function LinearOperatorCollection.grad_t!(
    res::vecT, g::vecT, shape::NTuple{N,Int64}, di::CartesianIndex{N}
) where {T,vecT<:AbstractGPUVector{T},N}
    out = reshape(res, shape)
    grads = reshape(g, shape .- Tuple(di))
    backend = get_backend(res)

    fill!(res, zero(T))

    # With no gradient entries the zeroed result is already final.
    isempty(grads) && return res

    launch_size = size(grads)
    add_kernel = grad_t_kernel_1!(backend)
    sub_kernel = grad_t_kernel_2!(backend)
    # Two separate launches over the same range: first the `+g` part, then the `-g` part.
    add_kernel(out, grads, di; ndrange = launch_size)
    sub_kernel(out, grads, di; ndrange = launch_size)

    return res
end
2835
# Adjoint-gradient kernel, additive part: accumulates `g` into `res` at the Cartesian
# index of the current work item. `di` is accepted but not read.
@kernel cpu = false inbounds = true function grad_t_kernel_1!(res, g, di)
    I = @index(Global, Cartesian)
    res[I] = res[I] + g[I]
end
3440
# Adjoint-gradient kernel, subtractive part: subtracts `g` at the current work item's
# Cartesian index from `res` at that index shifted by `di`.
@kernel cpu = false inbounds = true function grad_t_kernel_2!(res, g, di)
    I = @index(Global, Cartesian)
    shifted = I + di
    res[shifted] = res[shifted] - g[I]
end
4045
46+
# Adjoint of the directional finite differences for several offsets at once.
#
# `g` holds the gradients of all directions back to back: the slice belonging to
# `dirs[i]` ends at index `dim_ends[i]`, and the next slice starts right after it.
# `res` is viewed as an array of size `shape`, zeroed, and then every direction's
# contribution is accumulated into it.
#
# `dims` and `tmp` are accepted but not used by this method.
#
# Returns the reshaped `res`.
# NOTE(review): the single-direction `grad_t!` returns the flat vector instead.
function LinearOperatorCollection.grad_t!(
    res::vecT, g::vecT, shape::NTuple{N,Int64}, dirs, dims, dim_ends, tmp
) where {T,vecT<:AbstractGPUVector{T},N}
    out = reshape(res, shape)
    backend = get_backend(out)

    fill!(out, zero(T))

    add_kernel = grad_t_kernel_1!(backend)
    sub_kernel = grad_t_kernel_2!(backend)

    offset = 1
    for (i, di) in enumerate(dirs)
        stop = dim_ends[i]
        chunk = reshape(view(g, offset:stop), shape .- Tuple(di))
        # Advance to the next direction's slice before possibly skipping this one.
        offset = stop + 1

        isempty(chunk) && continue

        launch_size = size(chunk)
        add_kernel(out, chunk, di; ndrange = launch_size)
        sub_kernel(out, chunk, di; ndrange = launch_size)
    end

    return out
end
0 commit comments