|
| 1 | +module ChainRulesKernelAbstractionsExt |
| 2 | + |
| 3 | +import Adapt |
| 4 | +import Atomix |
| 5 | +import ChainRules |
| 6 | +import GPUArrays |
| 7 | +import KernelAbstractions as KA |
| 8 | + |
| 9 | +using GPUArraysCore: AbstractGPUArray |
| 10 | +using KernelAbstractions |
| 11 | + |
# GPU method of `ChainRules.∇getindex!`: adds `dy` into `dx` at `inds` and
# returns `dx`.
#
# The accumulation is currently done on `Array` copies: `dx`, `inds` and `dy`
# are each adapted to `Array`, `dy` is added into a view of the copied `dx`,
# and the result is copied back into `dx` with `copyto!`.
function ChainRules.∇getindex!(dx::AbstractGPUArray, dy, inds...)
    # Disabled for now: on-device accumulation through the `scatter!` kernel.
    # kab = get_backend(dx)
    #
    # if KA.supports_atomics(kab)
    #     gids = GPUArrays.to_indices(dx, inds)
    #     idims = map(length, gids)
    #     Is = map(Adapt.adapt(GPUArrays.ToGPU(dy)), gids)
    #     scatter!(kab)(+, dx, dy, idims, Is...; ndrange=length(dy))
    # else
    #     (the Array-based fallback below)
    # end
    host_dx = Adapt.adapt(Array, dx)
    host_inds = Adapt.adapt(Array, inds)
    window = view(host_dx, host_inds...)
    host_dy = Adapt.adapt(Array, dy)
    # In-place broadcast into the view, so the update lands in `host_dx`.
    window .+= host_dy
    copyto!(dx, host_dx)
    return dx
end
| 27 | + |
# KernelAbstractions kernel: each work-item passes its global index, together
# with every kernel argument, on to `_scatter!`.
@kernel function scatter!(op, dest, src, idims, Is::Vararg{Any, N}) where N
    gid = @index(Global)
    _scatter!(gid, op, dest, src, idims, Is...)
end
| 31 | + |
# Per-element body of `scatter!`, generated for the number `N` of index
# collections in `Is`.
#
# The linear index `i` is converted to a Cartesian position within `idims`;
# for every dimension `d` the destination index is read from `Is[d]` at that
# position, and `src[i]` is combined into `dest` at those indices via `_accum!`.
@generated function _scatter!(i, op, dest, src, idims, Is::Vararg{Any, N}) where N
    quote
        pos = @inbounds CartesianIndices(idims)[i]
        # Unrolled with literal `d`, so each `Is[d]` lookup uses a constant index.
        ids = Base.Cartesian.@ntuple $N d -> @inbounds((Is[d])[pos[d]])
        dv = src[i]
        return _accum!(op, dest, dv, ids...)
    end
end
| 40 | + |
# Combine `val` into `dest[ids...]` with `op`, going through `Atomix.modify!`
# on an `Atomix.IndexableRef` for that element.
function _accum!(op, dest, val, ids...)
    slot = Atomix.IndexableRef(dest, ids)
    return Atomix.modify!(slot, op, val)
end
| 44 | + |
| 45 | +end |
0 commit comments