Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/architecture.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,8 @@ Explanaition of key entries:
function memory_usage(::CPU)
(; max_rss=Sys.maxrss(), gc_bytes=Base.gc_live_bytes())
end

"""
Returns the architecture of the given array.
"""
architecture(x::AbstractArray) = x isa AbstractGPUArray ? GPU{typeof(x)}() : CPU()
19 changes: 16 additions & 3 deletions src/common/quadrature.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ point `i` (not necessarily in order).
"""
simpson
@inbounds function simpson(integrand, x::AbstractVector)
n = length(x)
n <= 4 && return trapezoidal(integrand, x)
if (x[2] - x[1]) ≈ (x[3] - x[2])
if length(x) <= 4
trapezoidal(integrand, x)
elseif (x[2] - x[1]) ≈ (x[3] - x[2])
simpson_uniform(integrand, x)
else
simpson_nonuniform(integrand, x)
Expand Down Expand Up @@ -114,3 +114,16 @@ end

return I
end

"""
Return the approproate integration function given a PSP quadrature
"""
function default_psp_quadrature(x::AbstractArray)
if length(x) <= 4
trapezoidal
elseif (x[2] - x[1]) ≈ (x[3] - x[2])
simpson_uniform
else
simpson_nonuniform
end
end
2 changes: 1 addition & 1 deletion src/common/spherical_bessels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ with `SpecialFunctions.sphericalbesselj`. Specialized for integer ``0 ≤ l ≤
l == 3 && return (sin(x) * (15 - 6x^2) + cos(x) * (x^3 - 15x)) / x^4
l == 4 && return (sin(x) * (105 - 45x^2 + x^4) + cos(x) * (10x^3 - 105x)) / x^5
l == 5 && return (sin(x) * (945 - 420x^2 + 15x^4) + cos(x) * (-945x + 105x^3 - x^5)) / x^6
error("The case l = $l is not implemented")
throw(BoundsError()) # specific l not implemented
end
12 changes: 11 additions & 1 deletion src/elements.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ function core_charge_density_fourier(::Element, ::T)::T where {T <: Real}
error("Abstract elements do not necesesarily provide core charge density.")
end

# Generic vectorized version of local_potential_fourier, GPU-safe
function local_potential_fourier(el::Element, ps::AbstractVector{T}) where {T <: Real}
arch = architecture(ps)
to_device(arch, map(p -> local_potential_fourier(el, p), to_cpu(ps)))
end

Base.show(io::IO, el::Element) = print(io, "$(typeof(el))(:$(species(el)))")

#
Expand Down Expand Up @@ -83,6 +89,7 @@ function local_potential_fourier(el::ElementCoulomb, p::T) where {T <: Real}
Z = charge_nuclear(el)
return -4T(π) * Z / p^2
end

local_potential_real(el::ElementCoulomb, r::Real) = -charge_nuclear(el) / r


Expand Down Expand Up @@ -160,9 +167,12 @@ has_core_density(el::ElementPsp) = has_core_density(el.psp)
eval_psp_energy_correction(T, el::ElementPsp) = eval_psp_energy_correction(T, el.psp)

function local_potential_fourier(el::ElementPsp, p::T) where {T <: Real}
p == 0 && return zero(T) # Compensating charge background
eval_psp_local_fourier(el.psp, p)
end
# Vectorized version of the above
function local_potential_fourier(el::ElementPsp, ps::AbstractVector{T}) where {T <: Real}
eval_psp_local_fourier(el.psp, ps)
end
local_potential_real(el::ElementPsp, r::Real) = eval_psp_local_real(el.psp, r)

function valence_charge_density_fourier(el::ElementPsp, p::T) where {T <: Real}
Expand Down
7 changes: 7 additions & 0 deletions src/pseudo/NormConservingPsp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ abstract type NormConservingPsp end
# eval_psp_projector_fourier(psp, i, l, p::Real)
# eval_psp_local_real(psp, r::Real)
# eval_psp_local_fourier(psp, p::Real)
# eval_psp_local_fourier(psp, ps::AbstractArray{<Real})
# eval_psp_energy_correction(T::Type, psp)

#### Optional methods:
Expand Down Expand Up @@ -85,6 +86,12 @@ V_{\rm loc}(p) &= ∫_{ℝ^3} (V_{\rm loc}(r) - C(r)) e^{-ip·r} dr + F[C(r)] \\
eval_psp_local_fourier(psp::NormConservingPsp, p::AbstractVector) =
eval_psp_local_fourier(psp, norm(p))

# Fallback vectorized implementation for non GPU-optimized code.
function eval_psp_local_fourier(psp::NormConservingPsp, ps::AbstractVector{T}) where {T <: Real}
arch = architecture(ps)
to_device(arch, map(p -> eval_psp_local_fourier(psp, p), to_cpu(ps)))
end

@doc raw"""
eval_psp_energy_correction([T=Float64,] psp::NormConservingPsp)
eval_psp_energy_correction([T=Float64,] element::Element)
Expand Down
2 changes: 1 addition & 1 deletion src/pseudo/PspHgh.jl
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,11 @@ end

# [GTH98] (6) except they do it with plane waves normalized by 1/sqrt(Ω).
function eval_psp_local_fourier(psp::PspHgh, p::T) where {T <: Real}
p == 0 && return zero(T) # Compensating charge background
t::T = p * psp.rloc
psp_local_polynomial(T, psp, t) * exp(-t^2 / 2) / t^2
end


@doc raw"""
Estimate an upper bound for the argument `p` after which
`abs(eval_psp_local_fourier(psp, p))` is a strictly decreasing function.
Expand Down
29 changes: 25 additions & 4 deletions src/pseudo/PspUpf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,19 +203,40 @@ end

eval_psp_local_real(psp::PspUpf, r::T) where {T<:Real} = psp.vloc_interp(r)

function eval_psp_local_fourier(psp::PspUpf, p::T)::T where {T<:Real}
# Low-level function for the local part of the pseudopotential in reciprocal space
function _eval_psp_local_fourier(quadrature, rgrid, vloc, Zion, p::T)::T where {T<:Real}
# QE style C(r) = -Zerf(r)/r Coulomb tail correction used to ensure
# exponential decay of `f` so that the Hankel transform is accurate.
# H[Vloc(r)] = H[Vloc(r) - C(r)] + H[C(r)],
# where H[-Zerf(r)/r] = -Z/p^2 exp(-p^2 /4)
# ABINIT uses a more 'pure' Coulomb term with the same asymptotic behavior
# C(r) = -Z/r; H[-Z/r] = -Z/p^2
p == 0 && return zero(T) # Compensating charge background
I = quadrature(rgrid) do i, r
r * (r * vloc[i] - -Zion * erf(r)) * sphericalbesselj_fast(0, p * r)
end
4T(π) * (I + -Zion / p^2 * exp(-p^2 / T(4)))
end

function eval_psp_local_fourier(psp::PspUpf, p::T) where {T<:Real}
quadrature = default_psp_quadrature(psp.rgrid)
rgrid = @view psp.rgrid[1:psp.ircut]
vloc = @view psp.vloc[1:psp.ircut]
I = simpson(rgrid) do i, r
r * (r * vloc[i] - -psp.Zion * erf(r)) * sphericalbesselj_fast(0, p * r)
_eval_psp_local_fourier(quadrature, rgrid, vloc, psp.Zion, p)
end

# Vectorized version of the above, GPU compatible
function eval_psp_local_fourier(psp::PspUpf, ps::AbstractVector{T}) where {T<:Real}
quadrature = default_psp_quadrature(psp.rgrid)
arch = architecture(ps)
rgrid = to_device(arch, @view psp.rgrid[1:psp.ircut])
vloc = to_device(arch, @view psp.vloc[1:psp.ircut])
Zion = psp.Zion
map(ps) do p
# GPU kernels with dynamic function calls do not compile,
# hence the pre-determined explicit integration function
_eval_psp_local_fourier(quadrature, rgrid, vloc, Zion, p)
end
4T(π) * (I + -psp.Zion / p^2 * exp(-p^2 / T(4)))
end

function eval_psp_density_valence_real(psp::PspUpf, r::T) where {T<:Real}
Expand Down
17 changes: 9 additions & 8 deletions src/terms/local.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,18 @@ function atomic_local_form_factors(basis::PlaneWaveBasis{T}; q=zero(Vec3{T})) wh
p = norm(G)
iG2ifnorm_cpu[iG] = get!(norm_indices, p, length(norm_indices) + 1)
end
iG2ifnorm = to_device(basis.architecture, iG2ifnorm_cpu)

form_factors_cpu = zeros(T, length(norm_indices), length(basis.model.atom_groups))
for(p, ifnorm) in norm_indices
for (igroup, group) in enumerate(basis.model.atom_groups)
element = basis.model.atoms[first(group)]
form_factors_cpu[ifnorm, igroup] = local_potential_fourier(element, p)
end
ni_pairs = collect(pairs(norm_indices))
ps = to_device(basis.architecture, first.(ni_pairs))
indices = to_device(basis.architecture, last.(ni_pairs))

form_factors = similar(ps, length(norm_indices), length(basis.model.atom_groups))
for (igroup, group) in enumerate(basis.model.atom_groups)
element = basis.model.atoms[first(group)]
@inbounds form_factors[indices, igroup] .= local_potential_fourier(element, ps)
Comment thread
mfherbst marked this conversation as resolved.
end

form_factors = to_device(basis.architecture, form_factors_cpu)
iG2ifnorm = to_device(basis.architecture, iG2ifnorm_cpu)
(; form_factors, iG2ifnorm)
end

Expand Down
Loading