Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions examples/custom_potential.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ CustomPotential() = CustomPotential(1.0, 0.5);

# We extend the two methods providing access to the real and Fourier
# representation of the potential to DFTK.
function DFTK.local_potential_real(el::CustomPotential, r::Real)
-el.α / (√(2π) * el.L) * exp(- (r / el.L)^2 / 2)
function DFTK.local_potential_real(el::CustomPotential, rs::Vector)
map(r -> -el.α / (√(2π) * el.L) * exp(- (r / el.L)^2 / 2), rs)
end
function DFTK.local_potential_fourier(el::CustomPotential, p::Real)
function DFTK.local_potential_fourier(el::CustomPotential, ps::Vector)
## = ∫ V(r) exp(-ix⋅p) dx
-el.α * exp(- (p * el.L)^2 / 2)
map(p -> -el.α * exp(- (p * el.L)^2 / 2), ps)
end

# !!! tip "Gaussian potentials and DFTK"
Expand Down
37 changes: 29 additions & 8 deletions src/density_methods.jl
Original file line number Diff line number Diff line change
Expand Up @@ -180,17 +180,18 @@ function atomic_density_form_factors(basis::PlaneWaveBasis{T},
p = norm(G)
iG2ifnorm_cpu[iG] = get!(norm_indices, p, length(norm_indices) + 1)
end
iG2ifnorm = to_device(basis.architecture, iG2ifnorm_cpu)

form_factors_cpu = zeros(T, length(norm_indices), length(basis.model.atom_groups))
for (p, ifnorm) in norm_indices
for (igroup, group) in enumerate(basis.model.atom_groups)
element = basis.model.atoms[first(group)]
form_factors_cpu[ifnorm, igroup] = atomic_density(element, p, method)
end
ni_pairs = collect(pairs(norm_indices))
ps = to_device(basis.architecture, first.(ni_pairs))
indices = to_device(basis.architecture, last.(ni_pairs))

form_factors = similar(ps, length(norm_indices), length(basis.model.atom_groups))
for (igroup, group) in enumerate(basis.model.atom_groups)
element = basis.model.atoms[first(group)]
@inbounds form_factors[indices, igroup] .= atomic_density(element, ps, method)
end

form_factors = to_device(basis.architecture, form_factors_cpu)
iG2ifnorm = to_device(basis.architecture, iG2ifnorm_cpu)
(; form_factors, iG2ifnorm)
end

Expand All @@ -214,6 +215,7 @@ function atomic_density(element::Element, Gnorm::T,
has_core_density(element) ? core_charge_density_fourier(element, Gnorm) : zero(T)
end

#TODO: vectorize and check GPU
function atomic_density(element::Element, Gnorm::T,
::CoreKineticEnergyDensity)::T where {T <: Real}
if has_core_kinetic_energy_density(element)
Expand All @@ -223,6 +225,25 @@ function atomic_density(element::Element, Gnorm::T,
end
end

# Generic vectoriezed version of the above
function atomic_density(element::Element, Gnorms::AbstractVector{T},
method::AtomicDensity) where {T <: Real}
arch = architecture(Gnorms)
to_device(arch,
map(Gnorm -> atomic_density(element, Gnorm, method),
to_cpu(Gnorms)))
end
Comment thread
mfherbst marked this conversation as resolved.

# Vectorized version for CoreDensity, GPU optimized
function atomic_density(element::Element, Gnorms::AbstractVector{T},
::CoreDensity) where {T <: Real}
if has_core_density(element)
core_charge_density_fourier(element, Gnorms)
else
zeros_like(Gnorms)
end
end

# Get the lengthscale of the valence density for an atom with `n_elec_core` core
# and `n_elec_valence` valence electrons.
function atom_decay_length(n_elec_core, n_elec_valence)
Expand Down
48 changes: 27 additions & 21 deletions src/elements.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,18 @@ eval_psp_energy_correction(T, ::Element) = zero(T)
eval_psp_energy_correction(psp::Element) = eval_psp_energy_correction(Float64, psp)

# Fall back to the Gaussian table for Elements without pseudopotentials
function valence_charge_density_fourier(el::Element, p::T)::T where {T <: Real}
function valence_charge_density_fourier(el::Element, p)
gaussian_valence_charge_density_fourier(el, p)
end

"""Gaussian valence charge density using Abinit's coefficient table, in Fourier space."""
function gaussian_valence_charge_density_fourier(el::Element, p::T)::T where {T <: Real}
charge_ionic(el) * exp(-(p * atom_decay_length(el))^2)
end
function gaussian_valence_charge_density_fourier(el::Element, ps::AbstractVector{T}) where {T <: Real}
arch = architecture(ps)
to_device(arch, map(p -> gaussian_valence_charge_density_fourier(el, p), to_cpu(ps)))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised this needs to be executed on the CPU. The implementation above looks so simple.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should work on the GPU, I was on auto pilot. I'll check and change

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, actually it is not that straight forward, because Element is not isbit. A simple solution like

map(p -> gaussian_valence_charge_density_fourier(el, p), ps)

would not work. It would be possible to port it given some refactoring, but I don't think it is worth the trouble, because it never shows up as a performance bottleneck.

end

function core_charge_density_fourier(::Element, ::T)::T where {T <: Real}
error("Abstract elements do not necesesarily provide core charge density.")
Expand All @@ -60,12 +64,6 @@ function core_kinetic_energy_density_fourier(::Element, ::T)::T where {T <: Real
error("Abstract elements do not necesesarily provide core kinetic energy density.")
end

# Generic vectorized version of local_potential_fourier, GPU-safe
function local_potential_fourier(el::Element, ps::AbstractVector{T}) where {T <: Real}
arch = architecture(ps)
to_device(arch, map(p -> local_potential_fourier(el, p), to_cpu(ps)))
end

Base.show(io::IO, el::Element) = print(io, "$(typeof(el))(:$(species(el)))")

#
Expand Down Expand Up @@ -175,26 +173,17 @@ has_core_density(el::ElementPsp) = has_core_density(el.psp)
has_core_kinetic_energy_density(el::ElementPsp) = has_core_kinetic_energy_density(el.psp)
eval_psp_energy_correction(T, el::ElementPsp) = eval_psp_energy_correction(T, el.psp)

function local_potential_fourier(el::ElementPsp, p::T) where {T <: Real}
eval_psp_local_fourier(el.psp, p)
end
# Vectorized version of the above
function local_potential_fourier(el::ElementPsp, ps::AbstractVector{T}) where {T <: Real}
eval_psp_local_fourier(el.psp, ps)
end
local_potential_real(el::ElementPsp, r::Real) = eval_psp_local_real(el.psp, r)
local_potential_fourier(el::ElementPsp, p) = eval_psp_local_fourier(el.psp, p)
local_potential_real(el::ElementPsp, r) = eval_psp_local_real(el.psp, r)

function valence_charge_density_fourier(el::ElementPsp, p::T) where {T <: Real}
function valence_charge_density_fourier(el::ElementPsp, p)
if has_valence_density(el.psp)
eval_psp_valence_density_fourier(el.psp, p)
else
gaussian_valence_charge_density_fourier(el, p)
end
end
function core_charge_density_fourier(el::ElementPsp, p::T) where {T <: Real}
eval_psp_core_density_fourier(el.psp, p)
end

core_charge_density_fourier(el::ElementPsp, p) = eval_psp_core_density_fourier(el.psp, p)

#
# ElementCohenBergstresser
Expand Down Expand Up @@ -264,7 +253,6 @@ function local_potential_fourier(el::ElementCohenBergstresser, p::T) where {T <:
end
# TODO Strictly speaking needs a eval_psp_energy_correction


#
# ElementGaussian
#
Expand Down Expand Up @@ -297,6 +285,24 @@ function local_potential_fourier(el::ElementGaussian, p::Real)
end
# TODO Strictly speaking needs a eval_psp_energy_correction

# Generic API expectations: all element functions taking a real space scalar |r| or a
# reciprocal space scalar |p| should have a vectorized version accepting vectors of |r| or |p|.
# The following loop vectorizes element functions by calling the single-value version
# elementwise. This is GPU safe and generic. Performance critical functions should have their
# own GPU-optimized implementation. Note: explicit loop over Element types in order to avoid
# ambiguities with the specific ElementPsp functions.
for fn in [:gaussian_valence_charge_density_fourier, :core_charge_density_fourier,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I trust you checked we still really need this after the simplifications in the ElementPsp interface you did most recently ... so while this is still cursed, it's probably the least evil solution.

But we should make sure that once new ElementXyz types are added, the tests break loudly until one adds the element type to the list below. Is this the case ?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, while it is more compact than the previous solution, it is absolutely necessary that the abstract Element is not used here (because of ambiguity with the ElementPsp functions). If there was a way to just say "All elements, except ElementPsp`, we would be less error prone when new element types are added. I do not know of such a way though.

That being said, when adding a new element type, tests will not pass if the important vectorized functions are not provided.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this double loop is showing some limitations. One of the examples test currently fails with

LoadError: MethodError: no method matching local_potential_fourier(::Main.var"##226".Element2DCoulomb, ::Vector{Float64})

But Element2DCoulomb is defined nowhere within DFTK. It would be caught if the default vectorized functions were defined for generic Elements, but this leads to the usual ambiguities.

I am slowly running out of ideas.

:local_potential_fourier, :local_potential_real]
for el_type in [ElementCoulomb, ElementCohenBergstresser, ElementGaussian]
@eval begin
function DFTK.$fn(el::$el_type, arg::AbstractVector{T}) where {T <: Real}
arch = architecture(arg)
to_device(arch, map(p -> $fn(el, p), to_cpu(arg)))
end
end
end
end

#
# Helper functions
#
Expand Down
46 changes: 37 additions & 9 deletions src/pseudo/NormConservingPsp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ abstract type NormConservingPsp end
# eval_psp_projector_fourier(psp, i, l, ps::AbstrcatArray{<Real})
# eval_psp_local_real(psp, r::Real)
# eval_psp_local_fourier(psp, p::Real)
# eval_psp_local_fourier(psp, ps::AbstractArray{<Real})
# eval_psp_energy_correction(T::Type, psp)

#### Optional methods:
Expand All @@ -36,6 +35,42 @@ abstract type NormConservingPsp end
# count_n_pswfc_radial(psp, l::Integer)
# pswfc_label(psp, i::Integer, l::Integer)

#### Vectorization: all methods are expected to accept vectorized inputs, namely:
# eval_psp_projector_real(psp, i, l, r::AbstractVector{<Real})
# eval_psp_projector_fourier(psp, i, l, p::AbstractVector{<Real})
# eval_psp_local_real(psp, r::AbstractVector{<Real})
# eval_psp_local_fourier(psp, p::AbstractVector{<Real})
# eval_psp_valence_density_real(psp, r::AbstractVector{<Real})
# eval_psp_valence_density_fourier(psp, p::AbstractVector{<Real})
# eval_psp_core_density_real(psp, r::AbstractVector{<Real})
# eval_psp_core_density_fourier(psp, p::AbstractVector{<Real})
# eval_psp_core_kinetic_energy_density_real(psp, r::AbstractVector{<Real})
# eval_psp_core_kinetic_energy_density_fourier(psp, p::AbstractVector{<Real})
# eval_psp_pswfc_real(psp, i, l, p::AbstractVector{<Real})
# eval_psp_pswfc_fourier(psp, i, l, p::AbstractVector{<Real})

# Macros that vectorize a given method for a Psp type by calling an existing scalar
# version elementwise. Safe for GPUArray inputs. Performance critical methods should
# have their own GPU-optimized implementation instead of relying on this macro. The
# different norm-conserving pseudopotential types are responsible for the implementation
# of vectorized functions, whether by using these macros or not.
macro vectorize_psp_function(PspType, fn)
quote
function $fn(psp::$PspType, vec::AbstractVector{T}) where {T <: Real}
arch = architecture(vec)
to_device(arch, map(p -> $fn(psp, p), to_cpu(vec)))
end
end
end
macro vectorize_psp_projector_function(PspType, fn)
quote
function $fn(psp::$PspType, i, l, vec::AbstractVector{T}) where {T <: Real}
arch = architecture(vec)
to_device(arch, map(p -> $fn(psp, i, l, p), to_cpu(vec)))
end
end
end

"""
eval_psp_projector_real(psp, i, l, r)

Expand Down Expand Up @@ -97,12 +132,6 @@ V_{\rm loc}(p) &= ∫_{ℝ^3} (V_{\rm loc}(r) - C(r)) e^{-ip·r} dr + F[C(r)] \\
eval_psp_local_fourier(psp::NormConservingPsp, p::AbstractVector) =
eval_psp_local_fourier(psp, norm(p))

# Fallback vectorized implementation for non GPU-optimized code.
function eval_psp_local_fourier(psp::NormConservingPsp, ps::AbstractVector{T}) where {T <: Real}
arch = architecture(ps)
to_device(arch, map(p -> eval_psp_local_fourier(psp, p), to_cpu(ps)))
end

@doc raw"""
eval_psp_energy_correction([T=Float64,] psp::NormConservingPsp)
eval_psp_energy_correction([T=Float64,] element::Element)
Expand Down Expand Up @@ -184,7 +213,6 @@ eval_psp_core_kinetic_energy_density_fourier(::NormConservingPsp, ::T) where {T
eval_psp_core_kinetic_energy_density_fourier(psp::NormConservingPsp, p::AbstractVector) =
eval_psp_core_kinetic_energy_density_fourier(psp, norm(p))


#### Methods defined on a NormConservingPsp
import Base.Broadcast.broadcastable
Base.Broadcast.broadcastable(psp::NormConservingPsp) = Ref(psp)
Expand Down Expand Up @@ -256,4 +284,4 @@ function find_pswfc(psp::NormConservingPsp, label::String)
end
error("Could not find pseudo atomic orbital with label $label "
* "in pseudopotential $(psp).")
end
end
5 changes: 4 additions & 1 deletion src/pseudo/PspHgh.jl
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ function eval_psp_local_fourier(psp::PspHgh, p::T) where {T <: Real}

4T(π) * rloc^2 * (-Zion + sqrt(T(π) / 2) * rloc * t^2 * P) * exp(-t^2 / 2) / t^2
end
@vectorize_psp_function PspHgh DFTK.eval_psp_local_fourier

# [GTH98] (1)
function eval_psp_local_real(psp::PspHgh, r::T) where {T <: Real}
Expand All @@ -133,6 +134,7 @@ function eval_psp_local_real(psp::PspHgh, r::T) where {T <: Real}
+ exp(-rr^2 / 2) * (cloc[1] + cloc[2] * rr^2 + cloc[3] * rr^4 + cloc[4] * rr^6)
)
end
@vectorize_psp_function PspHgh DFTK.eval_psp_local_real


# [HGH98] (7-15) except they do it with plane waves normalized by 1/sqrt(Ω).
Expand Down Expand Up @@ -161,14 +163,15 @@ function eval_psp_projector_fourier(psp::PspHgh, i, l, p::T) where {T <: Real}

error("Not implemented for l=$l and i=$i")
end

@vectorize_psp_projector_function PspHgh DFTK.eval_psp_projector_fourier

# [HGH98] (3)
function eval_psp_projector_real(psp::PspHgh, i, l, r::T) where {T <: Real}
rp = T(psp.rp[l + 1])
ired = (4i - 1) / T(2)
sqrt(T(2)) * r^(l + 2(i - 1)) * exp(-r^2 / 2rp^2) / rp^(l + ired) / sqrt(gamma(l + ired))
end
@vectorize_psp_projector_function PspHgh DFTK.eval_psp_projector_real

function eval_psp_energy_correction(T, psp::PspHgh)
# By construction we need to compute the DC component of the difference
Expand Down
20 changes: 20 additions & 0 deletions src/pseudo/PspLinComb.jl
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,15 @@ macro make_psplincomb_call(fn)
end
end
end

macro make_psplincomb_call_vectorized(fn)
quote
function $fn(psp::PspLinComb, arg::AbstractVector{<:Real})
sum(c * $fn(pp, arg) for (c, pp) in zip(psp.coefficients, psp.pseudos))
end
end
end

@make_psplincomb_call DFTK.eval_psp_local_real
@make_psplincomb_call DFTK.eval_psp_local_fourier
@make_psplincomb_call DFTK.eval_psp_valence_density_real
Expand All @@ -84,3 +93,14 @@ end
@make_psplincomb_call DFTK.eval_psp_core_density_fourier
@make_psplincomb_call DFTK.eval_psp_core_kinetic_energy_density_real
@make_psplincomb_call DFTK.eval_psp_core_kinetic_energy_density_fourier

@vectorize_psp_function PspLinComb DFTK.eval_psp_local_real
@vectorize_psp_function PspLinComb DFTK.eval_psp_local_fourier
@vectorize_psp_function PspLinComb DFTK.eval_psp_valence_density_real
@vectorize_psp_function PspLinComb DFTK.eval_psp_valence_density_fourier
@vectorize_psp_function PspLinComb DFTK.eval_psp_core_density_real
@vectorize_psp_function PspLinComb DFTK.eval_psp_core_density_fourier
@vectorize_psp_function PspLinComb DFTK.eval_psp_core_kinetic_energy_density_real
@vectorize_psp_function PspLinComb DFTK.eval_psp_core_kinetic_energy_density_fourier
@vectorize_psp_projector_function PspLinComb DFTK.eval_psp_projector_real
@vectorize_psp_projector_function PspLinComb DFTK.eval_psp_projector_fourier
Loading
Loading