Skip to content

Commit d1960d7

Browse files
Fix performance regression and add some timing calls for future use (#124)
* Add timings
* Run simple candidate fit in SA for trivial null spaces
* Remove some debris and add annotation to RS
1 parent a3f18a5 commit d1960d7

File tree

6 files changed

+105
-54
lines changed

6 files changed

+105
-54
lines changed

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@ LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
99
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
1010
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
1111
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
12+
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
1213

1314
[compat]
1415
CommonSolve = "0.2"
1516
LinearSolve = "2, 3"
1617
Reexport = "1"
18+
TimerOutputs = "0.5.29"
1719
julia = "1.6"
1820

1921
[extras]

src/AlgebraicMultigrid.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
module AlgebraicMultigrid
22

3+
using TimerOutputs
34
using Reexport
45
using LinearAlgebra
56
using LinearSolve

src/aggregation.jl

Lines changed: 83 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,82 +13,119 @@ function smoothed_aggregation(A::TA,
1313
diagonal_dominance = false,
1414
keep = false,
1515
coarse_solver = Pinv, kwargs...) where {T,V,bs,TA<:SparseMatrixCSC{T,V}}
16+
17+
@timeit_debug "prologue" begin
18+
1619
n = size(A, 1)
1720
B = isnothing(B) ? ones(T,n) : copy(B)
1821
@assert size(A, 1) == size(B, 1)
1922

20-
#=max_levels, max_coarse, strength =
21-
levelize_strength_or_aggregation(max_levels, max_coarse, strength)
22-
max_levels, max_coarse, aggregate =
23-
levelize_strength_or_aggregation(max_levels, max_coarse, aggregate)
24-
25-
improve_candidates =
26-
levelize_smooth_or_improve_candidates(improve_candidates, max_levels)=#
27-
# str = [stength for _ in 1:max_levels - 1]
28-
# agg = [aggregate for _ in 1:max_levels - 1]
29-
# sm = [smooth for _ in 1:max_levels]
30-
3123
levels = Vector{Level{TA, TA, Adjoint{T, TA}}}()
3224
bsr_flag = false
3325
w = MultiLevelWorkspace(Val{bs}, eltype(A))
3426
residual!(w, size(A, 1))
3527

28+
end
29+
3630
while length(levels) + 1 < max_levels && size(A, 1) > max_coarse
37-
A, B, bsr_flag = extend_hierarchy!(levels, strength, aggregate, smooth,
31+
@timeit_debug "extend_hierarchy!" A, B, bsr_flag = extend_hierarchy_sa!(levels, strength, aggregate, smooth,
3832
improve_candidates, diagonal_dominance,
3933
keep, A, B, symmetry, bsr_flag)
34+
size(A, 1) == 0 && break
4035
coarse_x!(w, size(A, 1))
4136
coarse_b!(w, size(A, 1))
42-
#=if size(A, 1) <= max_coarse
43-
break
44-
end=#
4537
residual!(w, size(A, 1))
4638
end
47-
#=A, B = extend_hierarchy!(levels, strength, aggregate, smooth,
48-
improve_candidates, diagonal_dominance,
49-
keep, A, B, symmetry)=#
50-
MultiLevel(levels, A, coarse_solver(A), presmoother, postsmoother, w)
39+
40+
@timeit_debug "coarse solver setup" cs = coarse_solver(A)
41+
@timeit_debug "ml setup" ml = MultiLevel(levels, A, cs, presmoother, postsmoother, w)
42+
return ml
5143
end
5244

5345
struct HermitianSymmetry
5446
end
5547

56-
function extend_hierarchy!(levels, strength, aggregate, smooth,
48+
function extend_hierarchy_sa!(levels, strength, aggregate, smooth,
5749
improve_candidates, diagonal_dominance, keep,
5850
A, B,
5951
symmetry, bsr_flag)
6052

6153
# Calculate strength of connection matrix
62-
if symmetry isa HermitianSymmetry
54+
@timeit_debug "strength" if symmetry isa HermitianSymmetry
6355
S, _T = strength(A, bsr_flag)
6456
else
6557
S, _T = strength(adjoint(A), bsr_flag)
6658
end
6759

6860
# Aggregation operator
69-
AggOp = aggregate(S)
61+
@timeit_debug "aggregation" AggOp = aggregate(S)
7062
# b = zeros(eltype(A), size(A, 1))
7163

7264
# Improve candidates
7365
b = zeros(size(A,1),size(B,2))
74-
improve_candidates(A, B, b)
75-
T, B = fit_candidates(AggOp, B)
66+
@timeit_debug "improve candidates" improve_candidates(A, B, b)
67+
@timeit_debug "fit candidates" T, B = fit_candidates(AggOp, B)
7668

77-
P = smooth(A, T, S, B)
78-
R = construct_R(symmetry, P)
79-
push!(levels, Level(A, P, R))
69+
@timeit_debug "restriction setup" begin
70+
P = smooth(A, T, S, B)
71+
R = construct_R(symmetry, P)
72+
end
8073

81-
A = R * A * P
74+
@timeit_debug "RAP" RAP = R * A * P
8275

83-
dropzeros!(A)
76+
push!(levels, Level(A, P, R))
8477

8578
bsr_flag = true
8679

87-
A, B, bsr_flag
80+
RAP, B, bsr_flag
8881
end
8982
construct_R(::HermitianSymmetry, P) = P'
9083

91-
function fit_candidates(AggOp, B; tol=1e-10)
84+
function fit_candidates(AggOp, B::AbstractVector; tol=1e-10)
85+
86+
A = adjoint(AggOp)
87+
n_fine, n_coarse = size(A)
88+
n_col = n_coarse
89+
90+
R = zeros(eltype(B), n_coarse)
91+
Qx = zeros(eltype(B), nnz(A))
92+
# copy!(Qx, B)
93+
for i = 1:size(Qx, 1)
94+
Qx[i] = B[i]
95+
end
96+
# copy!(A.nzval, B)
97+
for i = 1:n_col
98+
for j in nzrange(A,i)
99+
row = A.rowval[j]
100+
A.nzval[j] = B[row]
101+
end
102+
end
103+
k = 1
104+
for i = 1:n_col
105+
norm_i = norm_col(A, Qx, i)
106+
threshold_i = tol * norm_i
107+
if norm_i > threshold_i
108+
scale = 1 / norm_i
109+
R[i] = norm_i
110+
else
111+
scale = 0
112+
R[i] = 0
113+
end
114+
for j in nzrange(A, i)
115+
row = A.rowval[j]
116+
# Qx[row] *= scale
117+
#@show k
118+
# Qx[k] *= scale
119+
# k += 1
120+
A.nzval[j] *= scale
121+
end
122+
end
123+
124+
# SparseMatrixCSC(size(A)..., A.colptr, A.rowval, Qx), R
125+
A, R
126+
end
127+
128+
function fit_candidates(AggOp, B::AbstractMatrix; tol=1e-10)
92129
A = adjoint(AggOp)
93130
n_fine, m = ndims(B) == 1 ? (length(B), 1) : size(B)
94131
n_fine2, n_agg = size(A)
@@ -102,7 +139,7 @@ function fit_candidates(AggOp, B; tol=1e-10)
102139
rows = A.rowval[A.colptr[agg]:A.colptr[agg+1]-1]
103140
M = @view B[rows, :] # size(rows) × m
104141

105-
142+
# TODO the code below can be optimized
106143
F = qr(M)
107144
r = min(length(rows), m)
108145
Qfull = Matrix(F.Q)
@@ -124,3 +161,17 @@ function fit_candidates(AggOp, B; tol=1e-10)
124161

125162
return Qs, R
126163
end
164+
165+
function norm_col(A, Qx, i)
166+
s = zero(eltype(A))
167+
for j in nzrange(A, i)
168+
if A.rowval[j] > length(Qx)
169+
val = 1
170+
else
171+
val = Qx[A.rowval[j]]
172+
end
173+
# val = A.nzval[A.rowval[j]]
174+
s += val*val
175+
end
176+
sqrt(s)
177+
end

src/classical.jl

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,9 @@ function ruge_stuben(_A::Union{TA, Symmetric{Ti, TA}, Hermitian{Ti, TA}},
99
max_coarse = 10,
1010
coarse_solver = Pinv, kwargs...) where {Ti,Tv,bs,TA<:SparseMatrixCSC{Ti,Tv}}
1111

12-
1312
# fails if near null space `B` is provided
1413
haskey(kwargs, :B) && kwargs[:B] !== nothing && error("near null space `B` is only supported for smoothed aggregation AMG, not Ruge-Stüben AMG.")
15-
14+
1615
if _A isa Symmetric && Ti <: Real || _A isa Hermitian
1716
A = _A.data
1817
symmetric = true
@@ -26,35 +25,37 @@ function ruge_stuben(_A::Union{TA, Symmetric{Ti, TA}, Hermitian{Ti, TA}},
2625
residual!(w, size(A, 1))
2726

2827
while length(levels) + 1 < max_levels && size(A, 1) > max_coarse
29-
A = extend_heirarchy!(levels, strength, CF, A, symmetric)
28+
@timeit_debug "extend_hierarchy!" A = extend_hierarchy_rs!(levels, strength, CF, A, symmetric)
3029
coarse_x!(w, size(A, 1))
3130
coarse_b!(w, size(A, 1))
3231
residual!(w, size(A, 1))
3332
end
3433

35-
MultiLevel(levels, A, coarse_solver(A), presmoother, postsmoother, w)
34+
@timeit_debug "coarse solver setup" cs = coarse_solver(A)
35+
return MultiLevel(levels, A, cs, presmoother, postsmoother, w)
3636
end
3737

38-
function extend_heirarchy!(levels, strength, CF, A::SparseMatrixCSC{Ti,Tv}, symmetric) where {Ti,Tv}
38+
function extend_hierarchy_rs!(levels, strength, CF, A::SparseMatrixCSC{Ti,Tv}, symmetric) where {Ti,Tv}
3939
if symmetric
4040
At = A
4141
else
4242
At = adjoint(A)
4343
end
44-
S, T = strength(At)
45-
splitting = CF(S)
46-
P, R = direct_interpolation(At, T, splitting)
44+
@timeit_debug "strength" S, T = strength(At)
45+
@timeit_debug "splitting" splitting = CF(S)
46+
@timeit_debug "interpolation" P, R = direct_interpolation(At, T, splitting)
47+
@timeit_debug "RAP" RAP = R * A * P
4748
push!(levels, Level(A, P, R))
48-
return R * A * P
49+
return RAP
4950
end
5051

5152
function direct_interpolation(At, T, splitting)
5253
T = typeof(At)(T)
5354
fill!(T.nzval, eltype(At)(1))
5455
T .= At .* T
5556

56-
Pp = rs_direct_interpolation_pass1(T, splitting)
57-
Px, Pj, Pp = rs_direct_interpolation_pass2(At, T, splitting, Pp)
57+
@timeit_debug "di pass1" Pp = rs_direct_interpolation_pass1(T, splitting)
58+
@timeit_debug "di pass2" Px, Pj, Pp = rs_direct_interpolation_pass2(At, T, splitting, Pp)
5859
R = SparseMatrixCSC(isempty(Pj) ? 0 : maximum(Pj), size(At, 1), Pp, Pj, Px)
5960
P = R'
6061

src/multilevel.jl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ function _solve!(x, ml::MultiLevel, b::AbstractArray{T},
223223
itr = lvl = 1
224224
while itr <= maxiter && (!calculate_residual || normres > abstol)
225225
if length(ml) == 1
226-
ml.coarse_solver(x, b)
226+
@timeit_debug "Coarse solve" ml.coarse_solver(x, b)
227227
else
228228
__solve!(x, ml, cycle, b, lvl)
229229
end
@@ -259,27 +259,27 @@ end
259259

260260
function __solve!(x, ml, cycle::Cycle, b, lvl)
261261
A = ml.levels[lvl].A
262-
ml.presmoother(A, x, b)
262+
@timeit_debug "Presmoother" ml.presmoother(A, x, b)
263263

264264
res = ml.workspace.res_vecs[lvl]
265-
mul!(res, A, x)
265+
@timeit_debug "Residual eval" mul!(res, A, x)
266266
reshape(res, size(b)) .= b .- reshape(res, size(b))
267267

268268
coarse_b = ml.workspace.coarse_bs[lvl]
269-
mul!(coarse_b, ml.levels[lvl].R, res)
269+
@timeit_debug "Restriction" mul!(coarse_b, ml.levels[lvl].R, res)
270270

271271
coarse_x = ml.workspace.coarse_xs[lvl]
272272
coarse_x .= 0
273273
if lvl == length(ml.levels)
274-
ml.coarse_solver(coarse_x, coarse_b)
274+
@timeit_debug "Coarse solve" ml.coarse_solver(coarse_x, coarse_b)
275275
else
276276
coarse_x = __solve_next!(coarse_x, ml, cycle, coarse_b, lvl + 1)
277277
end
278278

279-
mul!(res, ml.levels[lvl].P, coarse_x)
279+
@timeit_debug "Prolongation" mul!(res, ml.levels[lvl].P, coarse_x)
280280
x .+= res
281281

282-
ml.postsmoother(A, x, b)
282+
@timeit_debug "Postsmoother" ml.postsmoother(A, x, b)
283283

284284
x
285285
end

test/runtests.jl

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -333,9 +333,6 @@ ml = ruge_stuben(X)
333333
b = rand(27_000)
334334
@test AlgebraicMultigrid._solve(ml, b, reltol = 1e-10) ≈ X \ b rtol = 1e-10
335335

336-
337-
338-
339336
# LinearSolve precs interface
340337
@testset "LinearSolvePrecs" begin
341338

@@ -350,7 +347,6 @@ for sz in [ (10,10), (20,20), (50,50)]
350347

351348
strategy = KrylovJL_CG(precs = SmoothedAggregationPreconBuilder())
352349
@test solve(prob, strategy, atol=1.0e-14) ≈ u0 rtol = 1.0e-8
353-
354350
end
355351

356352
end

0 commit comments

Comments
 (0)