Skip to content
3 changes: 3 additions & 0 deletions CMakeLists_files.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,9 @@ if (HAVE_AVX2_EXTENSION)
opm/simulators/linalg/mixed/prec.h
opm/simulators/linalg/mixed/vec.h
opm/simulators/linalg/mixed/wrapper.hpp
opm/simulators/linalg/mixed/MatrixWrapper.hpp
opm/simulators/linalg/mixed/PreconditionerWrapper.hpp
opm/simulators/linalg/mixed/SolverAdapter.hpp
)
endif()

Expand Down
12 changes: 11 additions & 1 deletion opm/simulators/linalg/FlexibleSolver_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

#if HAVE_AVX2_EXTENSION
#include <opm/simulators/linalg/mixed/wrapper.hpp>
#include <opm/simulators/linalg/mixed/SolverAdapter.hpp>
#endif

#include <dune/common/fmatrix.hh>
Expand Down Expand Up @@ -193,7 +194,7 @@ namespace Dune
OPM_THROW(std::invalid_argument, "mixed-bicgstab solver not supported for single precision.");
} else {
const std::string prec_type = prm.get<std::string>("preconditioner.type", "error");
bool use_mixed_dilu= (prec_type=="mixed-dilu");
bool use_mixed_dilu= (prec_type=="legacy-mixed-dilu");
using MatrixType = decltype(linearoperator_for_solver_->getmat());
linsolver_ = std::make_shared<Dune::MixedSolver<VectorType,MatrixType>>(
linearoperator_for_solver_->getmat(),
Expand All @@ -202,6 +203,15 @@ namespace Dune
use_mixed_dilu
);
}
// mixed-solver adapter starts here
} else if (solver_type == "mixed-precision") {
linsolver_ = std::make_shared<Dune::MixedAdapter<Comm,Operator,VectorType>>(linearoperator_for_solver_,
scalarproduct_,
preconditioner_,
tol, // desired residual reduction factor
maxiter, // maximum number of iterations
verbosity,
comm);
#endif
} else if (solver_type == "loopsolver") {
linsolver_ = std::make_shared<Dune::LoopSolver<VectorType>>(*linearoperator_for_solver_,
Expand Down
10 changes: 10 additions & 0 deletions opm/simulators/linalg/StandardPreconditioners_mpi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include <memory>
#include <type_traits>

#include <opm/simulators/linalg/mixed/PreconditionerWrapper.hpp>

namespace Opm {


Expand Down Expand Up @@ -163,6 +165,14 @@ struct StandardPreconditioners
DUNE_UNUSED_PARAMETER(prm);
return wrapBlockPreconditioner<MultithreadDILU<M, V, V>>(comm, op.getmat());
});
F::addCreator("mixed-ilu0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
DUNE_UNUSED_PARAMETER(prm);
return wrapBlockPreconditioner<MixedPreconditioner<M,V,V>>(comm, op.getmat());
});
F::addCreator("mixed-dilu", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
DUNE_UNUSED_PARAMETER(prm);
return wrapBlockPreconditioner<MixedPreconditioner<M,V,V>>(comm, op.getmat(), true);
});
F::addCreator("jac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
const int n = prm.get<int>("repeats", 1);
const double w = prm.get<double>("relaxation", 1.0);
Expand Down
14 changes: 13 additions & 1 deletion opm/simulators/linalg/StandardPreconditioners_serial.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@
#include <memory>
#include <type_traits>


#include <opm/simulators/linalg/mixed/PreconditionerWrapper.hpp>


namespace Opm {

template <class X, class Y>
Expand Down Expand Up @@ -89,11 +93,19 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation, typen
return std::make_shared<MultithreadDILU<M, V, V>>(op.getmat());
});
F::addCreator("mixed-ilu0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
DUNE_UNUSED_PARAMETER(prm);
return std::make_shared<MixedPreconditioner<M,V,V>>(op.getmat());
});
F::addCreator("mixed-dilu", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
DUNE_UNUSED_PARAMETER(prm);
return std::make_shared<MixedPreconditioner<M,V,V>>(op.getmat(),true);
});
F::addCreator("legacy-mixed-ilu0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
DUNE_UNUSED_PARAMETER(prm);
DUNE_UNUSED_PARAMETER(op);
return std::make_shared<TrivialPreconditioner<V,V>>();
});
F::addCreator("mixed-dilu", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
F::addCreator("legacy-mixed-dilu", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
DUNE_UNUSED_PARAMETER(prm);
DUNE_UNUSED_PARAMETER(op);
return std::make_shared<TrivialPreconditioner<V,V>>();
Expand Down
85 changes: 85 additions & 0 deletions opm/simulators/linalg/mixed/MatrixWrapper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#ifndef OPM_MIXED_MATRIX_HEADER_INCLUDED
#define OPM_MIXED_MATRIX_HEADER_INCLUDED


#include <opm/simulators/linalg/mixed/bsr.h>


//! @brief Wraps c-implementation of mixed-precision matrix
//!
//! @tparam Vector the block-vector used by linear operator
//! @tparam b block size
template <class Vector, int b>
class MatrixWrapper
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add this to a namespace?
Also, from the point of view of the user of this class, it is irrelevant that it wraps the c implementation or not. What is important is that its storage of 3x3 blocks is downcasted to single precision and vectorized. So I would call it something else, like MixedMatrixWrapper for example.

{
public:
virtual void mv(const Vector& x, Vector& y) const;
virtual void umv(const Vector& x, Vector& y) const;
virtual void usmv(double alpha, const Vector& x, Vector& y) const;

//! @brief constructor
//!
//! @param nrows number of block rows
//! @param nnz number of nonzero blocks
MatrixWrapper(int nrows, int nnz)
{
nnz_=nnz;
M_ = bsr_alloc();
bsr_init(M_, nrows, nnz, b);
}

//! @brief destructor
~MatrixWrapper() {bsr_free(M_);}

void update(double const *data);

int *rowptr(){return M_->rowptr;}
int *colidx(){return M_->colidx;}

private:
int nnz_;
bsr_matrix *M_;
};

template <class Vector, int b>
void MatrixWrapper<Vector,b>::
mv(const Vector& x, Vector& y) const
{
// mixed-precision block spmv (y = M.x)
bsr_vmspmv3(M_, &x[0][0], &y[0][0]);
}

template <class Vector, int b>
void MatrixWrapper<Vector,b>::
umv(const Vector& x, Vector& y) const
{
// mixed-precision block spmv with update (y += M.x)
bsr_vmspumv3(M_, &x[0][0], &y[0][0], 1.0);
}

template <class Vector, int b>
void MatrixWrapper<Vector,b>::
usmv(double alpha, const Vector& x, Vector& y) const
{
// scaled mixed-precision block spmv with update (y += alpha * M.x)
bsr_vmspumv3(M_, &x[0][0], &y[0][0], alpha);
}

template <class Vector, int b>
void MatrixWrapper<Vector,b>::
update(double const *data)
{
// transpose each dense block to make them column-major
int bb=b*b;
double B[bb];
for(int k=0;k<nnz_;k++)
{
for(int i=0;i<b;i++) for(int j=0;j<b;j++) B[b*j+i] = data[bb*k + b*i + j];
for(int i=0;i<bb;i++) M_->dbl[bb*k + i] = B[i];
}

// downcast to single precision
bsr_downcast(M_);
}

#endif // OPM_MIXED_MATRIX_HEADER_INCLUDED
115 changes: 115 additions & 0 deletions opm/simulators/linalg/mixed/PreconditionerWrapper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#ifndef OPM_MIXED_PREC_HEADER_INCLUDED
#define OPM_MIXED_PREC_HEADER_INCLUDED

#include <opm/simulators/linalg/mixed/prec.h>

namespace Opm {

//! @brief Wraps c-implementation of mixed-precision preconditioner
//!
//! @tparam Vector the block-vector used by linear operator
//! @tparam M block-sparse matrix type
//! @tparam X block-vector input type
//! @tparam Y block vector output type
template <class M, class X, class Y>
class MixedPreconditioner : public Dune::PreconditionerWithUpdate<X, Y>
{
public:

using domain_type = X;
static constexpr auto block_size = domain_type::block_type::dimension;

//! @brief constructor
//!
//! @param A block-sparse matrix
//! @param use_dilu toggle between dilu or ilu0 factorization
MixedPreconditioner(const M& A, bool use_dilu = false)
{
// Access double precision matrix data
int nrows = A.N();
int nnz = A.nonzeroes();
double_data_ = &A[0][0][0][0];
prec_ = prec_alloc();

// allocate and initialize mixed-precision matrix
mixed_matrix_ = bsr_alloc();
bsr_init(mixed_matrix_, nrows, nnz, block_size);

// copy sparsity pattern from double preccision matrix
int *rows = mixed_matrix_->rowptr;
int *cols = mixed_matrix_->colidx;

int irow = 0;
int icol = 0;
rows[0] = 0;
for(auto row=A.begin(); row!=A.end(); row++)
{
for(unsigned int i=0; i<row->getsize(); i++)
{
cols[icol++] = row->getindexptr()[i];
}
rows[irow+1] = rows[irow]+row->getsize();
irow++;
}

// allocate and initialize preconditioner
prec_init(prec_,mixed_matrix_);

// attribute initialization
nnz_ = nnz;
use_dilu_ = use_dilu;

// perform matrix factorization
update();
};

//! @brief destructor
~MixedPreconditioner()
{
bsr_free(mixed_matrix_);
prec_free(prec_);
}

virtual void update() override;
virtual bool hasPerfectUpdate() const override {return true;}
virtual void pre ([[maybe_unused]] X& x, [[maybe_unused]] Y& y) override {};
virtual void post ([[maybe_unused]] X& x) override {};
virtual void apply ([[maybe_unused]] X& x, [[maybe_unused]] const Y& y) override;
virtual Dune::SolverCategory::Category category() const override { return Dune::SolverCategory::sequential; };

private:
bool use_dilu_;
double const *double_data_;
bsr_matrix *mixed_matrix_;
prec_t *prec_;
int nnz_;
};

template <class M, class X, class Y>
void MixedPreconditioner<M,X,Y>::
update ()
{
// transpose each dense block to make them column-major
int b = block_size;
int bb=b*b;
double B[bb];
for(int k=0;k<nnz_;k++)
{
for(int i=0;i<b;i++) for(int j=0;j<b;j++) B[b*j+i] = double_data_[bb*k + b*i + j];
for(int i=0;i<bb;i++) mixed_matrix_->dbl[bb*k + i] = B[i];
}

use_dilu_ ? prec_dilu_factorize(prec_, mixed_matrix_) : prec_ilu0_factorize(prec_, mixed_matrix_); // choose dilu or ilu0
prec_downcast(prec_);
}

template <class M, class X, class Y>
void MixedPreconditioner<M,X,Y>::
apply ([[maybe_unused]] X& x, [[maybe_unused]] const Y& y)
{
x=y;
prec_mapply3c(prec_,&x[0][0]);
}

}
#endif // OPM_MIXED_PREC_HEADER_INCLUDED
17 changes: 11 additions & 6 deletions opm/simulators/linalg/mixed/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@ and a highly optimized mixed-precision implementation of ILU0 and DILU precondit
bicgstab. Hopefully, this will inspire the exploration of mixed-precision algorithms
in OPM.

The initial implementation is specialized for 3x3 block-sparse matrices due to their
importance in reservoir simulation and restricted to serial runs. Extending the work
to parallel runs is a matter of extending OPM's parallel infrastructure and should be
relatively straight-forward.
The initial implementations are specialized for 3x3 block-sparse matrices due to their
importance in reservoir simulation. The original implementation only works in serial.
The parallel implementation wraps the optimized mixed-precision matrix-vector multiplication
and ILU0/DILU preconditioners into building blocks for the ISTL-based implementation of
bicgstab. This sacrifices some performance for improved modularity. Extending the work to
block-sparse matrices of arbitrary block size is work in progress.

The mixed-precision solver is selected by the command-line options `--linear-solver=mixed-ilu0`
or `--linear-solver=mixed-dilu`. The command-line option `--matrix-add-well-contributions=true`
must also be set as the mixed-precision solver operates directly on block-sparse matrices, not
on linear operators as other OPM solvers do. For convenience, a wrapper similar to the one below
can be used.
on linear operators as other OPM solvers do. For convenience, a wrapper similar to the one
below can be used.

``` bash
OMP_NUM_THREADS=1 mpirun -np 1 --map-by numa --bind-to core build/bin/flow \
Expand All @@ -23,3 +25,6 @@ OMP_NUM_THREADS=1 mpirun -np 1 --map-by numa --bind-to core build/bin/flow \
--linear-solver-max-iter=1024 \
$@
```

To invoke the original serial implementation add the `legacy-` prefix to the mixed-precision
linear solver options.
Loading