diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake
index ad147010f22..60f7dac3269 100644
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@@ -1299,6 +1299,9 @@ if (HAVE_AVX2_EXTENSION)
     opm/simulators/linalg/mixed/prec.h
     opm/simulators/linalg/mixed/vec.h
     opm/simulators/linalg/mixed/wrapper.hpp
+    opm/simulators/linalg/mixed/MatrixWrapper.hpp
+    opm/simulators/linalg/mixed/PreconditionerWrapper.hpp
+    opm/simulators/linalg/mixed/SolverAdapter.hpp
   )
 endif()
 
diff --git a/opm/simulators/linalg/FlexibleSolver_impl.hpp b/opm/simulators/linalg/FlexibleSolver_impl.hpp
index f37fd2a2111..51a6cdc3238 100644
--- a/opm/simulators/linalg/FlexibleSolver_impl.hpp
+++ b/opm/simulators/linalg/FlexibleSolver_impl.hpp
@@ -34,6 +34,7 @@
 
 #if HAVE_AVX2_EXTENSION
 #include <opm/simulators/linalg/mixed/wrapper.hpp>
+#include <opm/simulators/linalg/mixed/SolverAdapter.hpp>
 #endif
 
 #include <dune/common/fmatrix.hh>
@@ -193,7 +194,7 @@ namespace Dune
                 OPM_THROW(std::invalid_argument, "mixed-bicgstab solver not supported for single precision.");
             } else {
                 const std::string prec_type = prm.get<std::string>("preconditioner.type", "error");
-                bool use_mixed_dilu= (prec_type=="mixed-dilu");
+                bool use_mixed_dilu= (prec_type=="legacy-mixed-dilu");
                 using MatrixType = decltype(linearoperator_for_solver_->getmat());
                 linsolver_ = std::make_shared<Dune::MixedSolver<VectorType,MatrixType>>(
                                                                             linearoperator_for_solver_->getmat(),
@@ -202,6 +203,15 @@ namespace Dune
                                                                             use_mixed_dilu
                                                                         );
             }
+            // mixed-solver adapter starts here
+          } else if (solver_type == "mixed-precision") {
+                linsolver_ = std::make_shared<Dune::MixedAdapter<Comm,Operator,VectorType>>(linearoperator_for_solver_,
+                                                                            scalarproduct_,
+                                                                            preconditioner_,
+                                                                            tol, // desired residual reduction factor
+                                                                            maxiter, // maximum number of iterations
+                                                                            verbosity,
+                                                                            comm);
 #endif
         } else if (solver_type == "loopsolver") {
             linsolver_ = std::make_shared<Dune::LoopSolver<VectorType>>(*linearoperator_for_solver_,
diff --git a/opm/simulators/linalg/StandardPreconditioners_mpi.hpp b/opm/simulators/linalg/StandardPreconditioners_mpi.hpp
index b19afb13fd8..5c6e636db77 100644
--- a/opm/simulators/linalg/StandardPreconditioners_mpi.hpp
+++ b/opm/simulators/linalg/StandardPreconditioners_mpi.hpp
@@ -33,6 +33,8 @@
 #include <memory>
 #include <type_traits>
 
+#include <opm/simulators/linalg/mixed/PreconditionerWrapper.hpp>
+
 namespace Opm {
 
 
@@ -163,6 +165,14 @@ struct StandardPreconditioners
             DUNE_UNUSED_PARAMETER(prm);
             return wrapBlockPreconditioner<MultithreadDILU<M, V, V>>(comm, op.getmat());
         });
+        F::addCreator("mixed-ilu0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
+            DUNE_UNUSED_PARAMETER(prm);
+            return wrapBlockPreconditioner<MixedPreconditioner<M,V,V>>(comm, op.getmat());
+        });
+        F::addCreator("mixed-dilu", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
+            DUNE_UNUSED_PARAMETER(prm);
+            return wrapBlockPreconditioner<MixedPreconditioner<M,V,V>>(comm, op.getmat(), true);
+        });
         F::addCreator("jac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
             const int n = prm.get<int>("repeats", 1);
             const double w = prm.get<double>("relaxation", 1.0);
diff --git a/opm/simulators/linalg/StandardPreconditioners_serial.hpp b/opm/simulators/linalg/StandardPreconditioners_serial.hpp
index 23971a75614..2e390a4e5ed 100644
--- a/opm/simulators/linalg/StandardPreconditioners_serial.hpp
+++ b/opm/simulators/linalg/StandardPreconditioners_serial.hpp
@@ -33,6 +33,10 @@
 #include <memory>
 #include <type_traits>
 
+
+#include <opm/simulators/linalg/mixed/PreconditionerWrapper.hpp>
+
+
 namespace Opm {
 
 template <class X, class Y>
@@ -89,11 +93,19 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation, typen
             return std::make_shared<MultithreadDILU<M, V, V>>(op.getmat());
         });
         F::addCreator("mixed-ilu0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
+            DUNE_UNUSED_PARAMETER(prm);
+            return std::make_shared<MixedPreconditioner<M,V,V>>(op.getmat());
+        });
+        F::addCreator("mixed-dilu", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
+            DUNE_UNUSED_PARAMETER(prm);
+            return std::make_shared<MixedPreconditioner<M,V,V>>(op.getmat(),true);
+        });
+        F::addCreator("legacy-mixed-ilu0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
             DUNE_UNUSED_PARAMETER(prm);
             DUNE_UNUSED_PARAMETER(op);
             return std::make_shared<TrivialPreconditioner<V,V>>();
         });
-        F::addCreator("mixed-dilu", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
+        F::addCreator("legacy-mixed-dilu", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
             DUNE_UNUSED_PARAMETER(prm);
             DUNE_UNUSED_PARAMETER(op);
             return std::make_shared<TrivialPreconditioner<V,V>>();
diff --git a/opm/simulators/linalg/mixed/MatrixWrapper.hpp b/opm/simulators/linalg/mixed/MatrixWrapper.hpp
new file mode 100644
index 00000000000..10e273a5bd5
--- /dev/null
+++ b/opm/simulators/linalg/mixed/MatrixWrapper.hpp
@@ -0,0 +1,85 @@
+#ifndef OPM_MIXED_MATRIX_HEADER_INCLUDED
+#define OPM_MIXED_MATRIX_HEADER_INCLUDED
+
+
+#include <opm/simulators/linalg/mixed/bsr.h>
+
+
+//! @brief Wraps c-implementation of mixed-precision matrix
+//!
+//! @tparam Vector the block-vector used by linear operator
+//! @tparam b block size
+template <class Vector, int b>
+class MatrixWrapper
+{
+    public:
+    virtual void mv(const Vector& x, Vector& y) const;
+    virtual void umv(const Vector& x, Vector& y) const;
+    virtual void usmv(double alpha, const Vector& x, Vector& y) const;
+
+    //! @brief constructor
+    //!
+    //! @param nrows number of block rows
+    //! @param nnz number of nonzero blocks
+    MatrixWrapper(int nrows, int nnz)
+    {
+        nnz_=nnz;
+        M_ = bsr_alloc();
+        bsr_init(M_, nrows, nnz, b);
+    }
+
+    //! @brief destructor
+    ~MatrixWrapper() {bsr_free(M_);}
+
+    void update(double const *data);
+
+    int *rowptr(){return M_->rowptr;}
+    int *colidx(){return M_->colidx;}
+
+    private:
+    int nnz_;
+    bsr_matrix  *M_;
+};
+
+template <class Vector, int b>
+void MatrixWrapper<Vector,b>::
+mv(const Vector& x, Vector& y) const
+{
+    // mixed-precision block spmv (y = M.x)
+    bsr_vmspmv3(M_, &x[0][0], &y[0][0]);
+}
+
+template <class Vector, int b>
+void MatrixWrapper<Vector,b>::
+umv(const Vector& x, Vector& y) const
+{
+    // mixed-precision block spmv with update (y += M.x)
+    bsr_vmspumv3(M_, &x[0][0], &y[0][0], 1.0);
+}
+
+template <class Vector, int b>
+void MatrixWrapper<Vector,b>::
+usmv(double alpha, const Vector& x, Vector& y) const
+{
+    // scaled mixed-precision block spmv with update (y += alpha *  M.x)
+    bsr_vmspumv3(M_, &x[0][0], &y[0][0], alpha);
+}
+
+template <class Vector, int b>
+void MatrixWrapper<Vector,b>::
+update(double const *data)
+{
+    // transpose each dense block to make them column-major
+    int bb=b*b;
+    double B[bb];
+    for(int k=0;k<nnz_;k++)
+    {
+        for(int i=0;i<b;i++) for(int j=0;j<b;j++) B[b*j+i] = data[bb*k + b*i + j];
+        for(int i=0;i<bb;i++) M_->dbl[bb*k + i] = B[i];
+    }
+
+    // downcast to single precision
+    bsr_downcast(M_);
+}
+
+#endif // OPM_MIXED_MATRIX_HEADER_INCLUDED
diff --git a/opm/simulators/linalg/mixed/PreconditionerWrapper.hpp b/opm/simulators/linalg/mixed/PreconditionerWrapper.hpp
new file mode 100644
index 00000000000..2f91f2bc1b1
--- /dev/null
+++ b/opm/simulators/linalg/mixed/PreconditionerWrapper.hpp
@@ -0,0 +1,115 @@
+#ifndef OPM_MIXED_PREC_HEADER_INCLUDED
+#define OPM_MIXED_PREC_HEADER_INCLUDED
+
+#include <opm/simulators/linalg/mixed/prec.h>
+
+namespace Opm {
+
+//! @brief Wraps c-implementation of mixed-precision preconditioner
+//!
+//! @tparam Vector the block-vector used by linear operator
+//! @tparam M block-sparse matrix type
+//! @tparam X block-vector input type
+//! @tparam Y block vector output type
+template <class M, class X, class Y>
+class MixedPreconditioner : public Dune::PreconditionerWithUpdate<X, Y>
+{
+    public:
+
+    using domain_type = X;
+    static constexpr auto block_size = domain_type::block_type::dimension;
+
+    //! @brief constructor
+    //!
+    //! @param A block-sparse matrix
+    //! @param use_dilu toggle between dilu or ilu0 factorization
+    MixedPreconditioner(const M& A, bool use_dilu = false)
+    {
+        // Access double precision matrix data
+        int nrows = A.N();
+        int nnz = A.nonzeroes();
+        double_data_  = &A[0][0][0][0];
+        prec_         = prec_alloc();
+
+        // allocate and initialize mixed-precision matrix
+        mixed_matrix_ = bsr_alloc();
+        bsr_init(mixed_matrix_, nrows, nnz, block_size);
+
+        // copy sparsity pattern from double preccision matrix
+        int *rows = mixed_matrix_->rowptr;
+        int *cols = mixed_matrix_->colidx;
+
+        int irow = 0;
+        int icol = 0;
+        rows[0]  = 0;
+        for(auto row=A.begin(); row!=A.end(); row++)
+        {
+            for(unsigned int i=0; i<row->getsize(); i++)
+            {
+                cols[icol++] = row->getindexptr()[i];
+            }
+            rows[irow+1]     = rows[irow]+row->getsize();
+            irow++;
+        }
+
+        // allocate and initialize preconditioner
+        prec_init(prec_,mixed_matrix_);
+
+        // attribute initialization
+        nnz_ = nnz;
+        use_dilu_ = use_dilu;
+
+        // perform matrix factorization
+        update();
+    };
+
+    //! @brief destructor
+    ~MixedPreconditioner()
+    {
+        bsr_free(mixed_matrix_);
+        prec_free(prec_);
+    }
+
+    virtual void update() override;
+    virtual bool hasPerfectUpdate() const override {return true;}
+    virtual void pre ([[maybe_unused]] X& x, [[maybe_unused]] Y& y) override {};
+    virtual void post ([[maybe_unused]] X& x) override {};
+    virtual void apply ([[maybe_unused]] X& x, [[maybe_unused]] const Y& y) override;
+    virtual Dune::SolverCategory::Category category() const override { return Dune::SolverCategory::sequential; };
+
+    private:
+    bool use_dilu_;
+    double const *double_data_;
+    bsr_matrix *mixed_matrix_;
+    prec_t *prec_;
+    int nnz_;
+};
+
+template <class M, class X, class Y>
+void MixedPreconditioner<M,X,Y>::
+update ()
+{
+    // transpose each dense block to make them column-major
+    int b = block_size;
+    int bb=b*b;
+    double B[bb];
+    for(int k=0;k<nnz_;k++)
+    {
+        for(int i=0;i<b;i++) for(int j=0;j<b;j++) B[b*j+i] = double_data_[bb*k + b*i + j];
+        for(int i=0;i<bb;i++) mixed_matrix_->dbl[bb*k + i] = B[i];
+    }
+
+    use_dilu_ ? prec_dilu_factorize(prec_, mixed_matrix_) : prec_ilu0_factorize(prec_, mixed_matrix_); // choose dilu or ilu0
+    prec_downcast(prec_);
+}
+
+template <class M, class X, class Y>
+void MixedPreconditioner<M,X,Y>::
+apply ([[maybe_unused]] X& x, [[maybe_unused]] const Y& y)
+{
+    x=y;
+    prec_mapply3c(prec_,&x[0][0]);
+}
+
+}
+#endif // OPM_MIXED_PREC_HEADER_INCLUDED
diff --git a/opm/simulators/linalg/mixed/README.md b/opm/simulators/linalg/mixed/README.md
index 1e27c55ea8d..44ae8572bb1 100644
--- a/opm/simulators/linalg/mixed/README.md
+++ b/opm/simulators/linalg/mixed/README.md
@@ -4,16 +4,18 @@ and a highly optimized mixed-precision implementation of ILU0 and DILU precondit
 bicgstab. Hopefully, this will inspire the exploration of mixed-precision algorithms
 in OPM.
 
-The initial implementation is specialized for 3x3 block-sparse matrices due to their
-importance in reservoir simulation and restricted to serial runs. Extending the work
-to parallel runs is a matter of extending OPM's parallel infrastructure and should be
-relatively straight-forward.
+The initial implementations are specialized for 3x3 block-sparse matrices due to their
+importance in reservoir simulation. The original implementation only works in serial.
+The parallel implementation wraps the optimized mixed-precision matrix-vector multiplication
+and ILU0/DILU preconditioners into building blocks for the ISTL-based implementation of
+bicgstab. This sacrifices some performance for improved modularity. Extending the work to
+block-sparse matrices of arbitrary block size is work in progress.
 
 The mixed-precision solver is selected by the command-line options `--linear-solver=mixed-ilu0`
 or `--linear-solver=mixed-dilu`. The command-line option `--matrix-add-well-contributions=true`
 must also be set as the mixed-precision solver operates directly on block-sparse matrices, not
-on linear operators as other OPM solvers do. For convenience, a wrapper similar to the one below
-can be used.
+on linear operators as other OPM solvers do. For convenience, a wrapper similar to the one
+below can be used.
 
 ``` bash
 OMP_NUM_THREADS=1 mpirun -np 1 --map-by numa --bind-to core build/bin/flow \
@@ -23,3 +25,6 @@ OMP_NUM_THREADS=1 mpirun -np 1 --map-by numa --bind-to core build/bin/flow \
     --linear-solver-max-iter=1024 \
     $@
 ```
+
+To invoke the original serial implementation add the `legacy-` prefix to the mixed-precision
+linear solver options.
diff --git a/opm/simulators/linalg/mixed/SolverAdapter.hpp b/opm/simulators/linalg/mixed/SolverAdapter.hpp
new file mode 100644
index 00000000000..0b4380f3d93
--- /dev/null
+++ b/opm/simulators/linalg/mixed/SolverAdapter.hpp
@@ -0,0 +1,209 @@
+#ifndef OPM_MIXED_ADAPTER_HEADER_INCLUDED
+#define OPM_MIXED_ADAPTER_HEADER_INCLUDED
+
+#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
+
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/bvector.hh>
+#include <dune/istl/schwarz.hh>
+#include <dune/istl/operators.hh>
+#include <dune/istl/solvers.hh>
+#include <dune/istl/owneroverlapcopy.hh>
+
+#include <dune/istl/matrixindexset.hh>
+
+#include <opm/simulators/linalg/mixed/MatrixWrapper.hpp>
+
+
+
+namespace Dune
+{
+//#include <dune/istl/solvers.hh>
+
+
+//! @brief Optimized sequential scalar product.
+//!
+//! @tparam Vector block-vector class with data stored as contiguous double array
+template<class Vector>
+class SeqOptmizedProduct : public Dune::SeqScalarProduct<Vector>
+{
+public:
+
+    // extract block size
+    static constexpr auto block_size = Vector::block_type::dimension;
+
+    // Compute the dot product <vx, vy>
+    virtual double dot(const Vector& vx, const Vector& vy) const override
+    {
+        // access underlying data
+        double const *x = &vx[0][0];
+        double const *y = &vy[0][0];
+
+        // total array length
+        int NN = block_size*vx.N();
+
+        // unroll loop in multiples of 8
+        int n=NN/8;
+        int N=8*n;
+        double agg[8];
+        for(int i=0;i<8;i++) agg[i]=0.0;
+        for(int i=0;i<N;i+=8) for(int j=0;j<8;j++) agg[j]+=x[i+j]*y[i+j];
+        for(int j=0;j<4;j++) agg[j]+=agg[j+4];
+        for(int j=0;j<2;j++) agg[j]+=agg[j+2];
+        for(int j=0;j<1;j++) agg[j]+=agg[j+1];
+
+        // trailing end
+        for(int j=N;j<NN;j++) agg[0]+=x[j]*y[j];
+
+        return agg[0];
+    }
+
+    // Compute the norm ||x||
+    virtual double norm(const Vector& x) const override {
+        return std::sqrt(this->dot(x, x));
+    }
+};
+
+//! @brief Generalized mixed precision operator interface
+//!
+//! @tparam Matrix the block-matrix used by linear operator
+//! @tparam Vector the block-vector used by linear operator
+//! @tparam Comm the communicator used by linear operator
+template <class Matrix, class Vector, class Comm>
+struct MixedOperator
+{
+    using type = Dune::OverlappingSchwarzOperator<Matrix, Vector, Vector, Comm>;
+};
+
+//! @brief Generalized mixed precision operator interface
+//!
+//! @tparam Matrix the block-matrix used by linear operator
+//! @tparam Vector the block-vector used by linear operator
+template <class Matrix, class Vector>
+struct MixedOperator<Matrix, Vector, Dune::Amg::SequentialInformation>
+{
+    using type = Dune::MatrixAdapter<Matrix, Vector, Vector>;
+};
+
+
+//! @brief Wraps mixed precision
+//!
+//! @tparam Comm the communicator passed to FlexibleLinearSolver
+//! @tparam Operator the linear operator passed to FlexibleLinearSolver
+//! @tparam Vector the block vector passed to FlexibleLinearSolver
+template <class Comm, class Operator, class Vector>
+class MixedAdapter:public InverseOperator<Vector, Vector>
+{
+    public:
+
+    using AbstractPrecondType = Dune::PreconditionerWithUpdate<Vector,Vector>;
+    using AbstractScalarProductType = Dune::ScalarProduct<Vector>;
+
+    static constexpr auto block_size = Vector::block_type::dimension;
+    using MixedMatrixType      = MatrixWrapper<Vector, block_size>;
+    using MixedOperatorType    = MixedOperator<MixedMatrixType, Vector, Comm>::type;
+    using OptimizedProductType = SeqOptmizedProduct<Vector>;
+
+    //! @brief constructor
+    //!
+    //! @param op the linear operator (assumed double precision)
+    //! @param sp the scalar product
+    //! @param prec the preconditioner to use
+    //! @param reduction the reduction factor passed to the iterative solver
+    //! @param maxit maximum number of iterations for the linear solver
+    //! @param verbose verbosity level
+    //! @param comm the communication object.
+    MixedAdapter(Operator *op,
+                 std::shared_ptr<AbstractScalarProductType> sp,
+                 std::shared_ptr<AbstractPrecondType> prec,
+                 const double& tol,
+                 const int& maxiter,
+                 const int& verbosity,
+                 const Comm &comm)
+    {
+
+        // Access matrix data from double precision operator
+        auto &A = op->getmat();
+        int nrows = A.N();
+        int nnz = A.nonzeroes();
+        double_data_ = &A[0][0][0][0];
+
+        //allocate mixed matrix
+        mixed_matrix_ = std::make_shared<MixedMatrixType>(nrows,nnz);
+        //auto &B = *mixed_matrix_;
+
+        // copy sparsity pattern from double precision matrix
+        int *rows = mixed_matrix_->rowptr();
+        int *cols = mixed_matrix_->colidx();
+
+        int irow = 0;
+        int icol = 0;
+        rows[0]  = 0;
+        for(auto row=A.begin(); row!=A.end(); row++)
+        {
+            for(unsigned int i=0; i<row->getsize(); i++)
+            {
+                cols[icol++] = row->getindexptr()[i];
+            }
+            rows[irow+1]     = rows[irow]+row->getsize();
+            irow++;
+        }
+
+        //initialize mixed operator and optimized scalar product
+        double_operator_ = op;
+        if constexpr (std::is_same_v<Comm, Dune::Amg::SequentialInformation>)
+        {
+            mixed_operator_ = std::make_shared<MixedOperatorType>(*mixed_matrix_);
+            scalar_product_ = std::make_shared<OptimizedProductType>();
+        }
+        else
+        {
+            mixed_operator_ = std::make_shared<MixedOperatorType>(*mixed_matrix_,comm);
+            scalar_product_ = sp;
+        }
+
+        //initialize bicgstab solver from Dune
+        solver_ = std::make_shared<Dune::BiCGSTABSolver<Vector>>(
+                                                              *mixed_operator_,
+                                                              *scalar_product_,
+                                                              *prec,
+                                                              tol, // desired residual reduction factor
+                                                              maxiter, // maximum number of iterations
+                                                              verbosity);
+    }
+
+    virtual void apply(Vector &x, Vector &b, InverseOperatorResult &res) override
+    {
+        //transpose dense blocks and demote to single precision
+        mixed_matrix_->update(double_data_);
+
+        //apply bicgstab solver from Dune
+        solver_->apply(x,b,res);
+    }
+
+    virtual void apply(Vector &x, Vector &b, double reduction, InverseOperatorResult &res) override
+    {
+        x=0;
+        b=0;
+        res.reduction = reduction;
+        OPM_THROW(std::invalid_argument, "MixedAdapter::apply(...) not implemented yet.");
+    }
+
+    virtual Dune::SolverCategory::Category category() const override{return Dune::SolverCategory::overlapping;};
+
+    private:
+    using AbstractSolverType = Dune::InverseOperator<Vector,Vector>;
+
+
+    Operator *double_operator_;
+    std::shared_ptr<AbstractSolverType> solver_;
+    std::shared_ptr<MixedOperatorType> mixed_operator_;
+    std::shared_ptr<MixedMatrixType> mixed_matrix_;
+    std::shared_ptr<AbstractScalarProductType> scalar_product_;
+    double const *double_data_;
+
+};
+
+}
+
+#endif // OPM_MIXED_ADAPTER_HEADER_INCLUDED
diff --git a/opm/simulators/linalg/mixed/bsr.c b/opm/simulators/linalg/mixed/bsr.c
index ead89998575..c7de0874add 100644
--- a/opm/simulators/linalg/mixed/bsr.c
+++ b/opm/simulators/linalg/mixed/bsr.c
@@ -107,6 +107,46 @@ void bsr_vmspmv3(bsr_matrix *A, const double *x, double *y)
     }
 }
 
+void bsr_vmspumv3(bsr_matrix *A, const double *x, double *y, double alpha)
+{
+    int nrows = A->nrows;
+    int *rowptr=A->rowptr;
+    int *colidx=A->colidx;
+    const float *data=A->flt;
+
+    const int b=3;
+
+    __m256d valpha = _mm256_set1_pd(alpha);
+
+    __m256d mm_zeros =_mm256_setzero_pd();
+    for(int i=0;i<nrows;i++)
+    {
+        __m256d vA[3];
+        for(int k=0;k<3;k++) vA[k] = mm_zeros;
+        for(int k=rowptr[i];k<rowptr[i+1];k++)
+        {
+            const float *AA=data+9*k;
+
+            int j = colidx[k];
+            __m256d vx = _mm256_loadu_pd(x+b*j);
+
+            vA[0] += _mm256_cvtps_pd(_mm_loadu_ps(AA+0))*_mm256_permute4x64_pd(vx,0b00000000); //0b01010101
+            vA[1] += _mm256_cvtps_pd(_mm_loadu_ps(AA+3))*_mm256_permute4x64_pd(vx,0b01010101); //0b01010101
+            vA[2] += _mm256_cvtps_pd(_mm_loadu_ps(AA+6))*_mm256_permute4x64_pd(vx,0b10101010); //0b01010101
+        }
+
+        // sum over columns
+        __m256d vy, vz;
+        vz = vA[0] + vA[1] + vA[2];
+
+        double *y_i = y+b*i;
+        vy = _mm256_loadu_pd(y_i);       // optional blend to keep
+        vz = vy + valpha*vz;
+        vz =_mm256_blend_pd(vy,vz,0x7);  // 4th element unchanged
+        _mm256_storeu_pd(y_i,vz);
+    }
+}
+
 void bsr_vdspmv3(bsr_matrix *A, const double *x, double *y)
 {
     int nrows = A->nrows;
diff --git a/opm/simulators/linalg/mixed/bsr.h b/opm/simulators/linalg/mixed/bsr.h
index 11e72787eaf..f093ab5f557 100644
--- a/opm/simulators/linalg/mixed/bsr.h
+++ b/opm/simulators/linalg/mixed/bsr.h
@@ -64,6 +64,19 @@ void bsr_init(bsr_matrix *A, int nrows, int nnz, int b);
  * @param x Pointer to input vector.
  * @param y Pointer to output vector.
  */
+void bsr_vmspumv3(bsr_matrix *A, const double *x, double *y, double alpha);
+
+/**
+ * @brief Sparse matrix-vector multiplication in mixed precision.
+ *
+ * @note Function is specialized for 3x3 block-sparse matrices.
+ * @note Function uses AVX2 intrinsics.
+ *
+ * @param A Pointer to bsr matrix.
+ * @param x Pointer to input vector.
+ * @param y Pointer to output vector.
+ */
+
 void bsr_vmspmv3(bsr_matrix *A, const double *x, double *y);
 
 /**
diff --git a/opm/simulators/linalg/setupPropertyTree.cpp b/opm/simulators/linalg/setupPropertyTree.cpp
index 6833ca20aac..0af2e084501 100644
--- a/opm/simulators/linalg/setupPropertyTree.cpp
+++ b/opm/simulators/linalg/setupPropertyTree.cpp
@@ -248,6 +248,16 @@ setupPropertyTree(FlowLinearSolverParameters p, // Note: copying the parameters
         return setupMixedDILU(conf, p);
     }
 
+    // mixed-precision ILU0
+    if (conf == "legacy-mixed-ilu0") {
+        return setupLegacyMixedILU(conf, p);
+    }
+
+    // mixed-precision ILU0
+    if (conf == "legacy-mixed-dilu") {
+        return setupLegacyMixedDILU(conf, p);
+    }
+
     if (conf == "dilu") {
         return setupDILU(conf, p);
     }
@@ -416,7 +426,7 @@ setupMixedILU([[maybe_unused]] const std::string& conf, const FlowLinearSolverPa
     prm.put("tol", p.linear_solver_reduction_);
     prm.put("maxiter", p.linear_solver_maxiter_);
     prm.put("verbosity", p.linear_solver_verbosity_);
-    prm.put("solver", "mixed-bicgstab"s);
+    prm.put("solver", "mixed-precision"s);
     prm.put("preconditioner.type", "mixed-ilu0"s);
     return prm;
 }
@@ -429,12 +439,39 @@ setupMixedDILU([[maybe_unused]] const std::string& conf, const FlowLinearSolverP
     prm.put("tol", p.linear_solver_reduction_);
     prm.put("maxiter", p.linear_solver_maxiter_);
     prm.put("verbosity", p.linear_solver_verbosity_);
-    prm.put("solver", "mixed-bicgstab"s);
+    prm.put("solver", "mixed-precision"s);
     prm.put("preconditioner.type", "mixed-dilu"s);
     return prm;
 }
 
 
+PropertyTree
+setupLegacyMixedILU([[maybe_unused]] const std::string& conf, const FlowLinearSolverParameters& p)
+{
+    using namespace std::string_literals;
+    PropertyTree prm;
+    prm.put("tol", p.linear_solver_reduction_);
+    prm.put("maxiter", p.linear_solver_maxiter_);
+    prm.put("verbosity", p.linear_solver_verbosity_);
+    prm.put("solver", "mixed-bicgstab"s);
+    prm.put("preconditioner.type", "legacy-mixed-ilu0"s);
+    return prm;
+}
+
+PropertyTree
+setupLegacyMixedDILU([[maybe_unused]] const std::string& conf, const FlowLinearSolverParameters& p)
+{
+    using namespace std::string_literals;
+    PropertyTree prm;
+    prm.put("tol", p.linear_solver_reduction_);
+    prm.put("maxiter", p.linear_solver_maxiter_);
+    prm.put("verbosity", p.linear_solver_verbosity_);
+    prm.put("solver", "mixed-bicgstab"s);
+    prm.put("preconditioner.type", "legacy-mixed-dilu"s);
+    return prm;
+}
+
+
 PropertyTree
 setupDILU([[maybe_unused]] const std::string& conf, const FlowLinearSolverParameters& p)
 {
diff --git a/opm/simulators/linalg/setupPropertyTree.hpp b/opm/simulators/linalg/setupPropertyTree.hpp
index a424a573259..0c5b206ce3e 100644
--- a/opm/simulators/linalg/setupPropertyTree.hpp
+++ b/opm/simulators/linalg/setupPropertyTree.hpp
@@ -38,8 +38,10 @@ PropertyTree setupCPRW(const std::string& conf, const FlowLinearSolverParameters
 PropertyTree setupCPR(const std::string& conf, const FlowLinearSolverParameters& p);
 PropertyTree setupAMG(const std::string& conf, const FlowLinearSolverParameters& p);
 PropertyTree setupILU(const std::string& conf, const FlowLinearSolverParameters& p);
+PropertyTree setupLegacyMixedILU(const std::string& conf, const FlowLinearSolverParameters& p);
 PropertyTree setupMixedILU(const std::string& conf, const FlowLinearSolverParameters& p);
 PropertyTree setupDILU(const std::string& conf, const FlowLinearSolverParameters& p);
+PropertyTree setupLegacyMixedDILU(const std::string& conf, const FlowLinearSolverParameters& p);
 PropertyTree setupMixedDILU(const std::string& conf, const FlowLinearSolverParameters& p);
 PropertyTree setupUMFPack(const std::string& conf, const FlowLinearSolverParameters& p);