From 1ce9999f45beb9f52837edf09206127637a9d38d Mon Sep 17 00:00:00 2001 From: Kjetil Olsen Lye Date: Thu, 30 Apr 2026 20:50:30 +0000 Subject: [PATCH 1/5] gpu_smart_pointer: extend GPU smart-pointer utilities Adds host-side allocator helpers, host/device-decorated raw-pointer accessors and small additions to MiniVector that the rest of the GPU intensive-quantities work depends on. Updates test_gpu_smart_pointers.cu to cover the new behaviour. --- opm/simulators/linalg/gpuistl/MiniVector.hpp | 2 + .../linalg/gpuistl/gpu_smart_pointer.hpp | 94 ++++++++++++++++++- tests/gpuistl/test_gpu_smart_pointers.cu | 41 ++++++++ 3 files changed, 136 insertions(+), 1 deletion(-) diff --git a/opm/simulators/linalg/gpuistl/MiniVector.hpp b/opm/simulators/linalg/gpuistl/MiniVector.hpp index 921f3a9271a..7c2af6e23ea 100644 --- a/opm/simulators/linalg/gpuistl/MiniVector.hpp +++ b/opm/simulators/linalg/gpuistl/MiniVector.hpp @@ -78,6 +78,7 @@ class MiniVector fill(value); } + #if HAVE_DUNE_COMMON /** * @brief Conversion constructor from Dune::FieldVector. * @param fv Source FieldVector (must have dimension `Dimension`). @@ -88,6 +89,7 @@ class MiniVector data_[i] = fv[i]; } } + #endif /** * @brief Initializer‑list constructor. diff --git a/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp b/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp index ee7cb4950de..9fe166ace44 100644 --- a/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp +++ b/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp @@ -115,6 +115,96 @@ make_gpu_unique_ptr(const T& value) return ptr; } +/** + * @brief Deleter that releases a GPU array allocation made with \c cudaMalloc. + * + * Used by \c make_gpu_unique_ptr_array. Having a named deleter type (rather than a lambda) + * makes the resulting \c std::unique_ptr type stable across translation units, which is + * important when storing it as a class member. + */ +template +struct GpuArrayDeleter { + void operator()(T* ptr) const noexcept + { + OPM_GPU_WARN_IF_ERROR(cudaFree(ptr)); + } +}; + +/** + * @brief Deleter for objects living in unified (managed) GPU memory. + * + * Calls the destructor of the contained object before releasing the allocation with + * \c cudaFree. Used by \c make_gpu_managed_unique_ptr. + */ +template +struct GpuManagedDeleter { + void operator()(T* ptr) const noexcept + { + if (ptr != nullptr) { + ptr->~T(); + OPM_GPU_WARN_IF_ERROR(cudaFree(ptr)); + } + } +}; + +/** + * @brief Creates a unique pointer managing a GPU-allocated array of \p numElements elements. + * + * This function allocates memory on the GPU for \p numElements elements of type \c T using + * \c cudaMalloc. It returns a \c std::unique_ptr that automatically releases the GPU memory + * with \c cudaFree when destroyed. + * + * The returned smart pointer uses the \c T[] specialization, so element access via + * \c operator[] is well defined; however, dereferencing the returned pointer on the host is + * not safe because the underlying memory lives on the device. + * + * @tparam T The element type to allocate on the GPU. + * @param numElements The number of elements of type \c T to allocate. + * @return A \c std::unique_ptr to the GPU-allocated array. + */ +template +std::unique_ptr> +make_gpu_unique_ptr_array(std::size_t numElements) +{ + T* ptr = nullptr; + OPM_GPU_SAFE_CALL(cudaMalloc(&ptr, numElements * sizeof(T))); + return std::unique_ptr>(ptr); +} + +/** + * @brief Creates a unique pointer managing GPU unified (managed) memory for a single object. + * + * Allocates one \c T worth of unified memory with \c cudaMallocManaged, then constructs a + * \c T in-place using the supplied arguments via placement-new. The returned + * \c std::unique_ptr destroys the contained object and releases the unified memory with + * \c cudaFree when it goes out of scope. + * + * Unified memory is accessible from both host and device, so the returned pointer can be + * dereferenced directly on either side. This is useful when an object must be visible to + * GPU kernels but also needs to be touched from host code (e.g. when a device-side member + * holds a pointer into the same unified-memory region). + * + * @tparam T The type of the object to construct in unified memory. + * @tparam Args Argument types forwarded to \c T's constructor. + * @param args Arguments forwarded to \c T's constructor. + * @return A \c std::unique_ptr owning the unified-memory allocation. + */ +template +std::unique_ptr> +make_gpu_managed_unique_ptr(Args&&... args) +{ + void* raw = nullptr; + OPM_GPU_SAFE_CALL(cudaMallocManaged(&raw, sizeof(T))); + T* ptr = nullptr; + try { + ptr = new (raw) T(std::forward(args)...); + } catch (...) { + OPM_GPU_WARN_IF_ERROR(cudaFree(raw)); + throw; + } + return std::unique_ptr>(ptr); +} + /** * @brief Copies a value from GPU-allocated memory to the host. * @@ -336,7 +426,9 @@ class ValueAsPointer { public: using element_type = T; - ValueAsPointer(const T& t) : value(t) {} + OPM_HOST_DEVICE ValueAsPointer() = default; + + OPM_HOST_DEVICE explicit ValueAsPointer(const T& t) : value(t) {} OPM_HOST_DEVICE T* operator->() { return &value; diff --git a/tests/gpuistl/test_gpu_smart_pointers.cu b/tests/gpuistl/test_gpu_smart_pointers.cu index 5ba3b7a93bb..881e19a349b 100644 --- a/tests/gpuistl/test_gpu_smart_pointers.cu +++ b/tests/gpuistl/test_gpu_smart_pointers.cu @@ -145,3 +145,44 @@ BOOST_AUTO_TEST_CASE(TestCopyToGPU) { BOOST_CHECK_EQUAL(valueFromDeviceArray[2], 3.0); BOOST_CHECK_EQUAL(valueFromDeviceArray[3], 4.0); } + +BOOST_AUTO_TEST_CASE(TestUniquePointerArray) +{ + constexpr std::size_t numElements = 5; + auto arrayPtr = Opm::gpuistl::make_gpu_unique_ptr_array(numElements); + + const std::array hostInput {1.5, 2.5, 3.5, 4.5, 5.5}; + OPM_GPU_SAFE_CALL(cudaMemcpy(arrayPtr.get(), + hostInput.data(), + numElements * sizeof(double), + cudaMemcpyHostToDevice)); + + std::array hostOutput {}; + OPM_GPU_SAFE_CALL(cudaMemcpy(hostOutput.data(), + arrayPtr.get(), + numElements * sizeof(double), + cudaMemcpyDeviceToHost)); + + for (std::size_t i = 0; i < numElements; ++i) { + BOOST_CHECK_EQUAL(hostOutput[i], hostInput[i]); + } +} + +BOOST_AUTO_TEST_CASE(TestManagedUniquePointer) +{ + // Default-constructed payload. + auto managedPtr = Opm::gpuistl::make_gpu_managed_unique_ptr(); + BOOST_CHECK_EQUAL(managedPtr->isCalled, false); + + // Mutate from the device through a regular kernel; the unified-memory + // pointer is dereferenceable on the device, just like cudaMallocManaged. + callFunction<<<1, 1>>>(Opm::gpuistl::make_view(managedPtr)); + OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); + + // Read back directly from host (the whole point of unified memory). + BOOST_CHECK_EQUAL(managedPtr->isCalled, true); + + // Forwarded constructor arguments. + auto managedDouble = Opm::gpuistl::make_gpu_managed_unique_ptr(7.25); + BOOST_CHECK_EQUAL(*managedDouble, 7.25); +} From 634524f803835e168552f2e64450b6c646c75212 Mon Sep 17 00:00:00 2001 From: Kjetil Olsen Lye Date: Thu, 30 Apr 2026 20:50:31 +0000 Subject: [PATCH 2/5] GPU portability: OPM_HOST_DEVICE/OPM_THROW additions and fluidSystem() use Decorates inline helpers in blackoil*modules.hh, blackoilprimaryvariables.hh, directionalmobility.hh, tpfalinearizer.hh, NewTranFluxModule.hpp, TemperatureModel.hpp and BlackOilEnergyIntensiveQuantitiesGlobalIndex.hpp with OPM_HOST_DEVICE, replaces a few raw `throw`s with OPM_THROW, and switches static `FluidSystem::` accesses to instance-based `fluidState.fluidSystem()` / `getFluidSystem()` so the same code works when the fluid system lives on the device. Test test_gpu_linear_two_phase_material.cu picks up the matching template adjustments. --- ...ilEnergyIntensiveQuantitiesGlobalIndex.hpp | 86 +++---- opm/models/blackoil/blackoilbrinemodules.hh | 60 ++--- opm/models/blackoil/blackoilenergymodules.hh | 214 +++++++++--------- opm/models/blackoil/blackoilfoammodules.hh | 3 +- .../blackoil/blackoilprimaryvariables.hh | 10 +- opm/models/common/directionalmobility.hh | 11 +- .../discretization/common/tpfalinearizer.hh | 4 +- opm/simulators/flow/NewTranFluxModule.hpp | 7 +- opm/simulators/flow/TemperatureModel.hpp | 21 +- .../test_gpu_linear_two_phase_material.cu | 10 +- 10 files changed, 222 insertions(+), 204 deletions(-) diff --git a/flowexperimental/BlackOilEnergyIntensiveQuantitiesGlobalIndex.hpp b/flowexperimental/BlackOilEnergyIntensiveQuantitiesGlobalIndex.hpp index bcb684800de..0c62cbbb26b 100644 --- a/flowexperimental/BlackOilEnergyIntensiveQuantitiesGlobalIndex.hpp +++ b/flowexperimental/BlackOilEnergyIntensiveQuantitiesGlobalIndex.hpp @@ -59,20 +59,20 @@ class BlackOilEnergyIntensiveQuantitiesGlobalIndex; public: - void updateTemperature_([[maybe_unused]] const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - [[maybe_unused]] unsigned globalSpaceIdx, - [[maybe_unused]] unsigned timeIdx) + OPM_HOST_DEVICE void updateTemperature_([[maybe_unused]] const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + [[maybe_unused]] unsigned globalSpaceIdx, + [[maybe_unused]] unsigned timeIdx) { auto& fs = this->asImp_().fluidState_; Scalar T = problem.temperature(globalSpaceIdx, timeIdx); fs.setTemperature(T); } - void updateEnergyQuantities_([[maybe_unused]] const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - [[maybe_unused]] unsigned globalSpaceIdx, - [[maybe_unused]] unsigned timeIdx, - const typename FluidSystem::template ParameterCache&) + OPM_HOST_DEVICE void updateEnergyQuantities_([[maybe_unused]] const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + [[maybe_unused]] unsigned globalSpaceIdx, + [[maybe_unused]] unsigned timeIdx, + const typename FluidSystem::template ParameterCache&) { } }; @@ -185,17 +187,17 @@ class BlackOilEnergyIntensiveQuantitiesGlobalIndex; public: - void updateTemperature_([[maybe_unused]] const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - [[maybe_unused]] unsigned globalSpaceIdx, - [[maybe_unused]] unsigned timeIdx) + OPM_HOST_DEVICE void updateTemperature_([[maybe_unused]] const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + [[maybe_unused]] unsigned globalSpaceIdx, + [[maybe_unused]] unsigned timeIdx) { } - void updateEnergyQuantities_([[maybe_unused]] const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - [[maybe_unused]] unsigned globalSpaceIdx, - [[maybe_unused]] unsigned timeIdx, - const typename FluidSystem::template ParameterCache&) + OPM_HOST_DEVICE void updateEnergyQuantities_([[maybe_unused]] const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + [[maybe_unused]] unsigned globalSpaceIdx, + [[maybe_unused]] unsigned timeIdx, + const typename FluidSystem::template ParameterCache&) { } }; diff --git a/opm/models/blackoil/blackoilbrinemodules.hh b/opm/models/blackoil/blackoilbrinemodules.hh index caa5daf74c2..6947429d8a4 100644 --- a/opm/models/blackoil/blackoilbrinemodules.hh +++ b/opm/models/blackoil/blackoilbrinemodules.hh @@ -98,7 +98,7 @@ public: static void registerParameters() {} - static bool primaryVarApplies(unsigned pvIdx) + static OPM_HOST_DEVICE bool primaryVarApplies(unsigned pvIdx) { if constexpr (enableBrine) { return pvIdx == saltConcentrationIdx; @@ -135,7 +135,7 @@ public: return static_cast(1.0); } - static bool eqApplies(unsigned eqIdx) + static OPM_HOST_DEVICE bool eqApplies(unsigned eqIdx) { if constexpr (enableBrine) { return eqIdx == contiBrineEqIdx; @@ -405,18 +405,18 @@ public: * primary variables * */ - void updateSaltConcentration_(const ElementContext& elemCtx, - unsigned dofIdx, - unsigned timeIdx) + OPM_HOST_DEVICE void updateSaltConcentration_(const ElementContext& elemCtx, + unsigned dofIdx, + unsigned timeIdx) { const PrimaryVariables& priVars = elemCtx.primaryVars(dofIdx, timeIdx); const LinearizationType lintype = elemCtx.linearizationType(); updateSaltConcentration_(priVars, timeIdx, lintype); } - void updateSaltConcentration_(const PrimaryVariables& priVars, - const unsigned timeIdx, - const LinearizationType lintype) + OPM_HOST_DEVICE void updateSaltConcentration_(const PrimaryVariables& priVars, + const unsigned timeIdx, + const LinearizationType lintype) { const unsigned pvtnumRegionIdx = priVars.pvtRegionIndex(); auto& fs = asImp_().fluidState_; @@ -442,9 +442,9 @@ public: } } - void saltPropertiesUpdate_([[maybe_unused]] const ElementContext& elemCtx, - [[maybe_unused]] unsigned dofIdx, - [[maybe_unused]] unsigned timeIdx) + OPM_HOST_DEVICE void saltPropertiesUpdate_([[maybe_unused]] const ElementContext& elemCtx, + [[maybe_unused]] unsigned dofIdx, + [[maybe_unused]] unsigned timeIdx) { if constexpr (enableSaltPrecipitation) { const Evaluation porosityFactor = min(1.0 - asImp_().fluidState_.saltSaturation(), 1.0); //phi/phi_0 @@ -455,20 +455,20 @@ public: } } - const Evaluation& brineRefDensity() const + OPM_HOST_DEVICE const Evaluation& brineRefDensity() const { return refDensity_; } - Scalar saltSolubility() const + OPM_HOST_DEVICE Scalar saltSolubility() const { return saltSolubility_; } - Scalar saltDensity() const + OPM_HOST_DEVICE Scalar saltDensity() const { return saltDensity_; } - const Evaluation& permFactor() const + OPM_HOST_DEVICE const Evaluation& permFactor() const { return permFactor_; } protected: - Implementation& asImp_() + OPM_HOST_DEVICE Implementation& asImp_() { return *static_cast(this); } Evaluation saltConcentration_; @@ -487,27 +487,27 @@ class BlackOilBrineIntensiveQuantities using Scalar = GetPropType; public: - void updateSaltConcentration_(const ElementContext&, - unsigned, - unsigned) + OPM_HOST_DEVICE void updateSaltConcentration_(const ElementContext&, + unsigned, + unsigned) {} - void saltPropertiesUpdate_(const ElementContext&, - unsigned, - unsigned) + OPM_HOST_DEVICE void saltPropertiesUpdate_(const ElementContext&, + unsigned, + unsigned) {} - const Evaluation& brineRefDensity() const - { throw std::runtime_error("brineRefDensity() called but brine are disabled"); } + OPM_HOST_DEVICE const Evaluation& brineRefDensity() const + { OPM_THROW(std::runtime_error, "brineRefDensity() called but brine are disabled"); } - const Scalar saltSolubility() const - { throw std::logic_error("saltSolubility() called but salt precipitation is disabled"); } + OPM_HOST_DEVICE const Scalar saltSolubility() const + { OPM_THROW(std::logic_error, "saltSolubility() called but salt precipitation is disabled"); } - const Scalar saltDensity() const - { throw std::logic_error("saltDensity() called but salt precipitation is disabled"); } + OPM_HOST_DEVICE const Scalar saltDensity() const + { OPM_THROW(std::logic_error, "saltDensity() called but salt precipitation is disabled"); } - const Evaluation& permFactor() const - { throw std::logic_error("permFactor() called but salt precipitation is disabled"); } + OPM_HOST_DEVICE const Evaluation& permFactor() const + { OPM_THROW(std::logic_error, "permFactor() called but salt precipitation is disabled"); } }; } // namespace Opm diff --git a/opm/models/blackoil/blackoilenergymodules.hh b/opm/models/blackoil/blackoilenergymodules.hh index 5d1aebf2fa4..e2cce9cd2a5 100644 --- a/opm/models/blackoil/blackoilenergymodules.hh +++ b/opm/models/blackoil/blackoilenergymodules.hh @@ -30,6 +30,7 @@ #include +#include #include #include @@ -104,7 +105,7 @@ public: } } - static bool primaryVarApplies(unsigned pvIdx) + static OPM_HOST_DEVICE bool primaryVarApplies(unsigned pvIdx) { if constexpr (enableFullyImplicitThermal) { return pvIdx == temperatureIdx; @@ -129,7 +130,7 @@ public: return static_cast(1.0); } - static bool eqApplies(unsigned eqIdx) + static OPM_HOST_DEVICE bool eqApplies(unsigned eqIdx) { if constexpr (enableFullyImplicitThermal) { return eqIdx == contiEnergyEqIdx; @@ -241,11 +242,11 @@ public: } template - static void addPhaseEnthalpyFlux_(RateVector& flux, - unsigned phaseIdx, - const ElementContext& elemCtx, - unsigned scvfIdx, - unsigned timeIdx) + OPM_HOST_DEVICE static void addPhaseEnthalpyFlux_(RateVector& flux, + unsigned phaseIdx, + const ElementContext& elemCtx, + unsigned scvfIdx, + unsigned timeIdx) { const auto& extQuants = elemCtx.extensiveQuantities(scvfIdx, timeIdx); const unsigned upIdx = extQuants.upstreamIndex(phaseIdx); @@ -258,8 +259,8 @@ public: fs); } - static void addToEnthalpyRate(RateVector& flux, - const Evaluation& hRate) + OPM_HOST_DEVICE static void addToEnthalpyRate(RateVector& flux, + const Evaluation& hRate) { if constexpr (enableFullyImplicitThermal) { flux[contiEnergyEqIdx] += hRate; @@ -270,8 +271,8 @@ public: * \brief Assign the energy specific primary variables to a PrimaryVariables object */ template - static void assignPrimaryVars(PrimaryVariables& priVars, - const FluidState& fluidState) + OPM_HOST_DEVICE static void assignPrimaryVars(PrimaryVariables& priVars, + const FluidState& fluidState) { if constexpr (enableFullyImplicitThermal) { priVars[temperatureIdx] = getValue(fluidState.temperature(/*phaseIdx=*/0)); @@ -281,9 +282,9 @@ public: /*! * \brief Do a Newton-Raphson update the primary variables of the energys. */ - static void updatePrimaryVars(PrimaryVariables& newPv, - const PrimaryVariables& oldPv, - const EqVector& delta) + OPM_HOST_DEVICE static void updatePrimaryVars(PrimaryVariables& newPv, + const PrimaryVariables& oldPv, + const EqVector& delta) { if constexpr (enableFullyImplicitThermal) { // do a plain unchopped Newton update @@ -294,8 +295,8 @@ public: /*! * \brief Return how much a Newton-Raphson update is considered an error */ - static Scalar computeUpdateError(const PrimaryVariables&, - const EqVector&) + OPM_HOST_DEVICE static Scalar computeUpdateError(const PrimaryVariables&, + const EqVector&) { // do not consider consider the cange of energy primary variables for // convergence @@ -306,7 +307,7 @@ public: /*! * \brief Return how much a residual is considered an error */ - static Scalar computeResidualError(const EqVector& resid) + OPM_HOST_DEVICE static Scalar computeResidualError(const EqVector& resid) { // do not weight the residual of energy when it comes to convergence return std::abs(scalarValue(resid[contiEnergyEqIdx])); @@ -385,9 +386,9 @@ public: * \brief Update the temperature of the intensive quantity's fluid state * */ - void updateTemperature_(const ElementContext& elemCtx, - unsigned dofIdx, - unsigned timeIdx) + OPM_HOST_DEVICE void updateTemperature_(const ElementContext& elemCtx, + unsigned dofIdx, + unsigned timeIdx) { auto& fs = asImp_().fluidState_; const auto& priVars = elemCtx.primaryVars(dofIdx, timeIdx); @@ -400,11 +401,11 @@ public: * \brief Update the temperature of the intensive quantity's fluid state * */ - void updateTemperature_([[maybe_unused]] const Problem& problem, - const PrimaryVariables& priVars, - [[maybe_unused]] unsigned globalDofIdx, - const unsigned timeIdx, - const LinearizationType& lintype) + OPM_HOST_DEVICE void updateTemperature_([[maybe_unused]] const Problem& problem, + const PrimaryVariables& priVars, + [[maybe_unused]] unsigned globalDofIdx, + const unsigned timeIdx, + const LinearizationType& lintype) { auto& fs = asImp_().fluidState_; fs.setTemperature(priVars.makeEvaluation(temperatureIdx, timeIdx, lintype)); @@ -414,27 +415,27 @@ public: * \brief Compute the intensive quantities needed to handle energy conservation * */ - void updateEnergyQuantities_(const ElementContext& elemCtx, - unsigned dofIdx, - unsigned timeIdx) + OPM_HOST_DEVICE void updateEnergyQuantities_(const ElementContext& elemCtx, + unsigned dofIdx, + unsigned timeIdx) { updateEnergyQuantities_(elemCtx.problem(), elemCtx.globalSpaceIndex(dofIdx, timeIdx), timeIdx); } - void updateEnergyQuantities_(const Problem& problem, - const unsigned globalSpaceIdx, - const unsigned timeIdx) + OPM_HOST_DEVICE void updateEnergyQuantities_(const Problem& problem, + const unsigned globalSpaceIdx, + const unsigned timeIdx) { auto& fs = asImp_().fluidState_; // compute the specific enthalpy of the fluids, the specific enthalpy of the rock // and the thermal conductivity coefficients for (int phaseIdx = 0; phaseIdx < numPhases; ++ phaseIdx) { - if (!FluidSystem::phaseIsActive(phaseIdx)) { + if (!asImp_().getFluidSystem().phaseIsActive(phaseIdx)) { continue; } - const auto& h = FluidSystem::enthalpy(fs, phaseIdx, problem.pvtRegionIndex(globalSpaceIdx)); + const auto& h = asImp_().getFluidSystem().enthalpy(fs, phaseIdx, problem.pvtRegionIndex(globalSpaceIdx)); fs.setEnthalpy(phaseIdx, h); } @@ -462,7 +463,7 @@ public: { return rockFraction_; } protected: - Implementation& asImp_() + OPM_HOST_DEVICE Implementation& asImp_() { return *static_cast(this); } Evaluation rockInternalEnergy_; @@ -484,51 +485,54 @@ class BlackOilEnergyIntensiveQuantities - void updateTemperature_(const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - unsigned globalDofIdx, - unsigned timeIdx, - [[maybe_unused]] const LinearizationType& lintype - ) + OPM_HOST_DEVICE void updateTemperature_(const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + unsigned globalDofIdx, + unsigned timeIdx, + [[maybe_unused]] const LinearizationType& lintype) { updateTemperature_(problem, globalDofIdx, timeIdx); } - void updateTemperature_(const Problem& problem, unsigned globalDofIdx, unsigned timeIdx) + OPM_HOST_DEVICE void updateTemperature_(const Problem& problem, + unsigned globalDofIdx, + unsigned timeIdx) { auto& fs = asImp_().fluidState_; const Scalar T = problem.temperature(globalDofIdx, timeIdx); fs.setTemperature(T); } - void updateEnergyQuantities_(const ElementContext&, - unsigned, - unsigned, - const typename FluidSystem::template ParameterCache&) + OPM_HOST_DEVICE void updateEnergyQuantities_(const ElementContext&, + unsigned, + unsigned, + const typename FluidSystem::template ParameterCache&) {} - const Evaluation& rockInternalEnergy() const + OPM_HOST_DEVICE const Evaluation& rockInternalEnergy() const { - throw std::logic_error("Requested the rock internal energy, which is " - "unavailable because energy is not conserved"); + OPM_THROW(std::logic_error, + "Requested the rock internal energy, which is " + "unavailable because energy is not conserved"); } - const Evaluation& totalThermalConductivity() const + OPM_HOST_DEVICE const Evaluation& totalThermalConductivity() const { - throw std::logic_error("Requested the total thermal conductivity, which is " - "unavailable because energy is not conserved"); + OPM_THROW(std::logic_error, + "Requested the total thermal conductivity, which is " + "unavailable because energy is not conserved"); } protected: - Implementation& asImp_() + OPM_HOST_DEVICE Implementation& asImp_() { return *static_cast(this); } }; @@ -548,26 +552,28 @@ class BlackOilEnergyIntensiveQuantities - void updateTemperature_(const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - unsigned globalDofIdx, - unsigned timeIdx, - [[maybe_unused]] const LinearizationType& lintype) + OPM_HOST_DEVICE void updateTemperature_(const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + unsigned globalDofIdx, + unsigned timeIdx, + [[maybe_unused]] const LinearizationType& lintype) { updateTemperature_(problem, globalDofIdx, timeIdx); } @@ -576,32 +582,34 @@ public: * \brief Compute the intensive quantities needed to handle energy conservation * */ - void updateEnergyQuantities_([[maybe_unused]] const ElementContext& elemCtx, - [[maybe_unused]] unsigned dofIdx, - [[maybe_unused]] unsigned timeIdx) + OPM_HOST_DEVICE void updateEnergyQuantities_([[maybe_unused]] const ElementContext& elemCtx, + [[maybe_unused]] unsigned dofIdx, + [[maybe_unused]] unsigned timeIdx) { } - void updateEnergyQuantities_([[maybe_unused]] const Problem& problem, - [[maybe_unused]] const unsigned globalSpaceIdx, - [[maybe_unused]] const unsigned timeIdx) + OPM_HOST_DEVICE void updateEnergyQuantities_([[maybe_unused]] const Problem& problem, + [[maybe_unused]] const unsigned globalSpaceIdx, + [[maybe_unused]] const unsigned timeIdx) { } - const Evaluation& rockInternalEnergy() const + OPM_HOST_DEVICE const Evaluation& rockInternalEnergy() const { - throw std::logic_error("Requested the rock internal energy, which is " - "unavailable because energy is not conserved"); + OPM_THROW(std::logic_error, + "Requested the rock internal energy, which is " + "unavailable because energy is not conserved"); } - const Evaluation& totalThermalConductivity() const + OPM_HOST_DEVICE const Evaluation& totalThermalConductivity() const { - throw std::logic_error("Requested the total thermal conductivity, which is " - "unavailable because energy is not conserved"); + OPM_THROW(std::logic_error, + "Requested the total thermal conductivity, which is " + "unavailable because energy is not conserved"); } protected: - Implementation& asImp_() + OPM_HOST_DEVICE Implementation& asImp_() { return *static_cast(this); } }; @@ -617,41 +625,43 @@ class BlackOilEnergyIntensiveQuantities using Problem = GetPropType; public: - void updateTemperature_([[maybe_unused]] const ElementContext& elemCtx, - [[maybe_unused]] unsigned dofIdx, - [[maybe_unused]] unsigned timeIdx) + OPM_HOST_DEVICE void updateTemperature_([[maybe_unused]] const ElementContext& elemCtx, + [[maybe_unused]] unsigned dofIdx, + [[maybe_unused]] unsigned timeIdx) { } template - void updateTemperature_([[maybe_unused]] const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - [[maybe_unused]] unsigned globalDofIdx, - [[maybe_unused]] unsigned timeIdx, - [[maybe_unused]] const LinearizationType& lintype) + OPM_HOST_DEVICE void updateTemperature_([[maybe_unused]] const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + [[maybe_unused]] unsigned globalDofIdx, + [[maybe_unused]] unsigned timeIdx, + [[maybe_unused]] const LinearizationType& lintype) { } - void updateEnergyQuantities_(const ElementContext&, - unsigned, - unsigned, - const typename FluidSystem::template ParameterCache&) + OPM_HOST_DEVICE void updateEnergyQuantities_(const ElementContext&, + unsigned, + unsigned, + const typename FluidSystem::template ParameterCache&) {} - const Evaluation& rockInternalEnergy() const + OPM_HOST_DEVICE const Evaluation& rockInternalEnergy() const { - throw std::logic_error("Requested the rock internal energy, which is " - "unavailable because energy is not conserved"); + OPM_THROW(std::logic_error, + "Requested the rock internal energy, which is " + "unavailable because energy is not conserved"); } - const Evaluation& totalThermalConductivity() const + OPM_HOST_DEVICE const Evaluation& totalThermalConductivity() const { - throw std::logic_error("Requested the total thermal conductivity, which is " - "unavailable because energy is not conserved"); + OPM_THROW(std::logic_error, + "Requested the total thermal conductivity, which is " + "unavailable because energy is not conserved"); } protected: - Implementation& asImp_() + OPM_HOST_DEVICE Implementation& asImp_() { return *static_cast(this); } }; @@ -873,8 +883,8 @@ public: const BoundaryFluidState& /*boundaryFs*/) {} - const Evaluation& energyFlux() const - { throw std::logic_error("Requested the energy flux, but energy is not conserved"); } + OPM_HOST_DEVICE const Evaluation& energyFlux() const + { OPM_THROW(std::logic_error, "Requested the energy flux, but energy is not conserved"); } }; template @@ -934,8 +944,8 @@ public: const BoundaryFluidState& /*boundaryFs*/) { } - const Evaluation& energyFlux() const - { throw std::logic_error("Requested the energy flux, but energy is not conserved"); } + OPM_HOST_DEVICE const Evaluation& energyFlux() const + { OPM_THROW(std::logic_error, "Requested the energy flux, but energy is not conserved"); } }; template @@ -982,8 +992,8 @@ public: const BoundaryFluidState& /*boundaryFs*/) {} - const Evaluation& energyFlux() const - { throw std::logic_error("Requested the energy flux, but energy is not conserved"); } + OPM_HOST_DEVICE const Evaluation& energyFlux() const + { OPM_THROW(std::logic_error, "Requested the energy flux, but energy is not conserved"); } }; } // namespace Opm diff --git a/opm/models/blackoil/blackoilfoammodules.hh b/opm/models/blackoil/blackoilfoammodules.hh index d90f453aff0..7fb156b091c 100644 --- a/opm/models/blackoil/blackoilfoammodules.hh +++ b/opm/models/blackoil/blackoilfoammodules.hh @@ -30,6 +30,7 @@ #include +#include #include #include @@ -184,7 +185,7 @@ public: Toolbox::template decay(intQuants.solventInverseFormationVolumeFactor()); } } else { - throw std::runtime_error("Transport phase is GAS/WATER/SOLVENT"); + OPM_THROW(std::runtime_error, "Transport phase is GAS/WATER/SOLVENT"); } // Avoid singular matrix if no gas is present. diff --git a/opm/models/blackoil/blackoilprimaryvariables.hh b/opm/models/blackoil/blackoilprimaryvariables.hh index 1ed590b8c34..8eea0396474 100644 --- a/opm/models/blackoil/blackoilprimaryvariables.hh +++ b/opm/models/blackoil/blackoilprimaryvariables.hh @@ -1024,10 +1024,10 @@ private: template OPM_HOST_DEVICE void computeCapillaryPressures_(Container& result, - Scalar so, - Scalar sg, - Scalar sw, - const MaterialLawParams& matParams) const + Scalar so, + Scalar sg, + Scalar sw, + const MaterialLawParams& matParams) const { using SatOnlyFluidState = SimpleModularFluidStategetPressureScale()); } // NOTE: When adding new member variables, be sure to update the template copy constructor, diff --git a/opm/models/common/directionalmobility.hh b/opm/models/common/directionalmobility.hh index bcf2056cbe3..35f6a81865a 100644 --- a/opm/models/common/directionalmobility.hh +++ b/opm/models/common/directionalmobility.hh @@ -30,9 +30,11 @@ #include #include +#include +#include + #include #include -#include #include namespace Opm { @@ -54,16 +56,15 @@ public: : mobility_{mX, mY, mZ} {} - const array_type& getArray(unsigned index) const + OPM_HOST_DEVICE const array_type& getArray(unsigned index) const { if (index > 2) { - throw std::runtime_error("Unexpected mobility array index " + std::to_string(index)); + OPM_THROW(std::runtime_error, "Unexpected mobility array index"); } - return mobility_[index]; } - array_type& getArray(unsigned index) + OPM_HOST_DEVICE array_type& getArray(unsigned index) { return const_cast(std::as_const(*this).getArray(index)); } diff --git a/opm/models/discretization/common/tpfalinearizer.hh b/opm/models/discretization/common/tpfalinearizer.hh index 94f6267663c..333657d7f07 100644 --- a/opm/models/discretization/common/tpfalinearizer.hh +++ b/opm/models/discretization/common/tpfalinearizer.hh @@ -98,7 +98,7 @@ struct FullDomain }; #if HAVE_CUDA && OPM_IS_COMPILING_WITH_GPU_COMPILER - FullDomain> copy_to_gpu(FullDomain<> CPUDomain) + inline FullDomain> copy_to_gpu(FullDomain<> CPUDomain) { if (CPUDomain.cells.size() == 0) { OPM_THROW(std::runtime_error, "Cannot copy empty full domain to GPU."); @@ -108,7 +108,7 @@ struct FullDomain }; }; - FullDomain> make_view(FullDomain>& buffer) + inline FullDomain> make_view(FullDomain>& buffer) { if (buffer.cells.size() == 0) { OPM_THROW(std::runtime_error, "Cannot make view of empty full domain buffer."); diff --git a/opm/simulators/flow/NewTranFluxModule.hpp b/opm/simulators/flow/NewTranFluxModule.hpp index ecfcfa05d7b..83811e38cf9 100644 --- a/opm/simulators/flow/NewTranFluxModule.hpp +++ b/opm/simulators/flow/NewTranFluxModule.hpp @@ -34,6 +34,7 @@ #include #include +#include #include #include @@ -142,7 +143,7 @@ class NewTranExtensiveQuantities */ OPM_HOST_DEVICE const DimMatrix& intrinsicPermeability() const { - throw std::invalid_argument("The ECL transmissibility module does not provide an explicit intrinsic permeability"); + OPM_THROW(std::invalid_argument, "The ECL transmissibility module does not provide an explicit intrinsic permeability"); } /*! @@ -153,7 +154,7 @@ class NewTranExtensiveQuantities */ OPM_HOST_DEVICE const EvalDimVector& potentialGrad(unsigned) const { - throw std::invalid_argument("The ECL transmissibility module does not provide explicit potential gradients"); + OPM_THROW(std::invalid_argument, "The ECL transmissibility module does not provide explicit potential gradients"); } /*! @@ -173,7 +174,7 @@ class NewTranExtensiveQuantities */ OPM_HOST_DEVICE const EvalDimVector& filterVelocity(unsigned) const { - throw std::invalid_argument("The ECL transmissibility module does not provide explicit filter velocities"); + OPM_THROW(std::invalid_argument, "The ECL transmissibility module does not provide explicit filter velocities"); } /*! diff --git a/opm/simulators/flow/TemperatureModel.hpp b/opm/simulators/flow/TemperatureModel.hpp index f387ecc3d4c..704a7cd5c0b 100644 --- a/opm/simulators/flow/TemperatureModel.hpp +++ b/opm/simulators/flow/TemperatureModel.hpp @@ -29,6 +29,7 @@ #define OPM_TEMPERATURE_MODEL_HPP #include +#include #include #include @@ -97,15 +98,17 @@ class BlackOilEnergyIntensiveQuantitiesTemp - void updateTemperature_(const Problem& problem, unsigned globalDofIdx, unsigned timeIdx) + OPM_HOST_DEVICE void updateTemperature_(const Problem& problem, + unsigned globalDofIdx, + unsigned timeIdx) { const EvaluationTemp T = EvaluationTemp::createVariable(problem.temperature(globalDofIdx, timeIdx), 0); fluidState_.setTemperature(T); } - void updateEnergyQuantities_(const Problem& problem, - const unsigned globalSpaceIdx, - const unsigned timeIdx) + OPM_HOST_DEVICE void updateEnergyQuantities_(const Problem& problem, + const unsigned globalSpaceIdx, + const unsigned timeIdx) { // compute the specific enthalpy of the fluids, the specific enthalpy of the rock // and the thermal conductivity coefficients @@ -132,20 +135,20 @@ class BlackOilEnergyIntensiveQuantitiesTemp rockFraction_ = problem.rockFraction(globalSpaceIdx, timeIdx); } - const EvaluationTemp& rockInternalEnergy() const + OPM_HOST_DEVICE const EvaluationTemp& rockInternalEnergy() const { return rockInternalEnergy_; } - const EvaluationTemp& totalThermalConductivity() const + OPM_HOST_DEVICE const EvaluationTemp& totalThermalConductivity() const { return totalThermalConductivity_; } - const Scalar& rockFraction() const + OPM_HOST_DEVICE const Scalar& rockFraction() const { return rockFraction_; } - const FluidStateTemp& fluidStateTemp() const + OPM_HOST_DEVICE const FluidStateTemp& fluidStateTemp() const { return fluidState_; } template - void setFluidState(const FluidState& fs) + OPM_HOST_DEVICE void setFluidState(const FluidState& fs) { // copy the needed part of the fluid state fluidState_.setPvtRegionIndex(fs.pvtRegionIndex()); diff --git a/tests/gpuistl/test_gpu_linear_two_phase_material.cu b/tests/gpuistl/test_gpu_linear_two_phase_material.cu index 68512d9ab8a..604de549a2c 100644 --- a/tests/gpuistl/test_gpu_linear_two_phase_material.cu +++ b/tests/gpuistl/test_gpu_linear_two_phase_material.cu @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -80,8 +81,10 @@ BOOST_AUTO_TEST_CASE(TestSimpleInterpolation) NorneEvaluation* gpuAdInput; NorneEvaluation* gpuAdResOnGPU; - OPM_GPU_SAFE_CALL(cudaMalloc(&gpuAdInput, sizeof(NorneEvaluation))); - OPM_GPU_SAFE_CALL(cudaMalloc(&gpuAdResOnGPU, sizeof(NorneEvaluation))); + auto gpuAdInputOwner = Opm::gpuistl::make_gpu_unique_ptr(); + auto gpuAdResOnGPUOwner = Opm::gpuistl::make_gpu_unique_ptr(); + gpuAdInput = gpuAdInputOwner.get(); + gpuAdResOnGPU = gpuAdResOnGPUOwner.get(); for (Scalar x_i : testXs){ auto cpuMadeAd = NorneEvaluation(x_i, 0); @@ -96,7 +99,4 @@ BOOST_AUTO_TEST_CASE(TestSimpleInterpolation) BOOST_CHECK(gpuAdResOnCPU == cpuInterpolatedEval); } - - OPM_GPU_SAFE_CALL(cudaFree(gpuAdInput)); - OPM_GPU_SAFE_CALL(cudaFree(gpuAdResOnGPU)); } From 868658caa5757a57e704e875212c47bda608fcec Mon Sep 17 00:00:00 2001 From: Kjetil Olsen Lye Date: Thu, 30 Apr 2026 21:48:03 +0000 Subject: [PATCH 3/5] GpuFlowProblem and GpuEcl{Material,Thermal}LawManager Adds the GPU-side problem and material/thermal law manager headers (GpuFlowProblem, GpuEclMaterialLawManager, GpuEclThermalLawManager) plus the matching GPU element-context header (fvbaseelementcontextgpu.hh) and the FlowGasWaterEnergyTypeTag.hpp typetag. Wires them through FlowProblemParameters and adds a Boost test (test_gpu_ecl_thermal_law_manager.cu) backed by tests/very_simple_deck.DATA. The new test is gated on HIP or CUDA >= 13.1 in CMakeLists_files.cmake. --- CMakeLists.txt | 2 + CMakeLists_files.cmake | 9 + .../common/fvbaseelementcontextgpu.hh | 309 +++++++++ .../flow/FlowGasWaterEnergyTypeTag.hpp | 99 +++ opm/simulators/flow/FlowProblemParameters.cpp | 18 + opm/simulators/flow/FlowProblemParameters.hpp | 9 + .../flow/GpuEclMaterialLawManager.hpp | 461 ++++++++++++++ .../flow/GpuEclThermalLawManager.hpp | 367 +++++++++++ opm/simulators/flow/GpuFlowProblem.hpp | 587 ++++++++++++++++++ .../test_gpu_ecl_thermal_law_manager.cu | 210 +++++++ tests/very_simple_deck.DATA | 53 ++ 11 files changed, 2124 insertions(+) create mode 100644 opm/models/discretization/common/fvbaseelementcontextgpu.hh create mode 100644 opm/simulators/flow/FlowGasWaterEnergyTypeTag.hpp create mode 100644 opm/simulators/flow/GpuEclMaterialLawManager.hpp create mode 100644 opm/simulators/flow/GpuEclThermalLawManager.hpp create mode 100644 opm/simulators/flow/GpuFlowProblem.hpp create mode 100644 tests/gpuistl/test_gpu_ecl_thermal_law_manager.cu create mode 100644 tests/very_simple_deck.DATA diff --git a/CMakeLists.txt b/CMakeLists.txt index a240cd865c2..48d9b8dbf7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -355,6 +355,7 @@ macro(opm-simulators_sources_hook) set_source_files_properties( tests/gpuistl/test_blackoilfluidstategpu.cu tests/gpuistl/test_gpu_ad.cu + tests/gpuistl/test_gpu_ecl_thermal_law_manager.cu tests/gpuistl/test_gpu_linear_two_phase_material.cu tests/gpuistl/test_gpuPvt.cu tests/gpuistl/test_gpuBlackOilFluidSystem.cu @@ -479,6 +480,7 @@ macro(opm-simulators_tests_hook) cuVector_operations deviceBlockOperations gpu_ad + gpu_ecl_thermal_law_manager gpu_linear_two_phase_material gpu_resources gpu_safe_call diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index e16173f05db..4f8dbc3fad7 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -340,6 +340,10 @@ if(CUDA_FOUND OR hip_FOUND) # HEADERS + list(APPEND PUBLIC_HEADER_FILES opm/models/discretization/common/fvbaseelementcontextgpu.hh) + list(APPEND PUBLIC_HEADER_FILES opm/simulators/flow/GpuEclMaterialLawManager.hpp) + list(APPEND PUBLIC_HEADER_FILES opm/simulators/flow/GpuEclThermalLawManager.hpp) + list(APPEND PUBLIC_HEADER_FILES opm/simulators/flow/GpuFlowProblem.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/autotuner.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/coloringAndReorderingUtils.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpu_safe_call.hpp) @@ -570,6 +574,9 @@ if(CUDA_FOUND OR hip_FOUND) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_safe_conversion.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_solver_adapter.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_ad.cu) + if(hip_FOUND OR CUDA_VERSION VERSION_GREATER_EQUAL 13.1) + ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_ecl_thermal_law_manager.cu) + endif() ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_linear_two_phase_material.cu) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpuPvt.cu) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_smart_pointers.cu) @@ -707,6 +714,7 @@ list (APPEND TEST_DATA_FILES tests/data/test_stokes2c.dgf tests/data/test_stokes2cni.dgf tests/data/waterair.dgf + tests/very_simple_deck.DATA ) @@ -969,6 +977,7 @@ list (APPEND PUBLIC_HEADER_FILES opm/simulators/flow/FIPContainer.hpp opm/simulators/flow/FlowBaseProblemProperties.hpp opm/simulators/flow/FlowBaseVanguard.hpp + opm/simulators/flow/FlowGasWaterEnergyTypeTag.hpp opm/simulators/flow/FlowGenericProblem.hpp opm/simulators/flow/FlowGenericProblem_impl.hpp opm/simulators/flow/FlowGenericVanguard.hpp diff --git a/opm/models/discretization/common/fvbaseelementcontextgpu.hh b/opm/models/discretization/common/fvbaseelementcontextgpu.hh new file mode 100644 index 00000000000..f7237ea9eb0 --- /dev/null +++ b/opm/models/discretization/common/fvbaseelementcontextgpu.hh @@ -0,0 +1,309 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . + + Consult the COPYING file in the top-level source directory of this + module for the precise wording of the license and the list of + copyright holders. +*/ +/*! + * \file + * + * \brief GPU-compatible stub of FvBaseElementContext. + * + * All methods are annotated with OPM_HOST_DEVICE and return dummy/default + * values. This class is intended as a placeholder so that GPU kernels that + * receive an element context can compile; it does not implement any real + * behaviour. + * + * The heavy Dune/grid infrastructure present in FvBaseElementContext is + * intentionally absent here because those types are not usable inside a + * CUDA/HIP kernel. + */ +#ifndef OPM_FV_BASE_ELEMENT_CONTEXT_GPU_HH +#define OPM_FV_BASE_ELEMENT_CONTEXT_GPU_HH + +#include +#include +#include + +#include + +namespace Opm +{ + +/*! + * \ingroup FiniteVolumeDiscretizations + * + * \brief GPU-compatible stub mirroring the public interface of FvBaseElementContext. + * + * Uses the same TypeTag-based template parameter as FvBaseElementContext; all + * concrete types are extracted via GetPropType at compile time. + * + * \tparam TypeTag The type tag from which Scalar, PrimaryVariables, etc. are derived. + */ +template +class FvBaseElementContextGpu +{ +public: + using Scalar = GetPropType; + using PrimaryVariables = GetPropType; + using IntensiveQuantities = GetPropType; + using ExtensiveQuantities = GetPropType; + using Problem = GetPropType; + using Model = GetPropType; + using GradientCalculator = GetPropType; + + // ----------------------------------------------------------------------- + // Construction / assignment + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE FvBaseElementContextGpu() = default; + OPM_HOST_DEVICE ~FvBaseElementContextGpu() = default; + OPM_HOST_DEVICE FvBaseElementContextGpu(const FvBaseElementContextGpu&) = default; + OPM_HOST_DEVICE FvBaseElementContextGpu(FvBaseElementContextGpu&&) noexcept = default; + OPM_HOST_DEVICE FvBaseElementContextGpu& operator=(const FvBaseElementContextGpu&) = default; + OPM_HOST_DEVICE FvBaseElementContextGpu& operator=(FvBaseElementContextGpu&&) noexcept + = default; + + // ----------------------------------------------------------------------- + // Stencil / element update stubs + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE void updateAll() + { + } + OPM_HOST_DEVICE void updateStencil() + { + } + OPM_HOST_DEVICE void updatePrimaryStencil() + { + } + OPM_HOST_DEVICE void updateStencilTopology() + { + } + OPM_HOST_DEVICE void updateAllIntensiveQuantities() + { + } + OPM_HOST_DEVICE void updateIntensiveQuantities(unsigned /*timeIdx*/) + { + } + OPM_HOST_DEVICE void updatePrimaryIntensiveQuantities(unsigned /*timeIdx*/) + { + } + OPM_HOST_DEVICE void updateIntensiveQuantities(const PrimaryVariables& /*priVars*/, + unsigned /*dofIdx*/, + unsigned /*timeIdx*/) + { + } + OPM_HOST_DEVICE void updateAllExtensiveQuantities() + { + } + OPM_HOST_DEVICE void updateExtensiveQuantities(unsigned /*timeIdx*/) + { + } + + // ----------------------------------------------------------------------- + // Focus DOF + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE void setFocusDofIndex(unsigned dofIdx) + { + focusDofIdx_ = static_cast(dofIdx); + } + OPM_HOST_DEVICE unsigned focusDofIndex() const + { + return focusDofIdx_; + } + + // ----------------------------------------------------------------------- + // Linearization type + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE LinearizationType linearizationType() const + { + return LinearizationType {}; + } + + // ----------------------------------------------------------------------- + // Problem / model accessors — return references to dummy stored objects + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE const Problem& problem() const + { + return problem_; + } + OPM_HOST_DEVICE const Model& model() const + { + return model_; + } + + // ----------------------------------------------------------------------- + // DOF / face counts + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE std::size_t numDof(unsigned /*timeIdx*/) const + { + return 0; + } + OPM_HOST_DEVICE std::size_t numPrimaryDof(unsigned /*timeIdx*/) const + { + return 0; + } + OPM_HOST_DEVICE std::size_t numInteriorFaces(unsigned /*timeIdx*/) const + { + return 0; + } + OPM_HOST_DEVICE std::size_t numBoundaryFaces(unsigned /*timeIdx*/) const + { + return 0; + } + + // ----------------------------------------------------------------------- + // Global space index / volume + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE unsigned globalSpaceIndex(unsigned /*dofIdx*/, unsigned /*timeIdx*/) const + { + return 0; + } + + OPM_HOST_DEVICE Scalar dofVolume(unsigned /*dofIdx*/, unsigned /*timeIdx*/) const + { + return Scalar {0}; + } + + OPM_HOST_DEVICE Scalar dofTotalVolume(unsigned /*dofIdx*/, unsigned /*timeIdx*/) const + { + return Scalar {0}; + } + + // ----------------------------------------------------------------------- + // Boundary + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE bool onBoundary() const + { + return false; + } + + // ----------------------------------------------------------------------- + // Intensive quantities + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE const IntensiveQuantities& intensiveQuantities(unsigned /*dofIdx*/, + unsigned /*timeIdx*/) const + { + return intensiveQuantitiesStashed_; + } + + OPM_HOST_DEVICE IntensiveQuantities& intensiveQuantities(unsigned /*dofIdx*/, + unsigned /*timeIdx*/) + { + return intensiveQuantitiesStashed_; + } + + OPM_HOST_DEVICE const IntensiveQuantities* thermodynamicHint(unsigned /*dofIdx*/, + unsigned /*timeIdx*/) const + { + return nullptr; + } + + // ----------------------------------------------------------------------- + // Primary variables + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE const PrimaryVariables& primaryVars(unsigned /*dofIdx*/, + unsigned /*timeIdx*/) const + { + return priVarsStashed_; + } + + // ----------------------------------------------------------------------- + // Stash / restore + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE bool haveStashedIntensiveQuantities() const + { + return stashedDofIdx_ != -1; + } + + OPM_HOST_DEVICE int stashedDofIdx() const + { + return stashedDofIdx_; + } + + OPM_HOST_DEVICE void stashIntensiveQuantities(unsigned dofIdx) + { + stashedDofIdx_ = static_cast(dofIdx); + } + + OPM_HOST_DEVICE void restoreIntensiveQuantities(unsigned /*dofIdx*/) + { + stashedDofIdx_ = -1; + } + + // ----------------------------------------------------------------------- + // Gradient calculator + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE const GradientCalculator& gradientCalculator() const + { + return gradientCalculator_; + } + + // ----------------------------------------------------------------------- + // Extensive quantities + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE const ExtensiveQuantities& extensiveQuantities(unsigned /*fluxIdx*/, + unsigned /*timeIdx*/) const + { + return extensiveQuantitiesStashed_; + } + + // ----------------------------------------------------------------------- + // Storage cache flag + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE bool enableStorageCache() const + { + return enableStorageCache_; + } + OPM_HOST_DEVICE void setEnableStorageCache(bool yesno) + { + enableStorageCache_ = yesno; + } + +private: + // Dummy stored objects returned by reference accessors. + // On device these are default-constructed and carry no meaningful data. + Problem problem_ {}; + Model model_ {}; + GradientCalculator gradientCalculator_ {}; + IntensiveQuantities intensiveQuantitiesStashed_ {}; + ExtensiveQuantities extensiveQuantitiesStashed_ {}; + PrimaryVariables priVarsStashed_ {}; + + int stashedDofIdx_ {-1}; + int focusDofIdx_ {-1}; + bool enableStorageCache_ {false}; +}; + +} // namespace Opm + +#endif // OPM_FV_BASE_ELEMENT_CONTEXT_GPU_HH diff --git a/opm/simulators/flow/FlowGasWaterEnergyTypeTag.hpp b/opm/simulators/flow/FlowGasWaterEnergyTypeTag.hpp new file mode 100644 index 00000000000..1c91e08b05f --- /dev/null +++ b/opm/simulators/flow/FlowGasWaterEnergyTypeTag.hpp @@ -0,0 +1,99 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +/*! + * \file + * + * Shared TypeTag declaration for the gas+water+energy (CO2STORE-style) + * blackoil flow problem. Originally inlined into + * \c flow/flow_gaswater_energy.cpp; extracted into a header so that other + * translation units (e.g. the GPU intensive-quantities dispatcher) can + * obtain matching property bindings via \c GetPropType. + */ +#ifndef OPM_FLOW_GASWATER_ENERGY_TYPETAG_HPP +#define OPM_FLOW_GASWATER_ENERGY_TYPETAG_HPP + +#include + +#include +#include +#include + +#include + +namespace Opm::Properties { +namespace TTag { +struct FlowGasWaterEnergyProblem { + using InheritsFrom = std::tuple; +}; +} // namespace TTag + +template +struct Linearizer { using type = TpfaLinearizer; }; + +template +struct LocalResidual { using type = BlackOilLocalResidualTPFA; }; + +template +struct EnableDiffusion { static constexpr bool value = true; }; + +template +struct EnableDispersion { static constexpr bool value = true; }; + +template +struct EnergyModuleType +{ static constexpr EnergyModules value = EnergyModules::FullyImplicitThermal; }; + +template +struct EnableDisgasInWater { + static constexpr bool value = true; +}; + +template +struct EnableVapwat { + static constexpr bool value = true; +}; + +//! The indices required by the model +template +struct Indices +{ +private: + // it is unfortunately not possible to simply use 'TypeTag' here because + // this leads to cyclic definitions of some properties. + using BaseTypeTag = TTag::FlowProblem; + using FluidSystem = GetPropType; + static constexpr EnergyModules energyModuleType = getPropValue(); + static constexpr int numEnergyVars = (energyModuleType == EnergyModules::FullyImplicitThermal) ? 1 : 0; +public: + using type = BlackOilTwoPhaseIndices(), + getPropValue(), + getPropValue(), + numEnergyVars, + getPropValue(), + getPropValue(), + /*PVOffset=*/0, + /*disabledCompIdx=*/FluidSystem::oilCompIdx, + getPropValue()>; +}; + +} // namespace Opm::Properties + +#endif // OPM_FLOW_GASWATER_ENERGY_TYPETAG_HPP diff --git a/opm/simulators/flow/FlowProblemParameters.cpp b/opm/simulators/flow/FlowProblemParameters.cpp index 2f5f29668da..1daa0a09b2a 100644 --- a/opm/simulators/flow/FlowProblemParameters.cpp +++ b/opm/simulators/flow/FlowProblemParameters.cpp @@ -81,6 +81,24 @@ void registerFlowProblemParameters() ("Conserve inner energy and not enthalpy " "even if THERMAL is used."); +#if HAVE_CUDA && OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + Parameters::Register + ("Experimental: compute BlackOilIntensiveQuantities on the GPU " + "via the GpuBlackoilIntensiveQuantitiesDispatcher. Only takes " + "effect for CO2STORE-compatible TypeTags; ignored otherwise."); +#else + // Always register so that the run fails with an explanatory error + // message (rather than an "unknown parameter" message) when this binary + // has been built without the GPU intensive-quantities dispatcher (i.e. + // without CUDA/HIP support, or with a CUDA toolkit older than 13.1) + // but the user still requests the GPU dispatcher. + Parameters::Register + ("Experimental: compute BlackOilIntensiveQuantities on the GPU. " + "This binary was built without the GPU intensive-quantities " + "dispatcher (no CUDA/HIP support, or CUDA<13.1), so enabling " + "this option will cause the run to fail at start-up."); +#endif + // By default, stop it after the universe will probably have stopped // to exist. (the ECL problem will finish the simulation explicitly // after it simulated the last episode specified in the deck.) diff --git a/opm/simulators/flow/FlowProblemParameters.hpp b/opm/simulators/flow/FlowProblemParameters.hpp index 9908026b63e..0c2c307723d 100644 --- a/opm/simulators/flow/FlowProblemParameters.hpp +++ b/opm/simulators/flow/FlowProblemParameters.hpp @@ -69,6 +69,15 @@ struct UseHybridNewton { static constexpr bool value = false; }; // Conserve inner energy instead of enthalpy even if THERMAL is used struct ConserveInnerEnergyThermal { static constexpr bool value = false; }; +// Experimental: route the per-element BlackOilIntensiveQuantities update +// through the GPU dispatcher instead of computing it on the CPU. Only takes +// effect for CO2STORE-compatible TypeTags (gas+water, no oil). The parameter +// is always declared so that the run can fail with an explanatory error +// message when the option is requested in a binary that was built without +// CUDA/HIP support (or without a sufficiently new toolkit), or when the +// active TypeTag is incompatible with the GPU dispatcher. +struct ExperimentalComputePropertiesOnGpu { static constexpr bool value = false; }; + } // namespace Opm::Parameters namespace Opm { diff --git a/opm/simulators/flow/GpuEclMaterialLawManager.hpp b/opm/simulators/flow/GpuEclMaterialLawManager.hpp new file mode 100644 index 00000000000..46611f4c688 --- /dev/null +++ b/opm/simulators/flow/GpuEclMaterialLawManager.hpp @@ -0,0 +1,461 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +/*! + * \file + * \brief GPU-compatible, simplified version of Opm::EclMaterialLaw::Manager. + * + * This class only supports the actions needed by FlowProblem, + * FlowProblemBlackoil and the BlackOilIntensiveQuantities. It re-uses the + * generic two-phase material types from opm-common + * (\c Opm::EclTwoPhaseMaterial and \c Opm::EclTwoPhaseMaterialParams) with + * a value-storage policy (\c Opm::gpuistl::ValueAsPointer) so the per-cell + * material law parameters are trivially copyable to the device. + * + * The chain of multiplexers used by the CPU manager + * (\c EclMultiplexerMaterial \f$\to\f$ \c EclEpsTwoPhaseLaw \f$\to\f$ + * \c EclHysteresisTwoPhaseLaw \f$\to\f$ \c SatCurveMultiplexer) is bypassed: + * the GPU manager unwraps the CPU multiplexer parameters down to the + * underlying \c PiecewiseLinearTwoPhaseMaterialParams and uploads only the + * piecewise-linear sample tables. + */ +#ifndef OPM_GPU_ECL_MATERIAL_LAW_MANAGER_HPP +#define OPM_GPU_ECL_MATERIAL_LAW_MANAGER_HPP + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace Opm::EclMaterialLaw +{ + +namespace detail +{ + + /*! + * \brief Walk down a CPU material-law parameter object until the enclosed + * \c PiecewiseLinearTwoPhaseMaterialParams is reached. + * + * Supports the standard layering used by the CPU \c EclMaterialLaw::Manager: + * \c EclHysteresisTwoPhaseLawParams \f$\to\f$ \c EclEpsTwoPhaseLawParams + * \f$\to\f$ \c SatCurveMultiplexerParams + * \f$\to\f$ \c PiecewiseLinearTwoPhaseMaterialParams. + */ + template + const auto& extractCpuPlParams(const CpuParams& params) + { + if constexpr (requires { params.drainageParams(); }) { + return extractCpuPlParams(params.drainageParams()); + } else if constexpr (requires { params.effectiveLawParams(); }) { + return extractCpuPlParams(params.effectiveLawParams()); + } else if constexpr (requires { + params.template getRealParams< + ::Opm::SatCurveMultiplexerApproach::PiecewiseLinear>(); + }) { + return params + .template getRealParams<::Opm::SatCurveMultiplexerApproach::PiecewiseLinear>(); + } else { + return params; + } + } + + /*! + * \brief Optional host-side base used by \c GpuManager when its storage is + * owning device memory: keeps the per-cell sample buffers alive so + * that the \c GpuView pointers stored inside the cells' + * \c PiecewiseLinearTwoPhaseMaterialParams remain valid for as long + * as the manager exists. Empty for any non-owning storage so that + * \c GpuView based managers stay device-trivially-copyable. + */ + template + struct GpuPiecewiseLinearSampleHolder { + }; + + template + struct GpuPiecewiseLinearSampleHolder { + std::vector<::Opm::gpuistl::GpuBuffer> sampleBuffers {}; + }; + +} // namespace detail + +/*! + * \brief A minimal, GPU-compatible material-law manager. + * + * Only the EclDefaultMaterial three-phase law and the EclTwoPhaseMaterial + * two-phase law are supported. The two-phase gas/oil and oil/water sub-law + * types are template parameters so the caller can choose any GPU compatible + * implementation (e.g. PiecewiseLinear). + * + * \tparam TraitsT Three-phase material traits. + * \tparam GasOilLawT Two-phase gas/oil law type (must implement the + * saturation-only API). + * \tparam OilWaterLawT Two-phase oil/water law type (must implement the + * saturation-only API). + * \tparam Storage Storage container template; defaults to a CPU vector. + * Use \c Opm::gpuistl::GpuBuffer for owning GPU storage + * and \c Opm::gpuistl::GpuView for non-owning GPU + * storage. + * \tparam MaterialLawT The three-phase material-law type to use; defaults to + * \c Opm::EclDefaultMaterial. + */ +template class Storage = ::Opm::VectorWithDefaultAllocator, + class MaterialLawT = ::Opm::EclDefaultMaterial> +class GpuManager : private detail::GpuPiecewiseLinearSampleHolder< + typename TraitsT::Scalar, + std::is_same_v, ::Opm::gpuistl::GpuBuffer>> +{ +public: + using Traits = TraitsT; + using Scalar = typename Traits::Scalar; + using GasOilLaw = GasOilLawT; + using OilWaterLaw = OilWaterLawT; + using GasOilParams = typename GasOilLaw::Params; + using OilWaterParams = typename OilWaterLaw::Params; + + /*! + * \brief The actual material-law type used by the IQ. + * + * Defaults to the three-phase \c EclDefaultMaterial. Pass an + * \c Opm::EclTwoPhaseMaterial<...> instantiation (with a value-storage + * \c EclTwoPhaseMaterialParams, e.g. one using + * \c Opm::gpuistl::ValueAsPointer) to instead use the two-phase law. + * Only the GasWater sub-approach is currently supported by the + * from-CPU constructors below; this matches the CO2STORE setup. + */ + using MaterialLaw = MaterialLawT; + using MaterialLawParams = typename MaterialLaw::Params; + + static constexpr int waterPhaseIdx = Traits::wettingPhaseIdx; + static constexpr int oilPhaseIdx = Traits::nonWettingPhaseIdx; + static constexpr int gasPhaseIdx = Traits::gasPhaseIdx; + static constexpr int numPhases = Traits::numPhases; + +private: + /*! \brief Trait: true if the material-law parameters are an + * \c EclTwoPhaseMaterialParams (i.e. expose a nested + * \c GasWaterParams type). */ + template + struct IsTwoPhaseMaterialLawParams : std::false_type { + }; + + template + struct IsTwoPhaseMaterialLawParams> + : std::true_type { + }; + + static constexpr bool isTwoPhase = IsTwoPhaseMaterialLawParams::value; + +public: + OPM_HOST_DEVICE GpuManager() = default; + + OPM_HOST_DEVICE GpuManager(Storage materialLawParams, + Storage satnumRegionArray) + : materialLawParams_(std::move(materialLawParams)) + , satnumRegionArray_(std::move(satnumRegionArray)) + { + } + + /*! + * \brief Construct from a CPU \c Opm::EclMaterialLaw::Manager directly + * into device-resident \c GpuBuffer storage. + * + * For each cell, the per-cell piecewise-linear sample arrays are + * uploaded to the GPU as individual \c GpuBuffer instances kept + * alive by this manager (via its private + * \c detail::GpuPiecewiseLinearSampleHolder base). The cell's + * \c MaterialLawParams is populated with \c GpuView views + * referencing those device buffers and is then bulk-copied to the + * device alongside the satnum array. + * + * Only enabled when this manager itself uses \c GpuBuffer storage and + * the two-phase laws use \c GpuView sample storage. + */ + template , + class GasOilParamsArg = GasOilParams, + class OilWaterParamsArg = OilWaterParams, + class IntegerStorage = Storage, + class MaterialLawParamsStorageT = Storage, + std::enable_if_t> + && std::is_same_v> + && std::is_same_v> + && std::is_same_v>, + int> + = 0> + explicit GpuManager(const CpuManager& cpu, std::size_t numElements) + : detail::GpuPiecewiseLinearSampleHolder {} + , materialLawParams_( + buildHostMaterialLawParamsForGpu(cpu, numElements, this->sampleBuffers)) + , satnumRegionArray_(buildHostSatnumRegionArray(cpu, numElements)) + { + } + + /*! \brief Material-law parameters of an active cell. */ + OPM_HOST_DEVICE MaterialLawParams materialLawParams(unsigned elemIdx) const + { + // Return by value: GpuView::operator[] const already returns by + // value, so binding a reference to materialLawParams_[idx] inside + // this function would dangle. Returning by value lets the caller + // (which typically does `const auto& mp = ...`) safely extend the + // temporary's lifetime. + return materialLawParams_[elemIdx]; + } + + OPM_HOST_DEVICE MaterialLawParams& materialLawParams(unsigned elemIdx) + { + return materialLawParams_[elemIdx]; + } + + /*! \brief Saturation-region index of an active cell. */ + OPM_HOST_DEVICE int satnumRegionIdx(unsigned elemIdx) const + { + return satnumRegionArray_[elemIdx]; + } + + /*! \brief Direct access to the underlying storages + * (used by copy_to_gpu / make_view). */ + const Storage& materialLawParamsStorage() const + { + return materialLawParams_; + } + Storage& materialLawParamsStorage() + { + return materialLawParams_; + } + + const Storage& satnumRegionArrayStorage() const + { + return satnumRegionArray_; + } + Storage& satnumRegionArrayStorage() + { + return satnumRegionArray_; + } + +private: + /*! + * \brief Build the per-cell host-side \c MaterialLawParams vector for + * the GpuBuffer-storage from-CPU constructor. Allocates one + * \c GpuBuffer per per-cell sample array and wraps each + * in a \c GpuView stored inside the cell's MLP. + * The owning device buffers are pushed into \p sampleBuffers, + * which is the manager's holder member. + */ + template + static std::vector + buildHostMaterialLawParamsForGpu(const CpuManager& cpu, + std::size_t numElements, + std::vector<::Opm::gpuistl::GpuBuffer>& sampleBuffers) + { + // The CPU manager produces one piecewise-linear sample table per + // sub-law per cell. For a three-phase EclDefaultMaterial that is up + // to twelve buffers per cell (six per gas/oil and oil/water sub-law); + // for the two-phase EclTwoPhaseMaterial it is six (a single + // gas/water sub-law). Reserving the upper bound avoids reallocation. + sampleBuffers.reserve(numElements * 12); + std::vector materialLawParams(numElements); + + auto pushSampleBuffer = [&](const auto& sampleVector) { + std::vector hostCopy(sampleVector.begin(), sampleVector.end()); + sampleBuffers.emplace_back(hostCopy); + const auto& deviceBuffer = sampleBuffers.back(); + return ::Opm::gpuistl::GpuView(deviceBuffer.data(), deviceBuffer.size()); + }; + + for (std::size_t i = 0; i < numElements; ++i) { + const auto& cpuMaterialParams = cpu.materialLawParams(static_cast(i)); + + if constexpr (isTwoPhase) { + buildTwoPhaseCellParams(cpuMaterialParams, materialLawParams[i], pushSampleBuffer); + } else { + buildThreePhaseCellParams( + cpuMaterialParams, materialLawParams[i], pushSampleBuffer); + } + } + return materialLawParams; + } + + /*! + * \brief Build a single cell's two-phase \c EclTwoPhaseMaterialParams + * instance with GPU-resident piecewise-linear sample tables. + * + * Only the \c GasWater sub-approach is currently supported, since this + * is the only configuration produced by CO2STORE-style decks. + */ + template + static void buildTwoPhaseCellParams(const CpuMaterialParams& cpuMaterialParams, + MaterialLawParams& cellParams, + PushBuffer& pushSampleBuffer) + { + if (cpuMaterialParams.approach() != ::Opm::EclMultiplexerApproach::TwoPhase) { + throw std::logic_error("Opm::EclMaterialLaw::GpuManager configured for the two-phase " + "material law requires a CPU material-law parameter set that " + "uses EclMultiplexerApproach::TwoPhase."); + } + const auto& cpuTwoPhaseParams + = cpuMaterialParams.template getRealParams<::Opm::EclMultiplexerApproach::TwoPhase>(); + if (cpuTwoPhaseParams.approach() != ::Opm::EclTwoPhaseApproach::GasWater) { + throw std::logic_error("Opm::EclMaterialLaw::GpuManager only supports the GasWater " + "two-phase sub-approach."); + } + + using GasWaterParams = typename MaterialLawParams::GasWaterParams; + const auto& cpuGasWaterPiecewiseLinear + = detail::extractCpuPlParams(cpuTwoPhaseParams.gasWaterParams()); + GasWaterParams gasWaterParams(pushSampleBuffer(cpuGasWaterPiecewiseLinear.SwPcwnSamples()), + pushSampleBuffer(cpuGasWaterPiecewiseLinear.pcwnSamples()), + pushSampleBuffer(cpuGasWaterPiecewiseLinear.SwKrwSamples()), + pushSampleBuffer(cpuGasWaterPiecewiseLinear.krwSamples()), + pushSampleBuffer(cpuGasWaterPiecewiseLinear.SwKrnSamples()), + pushSampleBuffer(cpuGasWaterPiecewiseLinear.krnSamples())); + + cellParams.setGasWaterParams( + typename MaterialLawParams::GasWaterParamsStorage(std::move(gasWaterParams))); + cellParams.setApproach(::Opm::EclTwoPhaseApproach::GasWater); + cellParams.finalize(); + } + + /*! + * \brief Build a single cell's three-phase \c EclDefaultMaterialParams + * instance with GPU-resident piecewise-linear sample tables. + */ + template + static void buildThreePhaseCellParams(const CpuMaterialParams& cpuMaterialParams, + MaterialLawParams& cellParams, + PushBuffer& pushSampleBuffer) + { + if (cpuMaterialParams.approach() != ::Opm::EclMultiplexerApproach::Default) { + throw std::logic_error("Opm::EclMaterialLaw::GpuManager only supports the Default " + "three-phase material law approach."); + } + const auto& cpuDefaultParams + = cpuMaterialParams.template getRealParams<::Opm::EclMultiplexerApproach::Default>(); + const auto& cpuGasOilPiecewiseLinear + = detail::extractCpuPlParams(cpuDefaultParams.gasOilParams()); + const auto& cpuOilWaterPiecewiseLinear + = detail::extractCpuPlParams(cpuDefaultParams.oilWaterParams()); + + GasOilParams gasOilParams(pushSampleBuffer(cpuGasOilPiecewiseLinear.SwPcwnSamples()), + pushSampleBuffer(cpuGasOilPiecewiseLinear.pcwnSamples()), + pushSampleBuffer(cpuGasOilPiecewiseLinear.SwKrwSamples()), + pushSampleBuffer(cpuGasOilPiecewiseLinear.krwSamples()), + pushSampleBuffer(cpuGasOilPiecewiseLinear.SwKrnSamples()), + pushSampleBuffer(cpuGasOilPiecewiseLinear.krnSamples())); + OilWaterParams oilWaterParams(pushSampleBuffer(cpuOilWaterPiecewiseLinear.SwPcwnSamples()), + pushSampleBuffer(cpuOilWaterPiecewiseLinear.pcwnSamples()), + pushSampleBuffer(cpuOilWaterPiecewiseLinear.SwKrwSamples()), + pushSampleBuffer(cpuOilWaterPiecewiseLinear.krwSamples()), + pushSampleBuffer(cpuOilWaterPiecewiseLinear.SwKrnSamples()), + pushSampleBuffer(cpuOilWaterPiecewiseLinear.krnSamples())); + + cellParams.setGasOilParams(std::make_shared(std::move(gasOilParams))); + cellParams.setOilWaterParams(std::make_shared(std::move(oilWaterParams))); + cellParams.setSwl(cpuDefaultParams.Swl()); + cellParams.finalize(); + } + + template + static std::vector buildHostSatnumRegionArray(const CpuManager& cpu, + std::size_t numElements) + { + std::vector satnumRegionArray(numElements); + for (std::size_t i = 0; i < numElements; ++i) { + satnumRegionArray[i] = static_cast(cpu.satnumRegionIdx(static_cast(i))); + } + return satnumRegionArray; + } + + Storage materialLawParams_ {}; + Storage satnumRegionArray_ {}; +}; + +} // namespace Opm::EclMaterialLaw + +namespace Opm::gpuistl +{ + +/*! + * \brief Copy a CPU GpuManager to GPU-resident GpuBuffer storage. + * + * The MaterialLawParams element type is assumed to be the same on the CPU + * and the GPU, i.e. the caller is responsible for making the GasOilLaw and + * OilWaterLaw GPU-compatible (typically by templating their parameter type + * on a GPU storage). + */ +template +::Opm::EclMaterialLaw::GpuManager +copy_to_gpu(const ::Opm::EclMaterialLaw::GpuManager& cpu) +{ + using GpuManagerBuffer = ::Opm::EclMaterialLaw:: + GpuManager; + using MaterialLawParams = typename GpuManagerBuffer::MaterialLawParams; + return GpuManagerBuffer(GpuBuffer(cpu.materialLawParamsStorage()), + GpuBuffer(cpu.satnumRegionArrayStorage())); +} + +/*! + * \brief Make a non-owning GpuView based GpuManager from an owning GpuBuffer + * based GpuManager. + */ +template +::Opm::EclMaterialLaw::GpuManager +make_view( + ::Opm::EclMaterialLaw::GpuManager& + buf) +{ + using GpuManagerView = ::Opm::EclMaterialLaw:: + GpuManager; + using MaterialLawParams = typename GpuManagerView::MaterialLawParams; + return GpuManagerView( + GpuView(buf.materialLawParamsStorage().data(), + buf.materialLawParamsStorage().size()), + GpuView(buf.satnumRegionArrayStorage().data(), buf.satnumRegionArrayStorage().size())); +} + +} // namespace Opm::gpuistl + +#endif // OPM_GPU_ECL_MATERIAL_LAW_MANAGER_HPP diff --git a/opm/simulators/flow/GpuEclThermalLawManager.hpp b/opm/simulators/flow/GpuEclThermalLawManager.hpp new file mode 100644 index 00000000000..bc87a3c9884 --- /dev/null +++ b/opm/simulators/flow/GpuEclThermalLawManager.hpp @@ -0,0 +1,367 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +/*! + * \file + * + * GPU-portable, simplified version of \c Opm::EclThermalLawManager. + * + * Mirrors the design of \c Opm::EclMaterialLaw::GpuManager: + * + * - Templated on the outer storage container (per-cell / per-region + * bulk arrays of params): \c VectorWithDefaultAllocator on the CPU, + * \c GpuBuffer for owning device memory, \c GpuView for non-owning + * device storage usable from a kernel. + * - Templated on the inner storage used by the per-region SPECROCK + * sample tables (one tiny array per region). + * - The CPU \c Opm::EclThermalLawManager is treated as a *builder*: a + * dedicated constructor extracts the per-region SPECROCK sample + * tables and the per-cell THCONR coefficients and uploads them to + * device memory. + * + * Currently only the SPECROCK solid-energy approach and the THCONR + * thermal-conduction approach are supported by the builder, since these + * are the only ones used by the CO2STORE+THERMAL setup that the GPU + * dispatcher targets. The builder throws via \c OPM_THROW for any other + * approach. + */ +#ifndef OPM_GPU_ECL_THERMAL_LAW_MANAGER_HPP +#define OPM_GPU_ECL_THERMAL_LAW_MANAGER_HPP + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace Opm::EclThermalLaw { + +namespace detail { + + /*! + * \brief Holder for the per-region SPECROCK sample buffers when the + * manager owns its device memory (i.e. the outer storage is + * \c GpuBuffer). Each per-region + * \c EclSpecrockLawParams stored in the + * bulk array references one of the buffers in this holder, so + * the holder must outlive every kernel invocation. + * + * For non-owning outer storage (host or GpuView), the holder + * is empty. + */ + template + struct SpecrockSampleHolder { + }; + + template + struct SpecrockSampleHolder { + std::vector<::Opm::gpuistl::GpuBuffer> sampleBuffers {}; + }; + +} // namespace detail + +/*! + * \brief Minimal, GPU-portable thermal-law manager. + * + * \tparam ScalarT Floating point type used for the per-cell and + * per-region sample tables. + * \tparam FluidSystemT Fluid system associated with this manager. + * \tparam OuterStorage Container used for the per-cell / per-region + * bulk arrays of params and the SATNUM index map. + * \tparam SampleStorage Container used for the per-region SPECROCK + * internal-energy / temperature sample tables. + * + * Typical instantiations: + * - CPU-only: \c GpuManager + * - GPU owning (built from CPU): \c GpuManager + * - GPU non-owning (kernel arg): \c GpuManager + */ +template class OuterStorage = ::Opm::VectorWithDefaultAllocator, + template class SampleStorage = ::Opm::VectorWithDefaultAllocator> +class GpuManager + : private detail::SpecrockSampleHolder< + ScalarT, + std::is_same_v, ::Opm::gpuistl::GpuBuffer>> +{ +public: + using Scalar = ScalarT; + using FluidSystem = FluidSystemT; + using SolidEnergyLawParams = ::Opm::EclSpecrockLawParams; + using ThermalConductionLawParams = ::Opm::EclThconrLawParams; + + static constexpr bool isOwningGpu + = std::is_same_v, ::Opm::gpuistl::GpuBuffer>; + + OPM_HOST_DEVICE GpuManager() = default; + + OPM_HOST_DEVICE + GpuManager(OuterStorage solidEnergyParams, + OuterStorage elementToSolidRegionIdx, + OuterStorage thermalConductionParams) + : solidEnergyParams_(std::move(solidEnergyParams)) + , elementToSolidRegionIdx_(std::move(elementToSolidRegionIdx)) + , thermalConductionParams_(std::move(thermalConductionParams)) + { + } + + /*! + * \brief Owning constructor that also takes ownership of the + * per-region SPECROCK sample buffers (only available for the + * owning GpuBuffer outer-storage instantiation). + */ + template = 0> + GpuManager(std::vector<::Opm::gpuistl::GpuBuffer> sampleBuffers, + OuterStorage solidEnergyParams, + OuterStorage elementToSolidRegionIdx, + OuterStorage thermalConductionParams) + : detail::SpecrockSampleHolder{std::move(sampleBuffers)} + , solidEnergyParams_(std::move(solidEnergyParams)) + , elementToSolidRegionIdx_(std::move(elementToSolidRegionIdx)) + , thermalConductionParams_(std::move(thermalConductionParams)) + { + } + + /*! \brief Solid-energy law parameters for an active cell. */ + OPM_HOST_DEVICE SolidEnergyLawParams solidEnergyLawParams(unsigned elemIdx) const + { + const int regionIdx = elementToSolidRegionIdx_[elemIdx]; + return solidEnergyParams_[regionIdx]; + } + + /*! \brief Thermal-conduction law parameters for an active cell. */ + OPM_HOST_DEVICE ThermalConductionLawParams + thermalConductionLawParams(unsigned elemIdx) const + { + return thermalConductionParams_[elemIdx]; + } + + /*! \brief Direct storage accessors used by copy_to_gpu / make_view. */ + const OuterStorage& solidEnergyParamsStorage() const + { + return solidEnergyParams_; + } + OuterStorage& solidEnergyParamsStorage() + { + return solidEnergyParams_; + } + + const OuterStorage& elementToSolidRegionIdxStorage() const + { + return elementToSolidRegionIdx_; + } + OuterStorage& elementToSolidRegionIdxStorage() + { + return elementToSolidRegionIdx_; + } + + const OuterStorage& thermalConductionParamsStorage() const + { + return thermalConductionParams_; + } + OuterStorage& thermalConductionParamsStorage() + { + return thermalConductionParams_; + } + + /*! \brief Mutable accessor for the per-region SPECROCK sample + * buffer holder; used by \c copy_to_gpu to populate the + * owning device buffers. Only available when this manager + * owns its device memory. + */ + template = 0> + std::vector<::Opm::gpuistl::GpuBuffer>& specrockSampleBuffers() + { + return this->sampleBuffers; + } + +private: + OuterStorage solidEnergyParams_ {}; + OuterStorage elementToSolidRegionIdx_ {}; + OuterStorage thermalConductionParams_ {}; +}; + +/*! + * \brief Build a CPU \c GpuManager from a CPU \c Opm::FlowProblem. + * + * Iterates over the active cells, querying the CPU problem for each + * cell's \c SolidEnergyLawParams (multiplexer) and + * \c ThermalConductionLawParams (multiplexer). The per-cell SPECROCK + * multiplexer references are deduplicated by address to recover the + * underlying region index (so the resulting GPU manager stores one + * \c SolidEnergyLawParams per region rather than one per cell). + * + * Throws via \c OPM_THROW for any solid-energy approach other than + * SPECROCK or any thermal-conduction approach other than THCONR. + */ +template +GpuManager +buildCpuManagerFromFlowProblem(const CpuFlowProblemT& cpu, std::size_t numElements) +{ + using ManagerCpu = GpuManager; + using SolidEnergyLawParams = typename ManagerCpu::SolidEnergyLawParams; + using ThermalConductionLawParams = typename ManagerCpu::ThermalConductionLawParams; + + std::vector regionMultiplexerAddress; + std::vector hostElementToSolidRegionIdx(numElements); + std::vector hostSolidEnergyParams; + std::vector hostThermalConductionParams(numElements); + + for (std::size_t i = 0; i < numElements; ++i) { + const auto& cpuSolidMultiplexer + = cpu.solidEnergyLawParams(static_cast(i), 0u); + if (cpuSolidMultiplexer.solidEnergyApproach() + != ::Opm::EclSolidEnergyApproach::Specrock) { + OPM_THROW(std::logic_error, + "Opm::EclThermalLaw::GpuManager only supports the SPECROCK " + "solid-energy approach."); + } + const void* address = static_cast(&cpuSolidMultiplexer); + int regionIdx = -1; + for (std::size_t r = 0; r < regionMultiplexerAddress.size(); ++r) { + if (regionMultiplexerAddress[r] == address) { + regionIdx = static_cast(r); + break; + } + } + if (regionIdx < 0) { + const auto& cpuSpecrock = cpuSolidMultiplexer.template getRealParams< + ::Opm::EclSolidEnergyApproach::Specrock>(); + const auto& xValues = cpuSpecrock.temperatureSamples(); + const auto& yValues = cpuSpecrock.internalEnergySamples(); + std::vector temperatureSamples(xValues.begin(), xValues.end()); + std::vector internalEnergySamples(yValues.begin(), yValues.end()); + SolidEnergyLawParams params; + params.setSamples(temperatureSamples, internalEnergySamples); + regionIdx = static_cast(hostSolidEnergyParams.size()); + hostSolidEnergyParams.emplace_back(std::move(params)); + regionMultiplexerAddress.push_back(address); + } + hostElementToSolidRegionIdx[i] = regionIdx; + + const auto& cpuThermalMultiplexer + = cpu.thermalConductionLawParams(static_cast(i), 0u); + if (cpuThermalMultiplexer.thermalConductionApproach() + != ::Opm::EclThermalConductionApproach::Thconr) { + OPM_THROW(std::logic_error, + "Opm::EclThermalLaw::GpuManager only supports the THCONR " + "thermal-conduction approach."); + } + const auto& cpuThconr = cpuThermalMultiplexer.template getRealParams< + ::Opm::EclThermalConductionApproach::Thconr>(); + hostThermalConductionParams[i].setReferenceTotalThermalConductivity( + cpuThconr.referenceTotalThermalConductivity()); + hostThermalConductionParams[i].setDTotalThermalConductivity_dSg( + cpuThconr.dTotalThermalConductivity_dSg()); + hostThermalConductionParams[i].finalize(); + } + + return ManagerCpu(std::move(hostSolidEnergyParams), + std::move(hostElementToSolidRegionIdx), + std::move(hostThermalConductionParams)); +} + +} // namespace Opm::EclThermalLaw + +namespace Opm::gpuistl { + +/*! + * \brief Copy a CPU \c GpuManager (plain host storage) to GPU-resident + * \c GpuBuffer storage. + * + * Each per-region SPECROCK sample table is uploaded to its own + * \c GpuBuffer; those buffers are owned by the returned + * manager's \c SpecrockSampleHolder base. The corresponding per-region + * \c EclSpecrockLawParams objects are then assembled + * on the host and uploaded as a single bulk + * \c GpuBuffer. + */ +template +::Opm::EclThermalLaw::GpuManager +copy_to_gpu(const ::Opm::EclThermalLaw::GpuManager& cpu) +{ + using ManagerBuf + = ::Opm::EclThermalLaw::GpuManager; + using SolidEnergyLawParamsView = typename ManagerBuf::SolidEnergyLawParams; + using ThermalConductionLawParams = typename ManagerBuf::ThermalConductionLawParams; + + std::vector> sampleBuffers; + sampleBuffers.reserve(2u * cpu.solidEnergyParamsStorage().size()); + + std::vector hostStaging; + hostStaging.reserve(cpu.solidEnergyParamsStorage().size()); + + for (const auto& cpuRegion : cpu.solidEnergyParamsStorage()) { + sampleBuffers.emplace_back(GpuBuffer(cpuRegion.temperatureSamples())); + auto& tBuf = sampleBuffers.back(); + sampleBuffers.emplace_back(GpuBuffer(cpuRegion.internalEnergySamples())); + auto& eBuf = sampleBuffers.back(); + SolidEnergyLawParamsView params(GpuView(tBuf.data(), tBuf.size()), + GpuView(eBuf.data(), eBuf.size())); + hostStaging.emplace_back(std::move(params)); + } + + return ManagerBuf(std::move(sampleBuffers), + GpuBuffer(hostStaging), + GpuBuffer(cpu.elementToSolidRegionIdxStorage()), + GpuBuffer(cpu.thermalConductionParamsStorage())); +} + +/*! + * \brief Make a non-owning \c GpuView based \c GpuManager from an owning + * \c GpuBuffer based \c GpuManager. The per-region solid-energy + * params element type is unchanged (\c GpuView sample storage in + * both cases). + */ +template +::Opm::EclThermalLaw::GpuManager +make_view(::Opm::EclThermalLaw::GpuManager& buf) +{ + using ManagerView + = ::Opm::EclThermalLaw::GpuManager; + using SolidEnergyLawParamsView = typename ManagerView::SolidEnergyLawParams; + using ThermalConductionLawParams = typename ManagerView::ThermalConductionLawParams; + return ManagerView(GpuView(buf.solidEnergyParamsStorage().data(), + buf.solidEnergyParamsStorage().size()), + GpuView(buf.elementToSolidRegionIdxStorage().data(), + buf.elementToSolidRegionIdxStorage().size()), + GpuView( + buf.thermalConductionParamsStorage().data(), + buf.thermalConductionParamsStorage().size())); +} + +} // namespace Opm::gpuistl + +#endif // OPM_GPU_ECL_THERMAL_LAW_MANAGER_HPP diff --git a/opm/simulators/flow/GpuFlowProblem.hpp b/opm/simulators/flow/GpuFlowProblem.hpp new file mode 100644 index 00000000000..73d88ee2f3a --- /dev/null +++ b/opm/simulators/flow/GpuFlowProblem.hpp @@ -0,0 +1,587 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +/*! + * \file + * \brief GPU-compatible, simplified version of Opm::FlowProblem. + * + * Only exposes the subset of the FlowProblem / FlowProblemBlackoil interface + * that BlackOilIntensiveQuantities needs in order to call ::update(). Anything + * that is not used by the intensive quantities (well model, aquifers, source + * terms, boundary conditions, ...) is intentionally absent. + */ +#ifndef OPM_GPU_FLOW_PROBLEM_HPP +#define OPM_GPU_FLOW_PROBLEM_HPP + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +namespace Opm::gpuistl +{ +template +class GpuBuffer; +template +class GpuView; +} // namespace Opm::gpuistl + +namespace Opm { + +/*! + * \brief No-op thermal-law manager used as the default 4th template arg of + * \c GpuFlowProblem. + * + * Carries no data and exposes the minimum surface needed by the + * \c GpuFlowProblem builder/copy/view machinery: a \c FluidSystem alias + * fixed to \c void (so the \c hasThermal compile-time switch evaluates + * to false), trivial empty params types, and trivial accessors that + * return default-constructed values. + * + * Intentionally not callable on the device when actually queried: it is + * only used by non-thermal \c GpuFlowProblem instantiations whose dispatch + * paths never call \c solidEnergyLawParams or \c thermalConductionLawParams. + */ +struct NoThermalLawManager +{ + using FluidSystem = void; + struct SolidEnergyLawParams {}; + struct ThermalConductionLawParams {}; + + OPM_HOST_DEVICE SolidEnergyLawParams solidEnergyLawParams(unsigned /*elemIdx*/) const + { + return SolidEnergyLawParams{}; + } + OPM_HOST_DEVICE ThermalConductionLawParams + thermalConductionLawParams(unsigned /*elemIdx*/) const + { + return ThermalConductionLawParams{}; + } +}; + +} // namespace Opm + +namespace Opm::gpuistl { + +/*! \brief copy_to_gpu overload for the no-op thermal manager. */ +inline ::Opm::NoThermalLawManager +copy_to_gpu(const ::Opm::NoThermalLawManager& /*cpu*/) +{ + return ::Opm::NoThermalLawManager{}; +} + +/*! \brief make_view overload for the no-op thermal manager. */ +inline ::Opm::NoThermalLawManager +make_view(::Opm::NoThermalLawManager& /*buf*/) +{ + return ::Opm::NoThermalLawManager{}; +} + +} // namespace Opm::gpuistl + +namespace Opm { + +/*! + * \brief Minimal, GPU-compatible problem class. + * + * Provides exactly the interface required by BlackOilIntensiveQuantities to + * compute a primary-variable update on the device. All cell-wise rock / + * fluid bookkeeping data is kept in templated Storage so the same class can + * be used as a CPU object (default \c std::vector) or as a GPU object + * (\c GpuBuffer / \c GpuView). + * + * The material law manager is held by value (so it is trivially copyable + * into a CUDA kernel together with the rest of the problem state). + * + * \tparam ScalarT Floating point type used for rock data. + * \tparam MaterialLawManagerT GPU material law manager type (e.g. + * \c Opm::EclMaterialLaw::GpuManager). + * \tparam Storage Container template used for per-cell data. + * \tparam ThermalLawManagerT GPU thermal-law manager type (e.g. + * \c Opm::EclThermalLaw::GpuManager). Defaults + * to \c NoThermalLawManager, a no-op stub used + * by non-thermal callers. + */ +template class Storage = ::Opm::VectorWithDefaultAllocator, + class ThermalLawManagerT = ::Opm::NoThermalLawManager> +class GpuFlowProblem +{ +public: + using Scalar = ScalarT; + using EclMaterialLawManager = MaterialLawManagerT; + using MaterialLawParams = typename EclMaterialLawManager::MaterialLawParams; + using EclThermalLawManager = ThermalLawManagerT; + using SolidEnergyLawParams = typename EclThermalLawManager::SolidEnergyLawParams; + using ThermalConductionLawParams = typename EclThermalLawManager::ThermalConductionLawParams; + + /*! \brief True iff the thermal-law manager carries a real fluid-system + * tag (anything other than \c void) and therefore the + * thermal-extraction code paths should be enabled. */ + static constexpr bool hasThermal + = !std::is_same_v; + + /*! \brief Trivial nested model() helper that satisfies the + * \c problem.model().linearizer().getLinearizationType() chain + * that BlackOilIntensiveQuantities walks. */ + struct ModelView + { + struct LinearizerView + { + OPM_HOST_DEVICE LinearizationType getLinearizationType() const + { + return LinearizationType{}; + } + }; + + OPM_HOST_DEVICE LinearizerView linearizer() const + { + return LinearizerView{}; + } + }; + + OPM_HOST_DEVICE GpuFlowProblem() = default; + + OPM_HOST_DEVICE GpuFlowProblem(EclMaterialLawManager materialLawManager, + Storage porosity, + Storage rockCompressibility, + Storage rockReferencePressure, + Storage maxOilSaturation, + Storage maxOilVaporizationFactor, + Storage maxGasDissolutionFactor, + EclThermalLawManager thermalLawManager, + Storage rockFraction, + Storage pvtRegionIndex) + : materialLawManager_(std::move(materialLawManager)) + , porosity_(std::move(porosity)) + , rockCompressibility_(std::move(rockCompressibility)) + , rockReferencePressure_(std::move(rockReferencePressure)) + , maxOilSaturation_(std::move(maxOilSaturation)) + , maxOilVaporizationFactor_(std::move(maxOilVaporizationFactor)) + , maxGasDissolutionFactor_(std::move(maxGasDissolutionFactor)) + , thermalLawManager_(std::move(thermalLawManager)) + , rockFraction_(std::move(rockFraction)) + , pvtRegionIndex_(std::move(pvtRegionIndex)) + { + } + + /*! + * \brief Construct from any CPU \c FlowProblem (or \c FlowProblemBlackoil). + * + * Iterates over all grid degrees of freedom and copies the rock / + * mixing-control fields used by \c BlackOilIntensiveQuantities into local + * vectors. The inner material-law manager is constructed from + * \c *cpu.materialLawManager(), so the GPU manager type held by this + * problem must in turn be constructible from + * \c Opm::EclMaterialLaw::Manager (i.e. use CPU vector storage and + * piecewise-linear vector-storage two-phase laws). + * + * Only enabled when this problem itself uses CPU vector storage. + */ + template , + std::enable_if_t< + std::is_same_v>, + int> = 0> + explicit GpuFlowProblem(const CpuProblem& cpu) + : materialLawManager_(*cpu.materialLawManager(), cpu.model().numGridDof()) + { + const std::size_t n = cpu.model().numGridDof(); + porosity_.resize(n); + rockCompressibility_.resize(n); + rockReferencePressure_.resize(n); + maxOilSaturation_.resize(n); + maxOilVaporizationFactor_.resize(n); + maxGasDissolutionFactor_.resize(n); + for (std::size_t i = 0; i < n; ++i) { + const unsigned u = static_cast(i); + porosity_[i] = cpu.porosity(u, 0u); + rockCompressibility_[i] = cpu.rockCompressibility(u); + rockReferencePressure_[i] = cpu.rockReferencePressure(u); + maxOilSaturation_[i] = cpu.maxOilSaturation(u); + maxOilVaporizationFactor_[i] = cpu.maxOilVaporizationFactor(0u, u); + maxGasDissolutionFactor_[i] = cpu.maxGasDissolutionFactor(0u, u); + } + if constexpr (hasThermal) { + using FluidSystemTag = typename EclThermalLawManager::FluidSystem; + thermalLawManager_ + = ::Opm::EclThermalLaw::buildCpuManagerFromFlowProblem( + cpu, n); + rockFraction_.resize(n); + pvtRegionIndex_.resize(n); + for (std::size_t i = 0; i < n; ++i) { + const unsigned u = static_cast(i); + rockFraction_[i] = cpu.rockFraction(u, 0u); + pvtRegionIndex_[i] = static_cast(cpu.pvtRegionIndex(u)); + } + } + } + + /*! + * \brief Construct directly into device-resident \c GpuBuffer storage + * from any CPU \c FlowProblem (or \c FlowProblemBlackoil). + * + * Internally builds the per-cell rock/mixing data on the host as + * \c std::vector, then uploads each as a single + * \c GpuBuffer. The inner GPU material law manager is constructed + * from \c *cpu.materialLawManager() via its own GpuBuffer-storage + * constructor, which uploads the per-cell sample arrays. + * + * Only enabled when this problem itself uses \c GpuBuffer storage. + */ + template , + std::enable_if_t< + std::is_same_v>, + int> = 0> + explicit GpuFlowProblem(const CpuProblem& cpu) + : materialLawManager_(*cpu.materialLawManager(), cpu.model().numGridDof()) + , porosity_(extractRockField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.porosity(u, 0u)); + })) + , rockCompressibility_(extractRockField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.rockCompressibility(u)); + })) + , rockReferencePressure_(extractRockField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.rockReferencePressure(u)); + })) + , maxOilSaturation_(extractRockField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.maxOilSaturation(u)); + })) + , maxOilVaporizationFactor_(extractRockField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.maxOilVaporizationFactor(0u, u)); + })) + , maxGasDissolutionFactor_(extractRockField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.maxGasDissolutionFactor(0u, u)); + })) + , thermalLawManager_(buildGpuThermalManager(cpu)) + , rockFraction_(extractThermalScalarField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.rockFraction(u, 0u)); + })) + , pvtRegionIndex_(extractThermalIntField(cpu, [](const CpuProblem& p, unsigned u) { + return static_cast(p.pvtRegionIndex(u)); + })) + { + } + + OPM_HOST_DEVICE ModelView model() const + { + return ModelView{}; + } + + OPM_HOST_DEVICE int satnumRegionIndex(std::size_t elemIdx) const + { + return materialLawManager_.satnumRegionIdx(static_cast(elemIdx)); + } + + OPM_HOST_DEVICE MaterialLawParams materialLawParams(std::size_t elemIdx) const + { + return materialLawManager_.materialLawParams(static_cast(elemIdx)); + } + + OPM_HOST_DEVICE Scalar rockCompressibility(std::size_t elemIdx) const + { + return rockCompressibility_.size() == 0 ? Scalar(0) : rockCompressibility_[elemIdx]; + } + + OPM_HOST_DEVICE Scalar rockReferencePressure(std::size_t elemIdx) const + { + return rockReferencePressure_.size() == 0 ? Scalar(0) : rockReferencePressure_[elemIdx]; + } + + OPM_HOST_DEVICE Scalar porosity(std::size_t elemIdx, unsigned /*timeIdx*/) const + { + return porosity_.size() == 0 ? Scalar(0) : porosity_[elemIdx]; + } + + OPM_HOST_DEVICE Scalar maxOilVaporizationFactor(unsigned /*timeIdx*/, std::size_t elemIdx) const + { + return maxOilVaporizationFactor_.size() == 0 ? Scalar(0) : maxOilVaporizationFactor_[elemIdx]; + } + + OPM_HOST_DEVICE Scalar maxGasDissolutionFactor(unsigned /*timeIdx*/, std::size_t elemIdx) const + { + return maxGasDissolutionFactor_.size() == 0 ? Scalar(0) : maxGasDissolutionFactor_[elemIdx]; + } + + OPM_HOST_DEVICE Scalar maxOilSaturation(std::size_t elemIdx) const + { + return maxOilSaturation_.size() == 0 ? Scalar(0) : maxOilSaturation_[elemIdx]; + } + + /*! \brief Per-cell PVT region index. Returns 0 when the GpuFlowProblem + * instantiation has no thermal support. */ + OPM_HOST_DEVICE unsigned pvtRegionIndex(std::size_t elemIdx) const + { + return pvtRegionIndex_.size() == 0 + ? 0u + : static_cast(pvtRegionIndex_[elemIdx]); + } + + /*! \brief Per-cell rock fraction (1 - effective porosity). Returns 0 + * when the GpuFlowProblem instantiation has no thermal + * support. */ + OPM_HOST_DEVICE Scalar rockFraction(std::size_t elemIdx, unsigned /*timeIdx*/) const + { + return rockFraction_.size() == 0 ? Scalar(0) : rockFraction_[elemIdx]; + } + + /*! \brief Solid-energy law parameters for a single cell. Forwards to + * the embedded thermal-law manager. */ + OPM_HOST_DEVICE SolidEnergyLawParams + solidEnergyLawParams(std::size_t elemIdx, unsigned /*timeIdx*/) const + { + return thermalLawManager_.solidEnergyLawParams(static_cast(elemIdx)); + } + + /*! \brief Thermal-conduction law parameters for a single cell. + * Forwards to the embedded thermal-law manager. */ + OPM_HOST_DEVICE ThermalConductionLawParams + thermalConductionLawParams(std::size_t elemIdx, unsigned /*timeIdx*/) const + { + return thermalLawManager_.thermalConductionLawParams(static_cast(elemIdx)); + } + + /*! \brief Default rock-pore-volume multiplier (1 if no compressibility). */ + template + OPM_HOST_DEVICE Evaluation rockCompPoroMultiplier(const auto& /*intQuants*/, std::size_t /*elemIdx*/) const + { + return Evaluation(1.0); + } + + /*! \brief Default rock-trans multiplier (1 if no compressibility). */ + template + OPM_HOST_DEVICE Evaluation rockCompTransMultiplier(const auto& /*intQuants*/, std::size_t /*elemIdx*/) const + { + return Evaluation(1.0); + } + + /*! + * \brief Update the relative permeabilities of all phases for a single + * cell, in the same way as the CPU \c FlowProblem does. + * + * Calls \c MaterialLaw::relativePermeabilities on the cell's + * \c MaterialLawParams. The \c BlackOilIntensiveQuantities later divides + * the result by the phase viscosity to obtain the mobility. + * + * Directional relative permeabilities are not supported by the GPU + * problem, so the \p dirMob output is left untouched. + */ + template + OPM_HOST_DEVICE void updateRelperms(auto& mobility, + auto& /*dirMob*/, + FluidState& fluidState, + unsigned globalSpaceIdx) const + { + using ContainerT = std::decay_t; + const auto materialParams = materialLawParams(globalSpaceIdx); + EclMaterialLawManager::MaterialLaw::template relativePermeabilities< + ContainerT, FluidState, Args...>(mobility, materialParams, fluidState); + } + + /*! \name Direct accessors for serialization to the GPU. */ + //!\{ + const EclMaterialLawManager& materialLawManager() const { return materialLawManager_; } + EclMaterialLawManager& materialLawManager() { return materialLawManager_; } + const Storage& porosityStorage() const { return porosity_; } + Storage& porosityStorage() { return porosity_; } + const Storage& rockCompressibilityStorage() const { return rockCompressibility_; } + Storage& rockCompressibilityStorage() { return rockCompressibility_; } + const Storage& rockReferencePressureStorage() const { return rockReferencePressure_; } + Storage& rockReferencePressureStorage() { return rockReferencePressure_; } + const Storage& maxOilSaturationStorage() const { return maxOilSaturation_; } + Storage& maxOilSaturationStorage() { return maxOilSaturation_; } + const Storage& maxOilVaporizationFactorStorage() const { return maxOilVaporizationFactor_; } + Storage& maxOilVaporizationFactorStorage() { return maxOilVaporizationFactor_; } + const Storage& maxGasDissolutionFactorStorage() const { return maxGasDissolutionFactor_; } + Storage& maxGasDissolutionFactorStorage() { return maxGasDissolutionFactor_; } + const EclThermalLawManager& thermalLawManager() const { return thermalLawManager_; } + EclThermalLawManager& thermalLawManager() { return thermalLawManager_; } + const Storage& rockFractionStorage() const { return rockFraction_; } + Storage& rockFractionStorage() { return rockFraction_; } + const Storage& pvtRegionIndexStorage() const { return pvtRegionIndex_; } + Storage& pvtRegionIndexStorage() { return pvtRegionIndex_; } + //!\} + +private: + template + static Storage extractRockField(const CpuProblem& cpu, F f) + { + const std::size_t n = cpu.model().numGridDof(); + std::vector v(n); + for (std::size_t i = 0; i < n; ++i) { + v[i] = f(cpu, static_cast(i)); + } + if constexpr (std::is_same_v, + ::Opm::VectorWithDefaultAllocator>) { + return Storage(v.begin(), v.end()); + } else { + return Storage(v); + } + } + + /*! \brief Build the per-cell rockFraction storage; empty when the + * GpuFlowProblem instantiation has no thermal support. */ + template + static Storage extractThermalScalarField(const CpuProblem& cpu, F f) + { + if constexpr (!hasThermal) { + return Storage{}; + } else { + return extractRockField(cpu, f); + } + } + + /*! \brief Build the per-cell pvtRegionIndex storage; empty when the + * GpuFlowProblem instantiation has no thermal support. */ + template + static Storage extractThermalIntField(const CpuProblem& cpu, F f) + { + if constexpr (!hasThermal) { + return Storage{}; + } else { + const std::size_t n = cpu.model().numGridDof(); + std::vector v(n); + for (std::size_t i = 0; i < n; ++i) { + v[i] = f(cpu, static_cast(i)); + } + if constexpr (std::is_same_v, + ::Opm::VectorWithDefaultAllocator>) { + return Storage(v.begin(), v.end()); + } else { + return Storage(v); + } + } + } + + /*! \brief Build the GPU thermal-law manager from a CPU FlowProblem. + * Returns a default-constructed (empty) manager when this + * GpuFlowProblem instantiation has no thermal support. */ + template + static EclThermalLawManager buildGpuThermalManager(const CpuProblem& cpu) + { + if constexpr (!hasThermal) { + return EclThermalLawManager{}; + } else { + using FluidSystemTag = typename EclThermalLawManager::FluidSystem; + auto cpuMgr + = ::Opm::EclThermalLaw::buildCpuManagerFromFlowProblem( + cpu, cpu.model().numGridDof()); + return ::Opm::gpuistl::copy_to_gpu(cpuMgr); + } + } + + EclMaterialLawManager materialLawManager_{}; + Storage porosity_{}; + Storage rockCompressibility_{}; + Storage rockReferencePressure_{}; + Storage maxOilSaturation_{}; + Storage maxOilVaporizationFactor_{}; + Storage maxGasDissolutionFactor_{}; + EclThermalLawManager thermalLawManager_{}; + Storage rockFraction_{}; + Storage pvtRegionIndex_{}; +}; + +} // namespace Opm + +namespace Opm::gpuistl { + +/*! + * \brief Copy a CPU GpuFlowProblem to GPU-resident GpuBuffer storage. + * + * The material law manager is also moved to the GPU (via copy_to_gpu). + */ +template +auto copy_to_gpu( + const ::Opm::GpuFlowProblem& cpu) +{ + using GpuMaterialLawManagerBuffer + = decltype(::Opm::gpuistl::copy_to_gpu(cpu.materialLawManager())); + using GpuThermalLawManagerBuffer + = decltype(::Opm::gpuistl::copy_to_gpu(cpu.thermalLawManager())); + using GpuProblemType = ::Opm::GpuFlowProblem; + + return GpuProblemType(::Opm::gpuistl::copy_to_gpu(cpu.materialLawManager()), + GpuBuffer(cpu.porosityStorage()), + GpuBuffer(cpu.rockCompressibilityStorage()), + GpuBuffer(cpu.rockReferencePressureStorage()), + GpuBuffer(cpu.maxOilSaturationStorage()), + GpuBuffer(cpu.maxOilVaporizationFactorStorage()), + GpuBuffer(cpu.maxGasDissolutionFactorStorage()), + ::Opm::gpuistl::copy_to_gpu(cpu.thermalLawManager()), + GpuBuffer(cpu.rockFractionStorage()), + GpuBuffer(cpu.pvtRegionIndexStorage())); +} + +/*! + * \brief Make a non-owning GpuView based GpuFlowProblem from an owning + * GpuBuffer based GpuFlowProblem. + */ +template +auto make_view(::Opm::GpuFlowProblem& buf) +{ + using GpuMaterialLawManagerView + = decltype(::Opm::gpuistl::make_view(buf.materialLawManager())); + using GpuThermalLawManagerView + = decltype(::Opm::gpuistl::make_view(buf.thermalLawManager())); + using GpuProblemView = ::Opm::GpuFlowProblem; + + auto toView = [](auto& storage) { + using T = typename std::decay_t::value_type; + return GpuView(storage.data(), storage.size()); + }; + + return GpuProblemView(::Opm::gpuistl::make_view(buf.materialLawManager()), + toView(buf.porosityStorage()), + toView(buf.rockCompressibilityStorage()), + toView(buf.rockReferencePressureStorage()), + toView(buf.maxOilSaturationStorage()), + toView(buf.maxOilVaporizationFactorStorage()), + toView(buf.maxGasDissolutionFactorStorage()), + ::Opm::gpuistl::make_view(buf.thermalLawManager()), + toView(buf.rockFractionStorage()), + toView(buf.pvtRegionIndexStorage())); +} + +} // namespace Opm::gpuistl + +#endif // OPM_GPU_FLOW_PROBLEM_HPP diff --git a/tests/gpuistl/test_gpu_ecl_thermal_law_manager.cu b/tests/gpuistl/test_gpu_ecl_thermal_law_manager.cu new file mode 100644 index 00000000000..534e9d1abc2 --- /dev/null +++ b/tests/gpuistl/test_gpu_ecl_thermal_law_manager.cu @@ -0,0 +1,210 @@ +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#include + +#define BOOST_TEST_MODULE TestGpuEclThermalLawManager + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +namespace { + +using Scalar = double; + +// A minimal stand-in fluid system. The thermal-conduction law only ever +// asks the fluid system whether the gas phase is active and what its +// phase index is. Both are compile-time constants. +struct DummyFluidSystem { + static constexpr int numPhases = 3; + static constexpr int gasPhaseIdx = 2; + OPM_HOST_DEVICE static constexpr bool phaseIsActive(int phaseIdx) + { + return phaseIdx == gasPhaseIdx; + } +}; + +// A minimal stand-in fluid state with just enough surface area to feed +// EclSpecrockLaw / EclThconrLaw / their GPU counterparts. +template +struct DummyFluidState { + using ValueType = ScalarT; + ScalarT temperature_ = 0; + ScalarT gasSaturation_ = 0; + + OPM_HOST_DEVICE ScalarT temperature(int /*phaseIdx*/) const { return temperature_; } + OPM_HOST_DEVICE ScalarT saturation(int /*phaseIdx*/) const { return gasSaturation_; } + OPM_HOST_DEVICE DummyFluidSystem fluidSystem() const { return DummyFluidSystem{}; } +}; + +using GpuSpecrockParamsBuf = Opm::EclSpecrockLawParams; +using GpuSpecrockParamsView = Opm::EclSpecrockLawParams; +using GpuSpecrockLawView = Opm::EclSpecrockLaw; +using GpuThconrLaw = Opm::EclThconrLaw; + +using ManagerCpu = Opm::EclThermalLaw::GpuManager; +using ManagerBuf + = Opm::EclThermalLaw::GpuManager; +using ManagerView + = Opm::EclThermalLaw::GpuManager; + +// Kernel: per-cell, evaluate solid internal energy from the law manager +// at the supplied temperature, and total thermal conductivity at the +// supplied gas saturation. +__global__ void evaluateThermalKernel(ManagerView view, + const Scalar* temperatures, + const Scalar* gasSaturations, + Scalar* outSolidEnergy, + Scalar* outConductivity, + std::size_t numCells) +{ + const std::size_t i = + static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (i >= numCells) { + return; + } + DummyFluidState fs; + fs.temperature_ = temperatures[i]; + fs.gasSaturation_ = gasSaturations[i]; + + const auto solidEnergyParams = view.solidEnergyLawParams(static_cast(i)); + outSolidEnergy[i] = + GpuSpecrockLawView::solidInternalEnergy(solidEnergyParams, fs); + + const auto thermalConductionParams = + view.thermalConductionLawParams(static_cast(i)); + outConductivity[i] = + GpuThconrLaw::thermalConductivity(thermalConductionParams, fs); +} + +} // namespace + +BOOST_AUTO_TEST_CASE(SpecrockAndThconrEvaluateMatchesCpu) +{ + // -- Build a synthetic two-region SPECROCK plus per-cell THCONR setup + // on the host. Cells 0..2 belong to region 0, cells 3..5 to region 1. + constexpr std::size_t numCells = 6; + + const std::vector regionTemperatures0 = { 280.0, 320.0, 360.0, 400.0 }; + const std::vector regionEnergies0 = { 0.0, 4.0e7, 9.0e7, 1.5e8 }; + const std::vector regionTemperatures1 = { 280.0, 350.0, 420.0 }; + const std::vector regionEnergies1 = { 0.0, 3.5e7, 8.4e7 }; + + // Build a host-side CPU manager directly from the basic constructor. + Opm::EclSpecrockLawParams region0Params; + region0Params.setSamples(regionTemperatures0, regionEnergies0); + Opm::EclSpecrockLawParams region1Params; + region1Params.setSamples(regionTemperatures1, regionEnergies1); + + std::vector> cpuSolidEnergyParams; + cpuSolidEnergyParams.push_back(region0Params); + cpuSolidEnergyParams.push_back(region1Params); + + std::vector cpuElementToRegion(numCells); + for (std::size_t i = 0; i < numCells; ++i) { + cpuElementToRegion[i] = (i < 3) ? 0 : 1; + } + + std::vector> cpuThermalConductionParams(numCells); + for (std::size_t i = 0; i < numCells; ++i) { + cpuThermalConductionParams[i].setReferenceTotalThermalConductivity( + 100.0 + 5.0 * static_cast(i)); + cpuThermalConductionParams[i].setDTotalThermalConductivity_dSg( + 0.20 + 0.01 * static_cast(i)); + cpuThermalConductionParams[i].finalize(); + } + + ManagerCpu cpuManager(std::move(cpuSolidEnergyParams), + std::move(cpuElementToRegion), + std::move(cpuThermalConductionParams)); + + // -- Upload to the GPU and create a non-owning view. + auto bufManager = Opm::gpuistl::copy_to_gpu(cpuManager); + auto viewManager = Opm::gpuistl::make_view(bufManager); + + // -- Pick a per-cell test temperature and gas saturation each. + std::array hostTemperatures = { + 290.0, 310.0, 355.0, 285.0, 360.0, 410.0 + }; + std::array hostGasSaturations = { + 0.0, 0.1, 0.25, 0.5, 0.75, 0.99 + }; + + // -- Run the kernel. + auto dTOwner = Opm::gpuistl::make_gpu_unique_ptr_array(numCells); + auto dSgOwner = Opm::gpuistl::make_gpu_unique_ptr_array(numCells); + auto dEnergyOwner = Opm::gpuistl::make_gpu_unique_ptr_array(numCells); + auto dConductivityOwner = Opm::gpuistl::make_gpu_unique_ptr_array(numCells); + Scalar* dT = dTOwner.get(); + Scalar* dSg = dSgOwner.get(); + Scalar* dEnergy = dEnergyOwner.get(); + Scalar* dConductivity = dConductivityOwner.get(); + OPM_GPU_SAFE_CALL(cudaMemcpy(dT, hostTemperatures.data(), + numCells * sizeof(Scalar), cudaMemcpyHostToDevice)); + OPM_GPU_SAFE_CALL(cudaMemcpy(dSg, hostGasSaturations.data(), + numCells * sizeof(Scalar), cudaMemcpyHostToDevice)); + + evaluateThermalKernel<<<1, 32>>>(viewManager, dT, dSg, dEnergy, dConductivity, numCells); + OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); + OPM_GPU_SAFE_CALL(cudaGetLastError()); + + std::array hostEnergyResult {}; + std::array hostConductivityResult {}; + OPM_GPU_SAFE_CALL(cudaMemcpy(hostEnergyResult.data(), dEnergy, + numCells * sizeof(Scalar), cudaMemcpyDeviceToHost)); + OPM_GPU_SAFE_CALL(cudaMemcpy(hostConductivityResult.data(), dConductivity, + numCells * sizeof(Scalar), cudaMemcpyDeviceToHost)); + + // -- Compare against the CPU manager. + for (std::size_t i = 0; i < numCells; ++i) { + DummyFluidState fs; + fs.temperature_ = hostTemperatures[i]; + fs.gasSaturation_ = hostGasSaturations[i]; + + const auto& cpuSolidParams = + cpuManager.solidEnergyLawParams(static_cast(i)); + const Scalar cpuEnergy = + Opm::EclSpecrockLaw::solidInternalEnergy(cpuSolidParams, fs); + BOOST_CHECK_CLOSE(cpuEnergy, hostEnergyResult[i], 1e-9); + + const auto& cpuConductionParams = + cpuManager.thermalConductionLawParams(static_cast(i)); + const Scalar cpuConductivity = + GpuThconrLaw::thermalConductivity(cpuConductionParams, fs); + BOOST_CHECK_CLOSE(cpuConductivity, hostConductivityResult[i], 1e-9); + } +} diff --git a/tests/very_simple_deck.DATA b/tests/very_simple_deck.DATA new file mode 100644 index 00000000000..8c0623914ab --- /dev/null +++ b/tests/very_simple_deck.DATA @@ -0,0 +1,53 @@ +-- =============== RUNSPEC +RUNSPEC +DIMENS +3 3 3 / +EQLDIMS +/ +TABDIMS +/ +WATER +GAS +CO2STORE +METRIC + +-- =============== GRID +GRID +GRIDFILE +0 0 / +DX +27*1 / +DY +27*1 / +DZ +27*1 / +TOPS +9*0 / +PERMX +27*1013.25 / +PORO +27*0.25 / +COPY +PERMX PERMY / +PERMX PERMZ / +/ + +-- =============== PROPS +PROPS +SGWFN +0.000000E+00 0.000000E+00 1.000000E+00 3.060000E-02 +1.000000E+00 1.000000E+00 0.000000E+00 3.060000E-01 / + +-- =============== SOLUTION +SOLUTION +RPTRST +'BASIC=0' / +EQUIL +0 300 100 0 0 0 1 1 0 / + +-- =============== SCHEDULE +SCHEDULE +RPTRST +'BASIC=0' / +TSTEP +1 / From 8b5bb960bcd895f545bb7c45914582d99d152272 Mon Sep 17 00:00:00 2001 From: Kjetil Olsen Lye Date: Thu, 30 Apr 2026 21:48:03 +0000 Subject: [PATCH 4/5] blackoilintensivequantities: full property update on GPU Reworks BlackOilIntensiveQuantities so its update path is fully usable from device code (relaxed-constexpr-friendly storage, host/device-decorated helpers, moved init helpers). Adds tests/gpuistl/test_blackoilintensivequantities_gpu.cu which exercises the GPU update path end-to-end and matches it against the CPU reference, including derivatives. The test is gated on HIP or CUDA >= 13.1. --- CMakeLists.txt | 5 + CMakeLists_files.cmake | 4 + .../blackoil/blackoilintensivequantities.hh | 142 ++- .../test_blackoilintensivequantities_gpu.cu | 1007 +++++++++++++++++ 4 files changed, 1146 insertions(+), 12 deletions(-) create mode 100644 tests/gpuistl/test_blackoilintensivequantities_gpu.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index 48d9b8dbf7d..6e2a1baa07c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -353,6 +353,7 @@ macro(opm-simulators_sources_hook) # cuda warns when constxpr functions are used in kernels. # since the entire stl is constexpr ..., we enable relaxed flag set_source_files_properties( + tests/gpuistl/test_blackoilintensivequantities_gpu.cu tests/gpuistl/test_blackoilfluidstategpu.cu tests/gpuistl/test_gpu_ad.cu tests/gpuistl/test_gpu_ecl_thermal_law_manager.cu @@ -506,6 +507,7 @@ macro(opm-simulators_tests_hook) primary_variables_gpu solver_adapter throw_macros_on_gpu + blackoilintensivequantities_gpu ) if(TEST ${test}) set_tests_properties(${test} @@ -672,6 +674,9 @@ macro(opm-simulators_targets_hook) target_sources(test_RestartSerialization PRIVATE $) target_sources(test_glift1 PRIVATE $) target_sources(test_tpsa_localresidual PRIVATE $) + if (TARGET test_blackoilintensivequantities_gpu) + target_sources(test_blackoilintensivequantities_gpu PRIVATE $) + endif() if(MPI_FOUND) target_sources(test_chopstep PRIVATE $) endif() diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 4f8dbc3fad7..54253bf1e89 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -594,6 +594,10 @@ if(CUDA_FOUND OR hip_FOUND) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_primary_variables_gpu.cu) endif() ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_MiniMatrix.cu) + if(hip_FOUND OR CUDA_VERSION VERSION_GREATER_EQUAL 13.1) + ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_blackoilintensivequantities_gpu.cu) + endif() + # Boost < 1.75 + nvcc = trouble in this test if(Boost_VERSION VERSION_GREATER 1.74) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_MiniVector.cu) diff --git a/opm/models/blackoil/blackoilintensivequantities.hh b/opm/models/blackoil/blackoilintensivequantities.hh index c249bdd3b9f..b862cce00e3 100644 --- a/opm/models/blackoil/blackoilintensivequantities.hh +++ b/opm/models/blackoil/blackoilintensivequantities.hh @@ -175,7 +175,6 @@ public: fluidState_.setRsw(0.0); } } - BlackOilIntensiveQuantities(const BlackOilIntensiveQuantities& other) = default; BlackOilIntensiveQuantities& operator=(const BlackOilIntensiveQuantities& other) = default; @@ -184,6 +183,7 @@ public: // This ctor is used when switching to the GPU typetag which currently supports thermal effects and diffusion. template + requires (energyModuleType == EnergyModules::FullyImplicitThermal && (enableDiffusion != 0)) explicit BlackOilIntensiveQuantities( const BlackOilIntensiveQuantities& other, const FluidSystem& fsystem) : fluidState_(other.fluidState_.withOtherFluidSystem(fsystem)) @@ -206,6 +206,72 @@ public: static_assert(!enableDispersion); } + template + requires (energyModuleType == EnergyModules::FullyImplicitThermal && (enableDiffusion == 0)) + explicit BlackOilIntensiveQuantities( + const BlackOilIntensiveQuantities& other, const FluidSystem& fsystem) + : fluidState_(other.fluidState_.withOtherFluidSystem(fsystem)) + , BlackOilEnergyIntensiveQuantities( + other.rockInternalEnergy_, other.totalThermalConductivity_, other.rockFraction_) + , referencePorosity_(other.referencePorosity_) + , porosity_(other.porosity_) + , rockCompTransMultiplier_(other.rockCompTransMultiplier_) + , mobility_(other.mobility_) + , dirMob_(/*NOT YET SUPPORTED ON GPU*/) + { + static_assert(!enableSolvent); + static_assert(!enableExtbo); + static_assert(!enablePolymer); + static_assert(!enableFoam); + static_assert(!enableMICP); + static_assert(!enableBrine); + static_assert(!enableDispersion); + } + + template + requires (energyModuleType != EnergyModules::FullyImplicitThermal && (enableDiffusion != 0)) + explicit BlackOilIntensiveQuantities( + const BlackOilIntensiveQuantities& other, const FluidSystem& fsystem) + : fluidState_(other.fluidState_.withOtherFluidSystem(fsystem)) + , BlackOilDiffusionIntensiveQuantities( + other.tortuosities(), other.diffusionCoefficients()) + , referencePorosity_(other.referencePorosity_) + , porosity_(other.porosity_) + , rockCompTransMultiplier_(other.rockCompTransMultiplier_) + , mobility_(other.mobility_) + , dirMob_(/*NOT YET SUPPORTED ON GPU*/) + { + static_assert(!enableSolvent); + static_assert(!enableExtbo); + static_assert(!enablePolymer); + static_assert(!enableFoam); + static_assert(!enableMICP); + static_assert(!enableBrine); + static_assert(!enableDispersion); + } + + template + requires (energyModuleType != EnergyModules::FullyImplicitThermal && (enableDiffusion == 0)) + explicit BlackOilIntensiveQuantities( + const BlackOilIntensiveQuantities& other, const FluidSystem& fsystem) + : fluidState_(other.fluidState_.withOtherFluidSystem(fsystem)) + , referencePorosity_(other.referencePorosity_) + , porosity_(other.porosity_) + , rockCompTransMultiplier_(other.rockCompTransMultiplier_) + , mobility_(other.mobility_) + , dirMob_(/*NOT YET SUPPORTED ON GPU*/) + { + static_assert(!enableSolvent); + static_assert(!enableExtbo); + static_assert(!enablePolymer); + static_assert(!enableFoam); + static_assert(!enableMICP); + static_assert(!enableBrine); + static_assert(!enableDispersion); + } + + BlackOilIntensiveQuantities(const BlackOilIntensiveQuantities& other) = default; + /** * \brief Create a copy of this intensive quantities object * @@ -221,6 +287,45 @@ public: return newIntQuants; } + /*! + * \brief Field-by-field overlay of the BlackOil intensive-quantity values from + * another \c BlackOilIntensiveQuantities instantiation onto this one. + * + * Used by the experimental GPU intensive-quantities dispatcher to write the + * GPU-computed result onto a CPU-side \c IntensiveQuantities even when the + * two TypeTags are not value-compatible (e.g. when the CPU TypeTag enables + * dispersion or other modules that the GPU TypeTag does not). The fields + * actually computed by the dispatcher are copied here, including the + * \c mobility_ array which the GPU relperm path now populates correctly + * via \c iq.update() (see \c GpuBlackoilIntensiveQuantitiesDispatcher). + */ + template + OPM_HOST_DEVICE void overlayBlackOilFieldsFrom( + const BlackOilIntensiveQuantities& other) + { + // Full fluid-state copy (handles every stored field, including + // Rs/Rv/Rsw/Rvw, salt concentration, solvent fields, ...). This + // is intentionally chosen over a hand-picked field list so that + // the GPU dispatcher can be used as the *only* fill of the IQ + // cache (no CPU pre-pass required). + fluidState_.assign(other.fluidState_); + + for (unsigned phaseIdx = 0; phaseIdx < numPhases; ++phaseIdx) { + if (!FluidSystem::phaseIsActive(phaseIdx)) { + continue; + } + mobility_[phaseIdx] = other.mobility_[phaseIdx]; + } + if constexpr (energyModuleType == EnergyModules::FullyImplicitThermal) { + this->rockInternalEnergy_ = other.rockInternalEnergy_; + this->totalThermalConductivity_ = other.totalThermalConductivity_; + this->rockFraction_ = other.rockFraction_; + } + porosity_ = other.porosity_; + referencePorosity_ = other.referencePorosity_; + rockCompTransMultiplier_ = other.rockCompTransMultiplier_; + } + OPM_HOST_DEVICE void updateTempSalt(const Problem& problem, const PrimaryVariables& priVars, const unsigned globalSpaceIdx, @@ -430,11 +535,15 @@ public: } else { if (getFluidSystem().enableDissolvedGas()) { // Add So > 0? i.e. if only water set rs = 0) - const Evaluation& RsSat = enableExtbo ? asImp_().rs() : - getFluidSystem().saturatedDissolutionFactor(fluidState_, + Evaluation RsSat; + if constexpr (enableExtbo) { + RsSat = asImp_().rs(); + } else { + RsSat = getFluidSystem().saturatedDissolutionFactor(fluidState_, oilPhaseIdx, pvtRegionIdx, SoMax); + } fluidState_.setRs(min(RsMax, RsSat)); } else { @@ -448,11 +557,15 @@ public: } else { if (getFluidSystem().enableVaporizedOil() ) { // Add Sg > 0? i.e. if only water set rv = 0) - const Evaluation& RvSat = enableExtbo ? asImp_().rv() : - getFluidSystem().saturatedDissolutionFactor(fluidState_, + Evaluation RvSat; + if constexpr (enableExtbo) { + RvSat = asImp_().rv(); + } else { + RvSat = getFluidSystem().saturatedDissolutionFactor(fluidState_, gasPhaseIdx, pvtRegionIdx, SoMax); + } fluidState_.setRv(min(RvMax, RvSat)); } else { @@ -514,11 +627,16 @@ public: const auto [b, mu] = getFluidSystem().inverseFormationVolumeFactorAndViscosity(fluidState_, phaseIdx, pvtRegionIdx); fluidState_.setInvB(phaseIdx, b); for (int i = 0; i < nmobilities; ++i) { - if (enableExtbo && phaseIdx == oilPhaseIdx) { - (*mobilities[i])[phaseIdx] /= asImp_().oilViscosity(); - } - else if (enableExtbo && phaseIdx == gasPhaseIdx) { - (*mobilities[i])[phaseIdx] /= asImp_().gasViscosity(); + if constexpr (enableExtbo) { + if (phaseIdx == oilPhaseIdx) { + (*mobilities[i])[phaseIdx] /= asImp_().oilViscosity(); + } + else if (phaseIdx == gasPhaseIdx) { + (*mobilities[i])[phaseIdx] /= asImp_().gasViscosity(); + } + else { + (*mobilities[i])[phaseIdx] /= mu; + } } else { (*mobilities[i])[phaseIdx] /= mu; @@ -845,7 +963,7 @@ public: case Dir::ZPlus: return dirMob_->getArray(2)[phaseIdx]; default: - throw std::runtime_error("Unexpected face direction"); + OPM_THROW(std::runtime_error, "Unexpected face direction"); } } else{ @@ -906,7 +1024,7 @@ public: return BrineIntQua::permFactor(); } else { - throw std::logic_error("permFactor() called but salt precipitation or bioeffects are disabled"); + OPM_THROW(std::logic_error, "permFactor() called but salt precipitation or bioeffects are disabled"); } } diff --git a/tests/gpuistl/test_blackoilintensivequantities_gpu.cu b/tests/gpuistl/test_blackoilintensivequantities_gpu.cu new file mode 100644 index 00000000000..71ef4b067a7 --- /dev/null +++ b/tests/gpuistl/test_blackoilintensivequantities_gpu.cu @@ -0,0 +1,1007 @@ +/* + Copyright 2025 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#include + +// Tricks for older versions of ROCm that do not support alugrid +// or dune fem. Note that these changes are only valid for this test file. +#ifdef HAVE_DUNE_ALUGRID +#undef HAVE_DUNE_ALUGRID +#endif +#define HAVE_DUNE_ALUGRID 0 +#ifdef HAVE_DUNE_FEM +#undef HAVE_DUNE_FEM +#endif +#define HAVE_DUNE_FEM 0 + +#include +#include +#include +#define HAVE_ECL_INPUT 1 + + +#define BOOST_TEST_MODULE TestBlackOilIntensiveQuantitiesGPU + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + + +/// Build a CO2STORE WATER+GAS deck with the requested DIMENS. The grid is +/// a uniform unit-cube grid; PERMX/PORO are constant. EQUIL gives the +/// pressure profile so different cells (different depths) get different +/// fluid states, exercising the per-cell update on both CPU and GPU. +static std::string makeDeckString(int nx, int ny, int nz) +{ + const long total = static_cast(nx) * ny * nz; + const long areal = static_cast(nx) * ny; + std::ostringstream o; + o << "RUNSPEC\n" + << "DIMENS\n" << nx << ' ' << ny << ' ' << nz << " /\n" + << "EQLDIMS\n/\n" + << "TABDIMS\n/\n" + << "WATER\nGAS\nCO2STORE\nTHERMAL\nMETRIC\n" + << "GRID\n" + << "GRIDFILE\n0 0 /\n" + << "DX\n" << total << "*1 /\n" + << "DY\n" << total << "*1 /\n" + << "DZ\n" << total << "*1 /\n" + << "TOPS\n" << areal << "*0 /\n" + << "PERMX\n" << total << "*1013.25 /\n" + << "PORO\n" << total << "*0.25 /\n" + << "COPY\nPERMX PERMY /\nPERMX PERMZ /\n/\n" + << "THCONR\n" << total << "*1.5 /\n" + << "PROPS\n" + << "SGWFN\n" + << "0.000000E+00 0.000000E+00 1.000000E+00 3.060000E-02\n" + << "1.000000E+00 1.000000E+00 0.000000E+00 3.060000E-01 /\n" + << "SPECROCK\n20 2125.0 100 2125.0 /\n" + << "SOLUTION\n" + << "RPTRST\n'BASIC=0' /\n" + << "EQUIL\n0 300 100 0 0 0 1 1 0 /\n" + << "RTEMPVD\n0 36.12\n1000 70.0 /\n" + << "SCHEDULE\n" + << "RPTRST\n'BASIC=0' /\n" + << "TSTEP\n1 /"; + return o.str(); +} + + +static constexpr const char* deckString1 = "-- =============== RUNSPEC\n" + "RUNSPEC\n" + "DIMENS\n" + "3 3 3 /\n" + "EQLDIMS\n" + "/\n" + "TABDIMS\n" + "/\n" + "WATER\n" + "GAS\n" + "CO2STORE\n" + "THERMAL\n" + "METRIC\n" + "-- =============== GRID\n" + "GRID\n" + "GRIDFILE\n" + "0 0 /\n" + "DX\n" + "27*1 /\n" + "DY\n" + "27*1 /\n" + "DZ\n" + "27*1 /\n" + "TOPS\n" + "9*0 /\n" + "PERMX\n" + "27*1013.25 /\n" + "PORO\n" + "27*0.25 /\n" + "COPY\n" + "PERMX PERMY /\n" + "PERMX PERMZ /\n" + "/\n" + "THCONR\n" + "27*1.5 /\n" + "-- =============== PROPS\n" + "PROPS\n" + "SGWFN\n" + "0.000000E+00 0.000000E+00 1.000000E+00 3.060000E-02\n" + "1.000000E+00 1.000000E+00 0.000000E+00 3.060000E-01 /\n" + "SPECROCK\n" + "20 2125.0 100 2125.0 /\n" + "-- =============== SOLUTION\n" + "SOLUTION\n" + "RPTRST\n" + "'BASIC=0' /\n" + "EQUIL\n" + "0 300 100 0 0 0 1 1 0 /\n" + "RTEMPVD\n" + "0 36.12\n" + "1000 70.0 /\n" + "-- =============== SCHEDULE\n" + "SCHEDULE\n" + "RPTRST\n" + "'BASIC=0' /\n" + "TSTEP\n" + "1 /"; + +using ScalarToUse = Opm::GetPropType; + +using GpuB = Opm::gpuistl::GpuBuffer; +using GpuV = Opm::gpuistl::GpuView; +using FluidSystem = Opm::BlackOilFluidSystem; +using Evaluation = Opm::DenseAd::Evaluation; +using BlackOilFluidSystemView + = Opm::BlackOilFluidSystemNonStatic; + +template +OPM_HOST_DEVICE double asDouble(const Value& value) +{ + if constexpr (requires { value.value(); }) { + return static_cast(value.value()); + } + else { + return static_cast(value); + } +} + +/// Compare two scalar-or-Evaluation quantities. Always compares the value +/// part. When \p checkDerivatives is true and both operands are +/// `DenseAd::Evaluation`s, also checks every partial derivative using +/// \c BOOST_CHECK_CLOSE with the supplied tolerance and increments +/// \p derivativeComparisons by the number of partial derivatives compared. +/// If \p checkDerivatives is true but the operands are not Evaluations, +/// the test fails with \c BOOST_FAIL so a misconfigured derivative test +/// cannot silently pass on plain scalars. +template +void checkValueAndDerivatives(const CpuValue& cpuValue, + const GpuValue& gpuValue, + double tol, + bool checkDerivatives, + const char* label, + std::size_t& derivativeComparisons) +{ + BOOST_CHECK_CLOSE(asDouble(cpuValue), asDouble(gpuValue), tol); + + if (!checkDerivatives) { + return; + } + + if constexpr (requires { + CpuValue::numVars; + cpuValue.derivative(0); + GpuValue::numVars; + gpuValue.derivative(0); + }) + { + static_assert(static_cast(CpuValue::numVars) + == static_cast(GpuValue::numVars), + "CPU and GPU Evaluation types must have the same numVars"); + static_assert(static_cast(CpuValue::numVars) > 0, + "Derivative comparison requires at least one partial derivative"); + for (int derivIdx = 0; derivIdx < CpuValue::numVars; ++derivIdx) { + const double cpuDerivative = static_cast(cpuValue.derivative(derivIdx)); + const double gpuDerivative = static_cast(gpuValue.derivative(derivIdx)); + BOOST_CHECK_MESSAGE(std::abs(cpuDerivative - gpuDerivative) + <= tol * (1.0 + std::abs(cpuDerivative)), + label << " derivative[" << derivIdx + << "] cpu=" << cpuDerivative + << " gpu=" << gpuDerivative); + ++derivativeComparisons; + } + } + else { + BOOST_FAIL(std::string("checkDerivatives=true but ") + label + + " is not a DenseAd::Evaluation"); + } +} + +namespace Opm +{ +namespace Properties +{ +namespace TTag +{ + +/// CPU side mirror of the simulation TypeTag, with diffusion/dispersion +/// disabled because the simpler \c BlackOilIntensiveQuantities::update() +/// overload used in this test does not support them. All other property +/// bindings (Indices, MaterialLaw, FluidSystem, etc.) are inherited as-is +/// from \c FlowGasWaterEnergyProblem. +struct FlowGasWaterEnergyProblemTest { + using InheritsFrom = std::tuple; +}; + +/// CPU TypeTag used by the real-deck test: identical to the production +/// \c FlowGasWaterEnergyProblem (does not force-disable diffusion or +/// dispersion) so that decks enabling those keywords can construct the +/// simulator successfully. +struct FlowGasWaterEnergyProblemTestRealDeck { + using InheritsFrom = std::tuple; +}; + +} // namespace TTag + +template +struct EnableDiffusion +{ static constexpr bool value = false; }; + +template +struct EnableDispersion +{ static constexpr bool value = false; }; + +} // namespace Properties + +} // namespace Opm + +using TypeTag = Opm::Properties::TTag::FlowGasWaterEnergyProblemTest; +using TypeNacht = Opm::Properties::TTag::FlowGasWaterEnergyDummyProblemGPU; +using TypeTagGPU = Opm::Properties::TTag::FlowGasWaterEnergyProblemGPU; + +template +__global__ void +updateAllCellsKernel(GpuProblem problem, + Opm::gpuistl::GpuView primaryVariables, + Opm::gpuistl::GpuView outIntensiveQuantities, + std::size_t numCells) +{ + const std::size_t i = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (i >= numCells) { + return; + } + IntensiveQuantities intensiveQuantities = outIntensiveQuantities[i]; + intensiveQuantities.updateSaturations(primaryVariables[i], 0, Opm::LinearizationType{}); + intensiveQuantities.update(problem, primaryVariables[i], static_cast(i), 0); + intensiveQuantities.updateEnergyQuantities_(problem, static_cast(i), 0u); + outIntensiveQuantities[i] = intensiveQuantities; +} + +/// One-shot global initialization of MPI, communicator, and parameter +/// registration. Safe to call from multiple BOOST test cases; only the +/// first call performs the work. +static void initSimulatorOnce() +{ + static bool done = false; + if (done) { + return; + } + using namespace Opm; + int argc1 = boost::unit_test::framework::master_test_suite().argc; + char** argv1 = boost::unit_test::framework::master_test_suite().argv; +#if HAVE_DUNE_FEM + Dune::Fem::MPIManager::initialize(argc1, argv1); +#else + Dune::MPIHelper::instance(argc1, argv1); +#endif + FlowGenericVanguard::setCommunication(std::make_unique()); + Opm::ThreadManager::registerParameters(); + Opm::NewtonMethodParams::registerParameters(); + BlackoilModelParameters::registerParameters(); + AdaptiveTimeStepping::registerParameters(); + Parameters::Register( + "Dummy added for the well model to compile."); + registerAllParameters_(/*finalizeRegistration=*/false); + // Also register parameters for the RealDeck TypeTag here so that all + // tests in the same process share a single (still-open) registration + // pass. Once endRegistration() (called via setupParameters_) closes + // registration, no further parameters can be added globally. + using RealDeckTypeTag = Opm::Properties::TTag::FlowGasWaterEnergyProblemTestRealDeck; + AdaptiveTimeStepping::registerParameters(); + registerAllParameters_(/*finalizeRegistration=*/false); + Parameters::endRegistration(); + done = true; +} + +/// Run the per-cell intensive quantities update on both GPU and CPU for the +/// deck stored at \p deckPath, optionally measuring wall-clock time on both +/// sides. +/// +/// Correctness is checked by comparing CPU vs GPU intensive quantities +/// cell-by-cell at indices \p i = 0, \p sampleStride, 2*\p sampleStride, ... +/// +/// \param deckPath path to a CO2STORE WATER+GAS deck written to disk +/// \param expectedNumCells number of grid cells the deck describes +/// \param sampleStride stride for sampled correctness; 1 means full check +/// \param measureTiming if true, prints CPU and GPU wall-clock to stdout +static void runIntensiveQuantitiesTestForDeck(const std::string& deckPath, + std::size_t expectedNumCells, + std::size_t sampleStride, + bool measureTiming, + bool checkDerivatives = false) +{ + Opm::resetLocale(); + initSimulatorOnce(); + + using namespace Opm; + using Simulator = Opm::GetPropType; + + const auto filenameArg = std::string{"--ecl-deck-file-name="} + deckPath; + const char* argv2[] = { + "test_gpuflowproblem", + filenameArg.c_str(), + "--check-satfunc-consistency=false", + }; + Opm::setupParameters_(/*argc=*/sizeof(argv2) / sizeof(argv2[0]), + argv2, + /*registerParams=*/false, + /*allowUnused=*/true, + /*handleHelp=*/false, + /*myRank=*/0); + + Opm::FlowGenericVanguard::readDeck(deckPath); + auto sim = std::make_unique(); + + // The Simulator constructor only calls finishInit; the solution vector + // is not yet EQUIL-populated. Trigger initial-solution application so + // model().solution(0)[i] holds the real per-cell primary variables + // (otherwise zero-initialized PVs make the GPU update produce + // non-finite densities and trip the assertion in + // BlackOilIntensiveQuantities::assertFiniteMembers()). + sim->model().applyInitialSolution(); + + auto& cpuProblem = sim->problem(); + auto& dynamicFluidSystem = FluidSystem::getNonStaticInstance(); + + auto dynamicGpuFluidSystemBuffer = ::Opm::gpuistl::copy_to_gpu(dynamicFluidSystem); + auto dynamicGpuFluidSystemView = ::Opm::gpuistl::make_view(dynamicGpuFluidSystemBuffer); + + // Place the FluidSystemView in unified memory so that the device-side + // BlackOilFluidState pointer dereference is valid both on host and on + // device. + using FluidSystemViewType = std::decay_t; + FluidSystemViewType* managedFluidSystemView = nullptr; + auto managedFluidSystemViewOwner = + Opm::gpuistl::make_gpu_managed_unique_ptr(dynamicGpuFluidSystemView); + managedFluidSystemView = managedFluidSystemViewOwner.get(); + + using CpuMaterialLawManager = typename Opm::GetProp::EclMaterialLawManager; + using Traits = typename CpuMaterialLawManager::MaterialLaw::Traits; + using TwoPhaseTraits = Opm::TwoPhaseMaterialTraits; + using GpuPiecewiseLinearParams = Opm::PiecewiseLinearTwoPhaseMaterialParams< + TwoPhaseTraits, Opm::gpuistl::GpuView>; + using GpuPiecewiseLinearLaw = Opm::PiecewiseLinearTwoPhaseMaterial; + using GpuMaterialLawParams = Opm::EclTwoPhaseMaterialParams< + Traits, GpuPiecewiseLinearParams, GpuPiecewiseLinearParams, GpuPiecewiseLinearParams, + Opm::gpuistl::ValueAsPointer>; + using GpuMaterialLaw = Opm::EclTwoPhaseMaterial< + Traits, GpuPiecewiseLinearLaw, GpuPiecewiseLinearLaw, GpuPiecewiseLinearLaw, + GpuMaterialLawParams>; + using GpuManagerBuffer = Opm::EclMaterialLaw::GpuManager< + Traits, GpuPiecewiseLinearLaw, GpuPiecewiseLinearLaw, + Opm::gpuistl::GpuBuffer, GpuMaterialLaw>; + using GpuThermalManagerBuffer = Opm::EclThermalLaw::GpuManager< + ScalarToUse, FluidSystemViewType, + Opm::gpuistl::GpuBuffer, Opm::gpuistl::GpuView>; + using GpuProblemBuffer = Opm::GpuFlowProblem; + + GpuProblemBuffer gpuProblemBuffer(cpuProblem); + auto gpuProblemView = Opm::gpuistl::make_view(gpuProblemBuffer); + + const std::size_t numCells = cpuProblem.model().numGridDof(); + BOOST_REQUIRE_EQUAL(numCells, expectedNumCells); + BOOST_REQUIRE_GT(numCells, 0u); + + using PrimaryVariablesCpu = Opm::GetPropType; + using PrimaryVariablesGpu = Opm::BlackOilPrimaryVariables; + using IntensiveQuantitiesCpu = Opm::BlackOilIntensiveQuantities; + using IntensiveQuantitiesGpu = Opm::BlackOilIntensiveQuantities; + + // Pull the EQUIL-initialized per-cell primary variables straight from + // the simulator's solution vector. Using zero-initialized PVs makes the + // GPU update produce non-finite densities (e.g. for CO2STORE the PVT + // tables are not defined at p=0), which trips the assertion in + // BlackOilIntensiveQuantities::assertFiniteMembers(). + const auto& cpuSolution = cpuProblem.model().solution(/*timeIdx=*/0); + BOOST_REQUIRE_EQUAL(cpuSolution.size(), numCells); + std::vector cpuPrimaryVariables(numCells); + std::vector hostPrimaryVariablesGpu; + hostPrimaryVariablesGpu.reserve(numCells); + for (std::size_t i = 0; i < numCells; ++i) { + cpuPrimaryVariables[i] = cpuSolution[i]; + hostPrimaryVariablesGpu.emplace_back(cpuPrimaryVariables[i]); + } + Opm::gpuistl::GpuBuffer primaryVariablesBuffer(hostPrimaryVariablesGpu); + + IntensiveQuantitiesCpu cpuIntensiveQuantitiesPrototype; + IntensiveQuantitiesGpu gpuIntensiveQuantitiesPrototype = cpuIntensiveQuantitiesPrototype.template withOtherFluidSystem(*managedFluidSystemView); + if (measureTiming) { + std::cout << std::format("[timing] sizeof(IntensiveQuantitiesGpu)={} sizeof(IntensiveQuantitiesCpu)={} " + "sizeof(PrimaryVariablesGpu)={} cells={} " + "approx GPU mem for IQ buffer={:.2f} MiB\n", + sizeof(IntensiveQuantitiesGpu), sizeof(IntensiveQuantitiesCpu), + sizeof(PrimaryVariablesGpu), numCells, + (sizeof(IntensiveQuantitiesGpu) * numCells) / (1024.0 * 1024.0)); + } + std::vector hostIntensiveQuantities(numCells, gpuIntensiveQuantitiesPrototype); + Opm::gpuistl::GpuBuffer intensiveQuantitiesBuffer(hostIntensiveQuantities); + + const unsigned blockSize = 64u; + const unsigned gridSize = static_cast((numCells + blockSize - 1u) / blockSize); + + // Time the GPU kernel using CUDA events (excludes the host-to-device + // setup and device-to-host copy already done via GpuBuffer ctors above). + cudaEvent_t eventStart{}, eventStop{}; + OPM_GPU_SAFE_CALL(cudaEventCreate(&eventStart)); + OPM_GPU_SAFE_CALL(cudaEventCreate(&eventStop)); + OPM_GPU_SAFE_CALL(cudaEventRecord(eventStart)); + updateAllCellsKernel<<>>( + gpuProblemView, + Opm::gpuistl::GpuView(primaryVariablesBuffer.data(), primaryVariablesBuffer.size()), + Opm::gpuistl::GpuView(intensiveQuantitiesBuffer.data(), intensiveQuantitiesBuffer.size()), + numCells); + OPM_GPU_SAFE_CALL(cudaEventRecord(eventStop)); + OPM_GPU_SAFE_CALL(cudaEventSynchronize(eventStop)); + OPM_GPU_SAFE_CALL(cudaGetLastError()); + float gpuMilliseconds = 0.0f; + OPM_GPU_SAFE_CALL(cudaEventElapsedTime(&gpuMilliseconds, eventStart, eventStop)); + OPM_GPU_SAFE_CALL(cudaEventDestroy(eventStart)); + OPM_GPU_SAFE_CALL(cudaEventDestroy(eventStop)); + + // CPU reference: use the IntensiveQuantities the simulator itself + // computed during invalidateAndUpdateIntensiveQuantities(0). This keeps + // the CPU side consistent with the static FluidSystem state of the + // current simulator instance (which may otherwise diverge from the + // dynamic GPU FluidSystem when other tests have run before this one). + std::vector cpuIntensiveQuantities(numCells); + const auto cpuT0 = std::chrono::steady_clock::now(); + sim->model().invalidateAndUpdateIntensiveQuantities(/*timeIdx=*/0); + for (std::size_t i = 0; i < numCells; ++i) { + const auto* cached = sim->model().cachedIntensiveQuantities(static_cast(i), 0); + BOOST_REQUIRE(cached != nullptr); + cpuIntensiveQuantities[i] = *cached; + } + const auto cpuT1 = std::chrono::steady_clock::now(); + const double cpuMilliseconds = + std::chrono::duration(cpuT1 - cpuT0).count(); + + if (measureTiming) { + std::cout << std::format("[timing] cells={} CPU={:.3f} ms GPU(kernel)={:.3f} ms " + "speedup={:.2f}x\n", + numCells, cpuMilliseconds, gpuMilliseconds, + cpuMilliseconds / gpuMilliseconds); + } + + // Bring GPU intensive quantities back and compare cells (sampled by sampleStride). + std::vector gpuIntensiveQuantitiesHost(numCells, gpuIntensiveQuantitiesPrototype); + OPM_GPU_SAFE_CALL(cudaMemcpy(gpuIntensiveQuantitiesHost.data(), + intensiveQuantitiesBuffer.data(), + numCells * sizeof(IntensiveQuantitiesGpu), + cudaMemcpyDeviceToHost)); + BOOST_REQUIRE_EQUAL(gpuIntensiveQuantitiesHost.size(), numCells); + + constexpr double tol = 1e-6; + // Active phase indices for CO2STORE WATER+GAS (oil disabled). + constexpr unsigned waterPhaseIdx = FluidSystem::waterPhaseIdx; + constexpr unsigned gasPhaseIdx = FluidSystem::gasPhaseIdx; + const unsigned activePhases[] = {waterPhaseIdx, gasPhaseIdx}; + + std::size_t checked = 0; + std::size_t derivativeComparisons = 0; + for (std::size_t i = 0; i < numCells; i += sampleStride) { + const auto& cpuIQ = cpuIntensiveQuantities[i]; + const auto& gpuIQ = gpuIntensiveQuantitiesHost[i]; + const auto& cpuFluidState = cpuIQ.fluidState(); + const auto& gpuFluidState = gpuIQ.fluidState(); + + // Per-phase fluid state quantities for the active phases. Restricted + // to fields that are stored directly on the FluidState (no call into + // the underlying FluidSystem), since the GPU-side FluidState's + // FluidSystem holds device pointers (GpuView) that are not + // dereferenceable on the host. + for (unsigned p : activePhases) { + checkValueAndDerivatives(cpuFluidState.saturation(p), + gpuFluidState.saturation(p), + tol, checkDerivatives, "saturation", + derivativeComparisons); + checkValueAndDerivatives(cpuFluidState.pressure(p), + gpuFluidState.pressure(p), + tol, checkDerivatives, "pressure", + derivativeComparisons); + checkValueAndDerivatives(cpuFluidState.density(p), + gpuFluidState.density(p), + tol, checkDerivatives, "density", + derivativeComparisons); + checkValueAndDerivatives(cpuFluidState.invB(p), + gpuFluidState.invB(p), + tol, checkDerivatives, "invB", + derivativeComparisons); + // Mobility now goes through the opm-common EclTwoPhaseMaterial + // (relativePermeabilities) on both CPU and GPU, so the values + // must agree to the same tolerance as the rest of the IQ. + checkValueAndDerivatives(cpuIQ.mobility(p), + gpuIQ.mobility(p), + tol, checkDerivatives, "mobility", + derivativeComparisons); + } + + // Scalar / per-cell quantities. Both CPU and GPU FluidState configs + // have enableVapwat=true and enableDisgasInWater=true so Rsw/Rvw are + // stored on both sides. + checkValueAndDerivatives(cpuFluidState.Rsw(), gpuFluidState.Rsw(), + tol, checkDerivatives, "Rsw", + derivativeComparisons); + checkValueAndDerivatives(cpuFluidState.Rvw(), gpuFluidState.Rvw(), + tol, checkDerivatives, "Rvw", + derivativeComparisons); + BOOST_CHECK_EQUAL(static_cast(cpuFluidState.pvtRegionIndex()), + static_cast(gpuFluidState.pvtRegionIndex())); + + checkValueAndDerivatives(cpuIQ.porosity(), gpuIQ.porosity(), + tol, checkDerivatives, "porosity", + derivativeComparisons); + BOOST_CHECK_CLOSE(static_cast(cpuIQ.referencePorosity()), + static_cast(gpuIQ.referencePorosity()), tol); + checkValueAndDerivatives(cpuIQ.rockCompTransMultiplier(), + gpuIQ.rockCompTransMultiplier(), + tol, checkDerivatives, "rockCompTransMultiplier", + derivativeComparisons); + + ++checked; + } + BOOST_TEST_MESSAGE("Per-cell GPU vs CPU IQ comparison: checked " + << checked << " / " << numCells << " cells"); + if (checkDerivatives) { + BOOST_TEST_MESSAGE("Derivative comparisons performed: " << derivativeComparisons); + // Guard against silent regressions: if checkDerivatives is requested + // but the helper never actually compared a derivative (e.g. because + // the IQ types degraded to plain scalars), fail loudly. + BOOST_REQUIRE_GT(derivativeComparisons, 0u); + } +} + +/// RAII wrapper around a temporary file. The file is deleted when the +/// object goes out of scope. +struct TemporaryFile { + std::filesystem::path path; + explicit TemporaryFile(std::string_view filename) + : path(std::filesystem::temp_directory_path() / filename) {} + ~TemporaryFile() { std::filesystem::remove(path); } + TemporaryFile(const TemporaryFile&) = delete; + TemporaryFile& operator=(const TemporaryFile&) = delete; + TemporaryFile(TemporaryFile&&) = delete; + TemporaryFile& operator=(TemporaryFile&&) = delete; +}; + +/// Variant of \c runIntensiveQuantitiesTestForDeck that takes the per-cell +/// primary variables from the simulator's initial (EQUIL-driven) solution +/// vector instead of using a single uniform prototype. This exercises the +/// exact code path used by the dispatcher in production: +/// \c BlackOilPrimaryVariables(cpuPrimaryVariables[i]) -> GPU +/// kernel update -> compare CPU vs GPU IQ field-by-field. \p sampleStride +/// controls how often we check (1 means every cell). +static void runIntensiveQuantitiesTestFromSimulatorSolution(const std::string& deckPath, + std::size_t sampleStride) +{ + Opm::resetLocale(); + + using namespace Opm; + using LocalTypeTag = Opm::Properties::TTag::FlowGasWaterEnergyProblemTestRealDeck; + using LocalGpuTypeTag = Opm::Properties::TTag::FlowGasWaterEnergyDummyProblemGPU; + using Simulator = Opm::GetPropType; + + // Reuse the global init: it registers parameters for both TypeTags so + // this test works whether it runs first or after another test in the + // same process (parameter registration is closed after the first + // setupParameters_ call). + initSimulatorOnce(); + + const auto filenameArg = std::string{"--ecl-deck-file-name="} + deckPath; + const char* argv2[] = { + "test_gpuflowproblem", + filenameArg.c_str(), + "--check-satfunc-consistency=false", + "--enable-tuning=false", + }; + Opm::setupParameters_(/*argc=*/sizeof(argv2) / sizeof(argv2[0]), + argv2, + /*registerParams=*/false, + /*allowUnused=*/true, + /*handleHelp=*/false, + /*myRank=*/0); + + Opm::FlowGenericVanguard::readDeck(deckPath); + auto sim = std::make_unique(); + + // The Simulator constructor only calls finishInit; the solution vector + // is not yet EQUIL-populated. Trigger initial-solution application so + // model().solution(0)[i] holds the real per-cell primary variables. + sim->model().applyInitialSolution(); + + auto& cpuProblem = sim->problem(); + auto& dynamicFluidSystem = FluidSystem::getNonStaticInstance(); + + BOOST_TEST_MESSAGE(std::format( + "FluidSystem phase activation: static[O={}, W={}, G={}] dynamic[O={}, W={}, G={}]", + FluidSystem::phaseIsActive(FluidSystem::oilPhaseIdx), + FluidSystem::phaseIsActive(FluidSystem::waterPhaseIdx), + FluidSystem::phaseIsActive(FluidSystem::gasPhaseIdx), + dynamicFluidSystem.phaseIsActive(FluidSystem::oilPhaseIdx), + dynamicFluidSystem.phaseIsActive(FluidSystem::waterPhaseIdx), + dynamicFluidSystem.phaseIsActive(FluidSystem::gasPhaseIdx))); + + auto dynamicGpuFluidSystemBuffer = ::Opm::gpuistl::copy_to_gpu(dynamicFluidSystem); + auto dynamicGpuFluidSystemView = ::Opm::gpuistl::make_view(dynamicGpuFluidSystemBuffer); + + using FluidSystemViewType = std::decay_t; + FluidSystemViewType* managedFluidSystemView = nullptr; + auto managedFluidSystemViewOwner = + Opm::gpuistl::make_gpu_managed_unique_ptr(dynamicGpuFluidSystemView); + managedFluidSystemView = managedFluidSystemViewOwner.get(); + + BOOST_TEST_MESSAGE(std::format( + "GPU fluid-system view phase activation: O={} W={} G={}", + managedFluidSystemView->phaseIsActive(FluidSystem::oilPhaseIdx), + managedFluidSystemView->phaseIsActive(FluidSystem::waterPhaseIdx), + managedFluidSystemView->phaseIsActive(FluidSystem::gasPhaseIdx))); + + using CpuMaterialLawManager = typename Opm::GetProp::EclMaterialLawManager; + using Traits = typename CpuMaterialLawManager::MaterialLaw::Traits; + using TwoPhaseTraits = Opm::TwoPhaseMaterialTraits; + using GpuPiecewiseLinearParams = Opm::PiecewiseLinearTwoPhaseMaterialParams< + TwoPhaseTraits, Opm::gpuistl::GpuView>; + using GpuPiecewiseLinearLaw = Opm::PiecewiseLinearTwoPhaseMaterial; + using GpuMaterialLawParams = Opm::EclTwoPhaseMaterialParams< + Traits, GpuPiecewiseLinearParams, GpuPiecewiseLinearParams, GpuPiecewiseLinearParams, + Opm::gpuistl::ValueAsPointer>; + using GpuMaterialLaw = Opm::EclTwoPhaseMaterial< + Traits, GpuPiecewiseLinearLaw, GpuPiecewiseLinearLaw, GpuPiecewiseLinearLaw, + GpuMaterialLawParams>; + using GpuManagerBuffer = Opm::EclMaterialLaw::GpuManager< + Traits, GpuPiecewiseLinearLaw, GpuPiecewiseLinearLaw, + Opm::gpuistl::GpuBuffer, GpuMaterialLaw>; + using GpuThermalManagerBuffer = Opm::EclThermalLaw::GpuManager< + ScalarToUse, FluidSystemViewType, + Opm::gpuistl::GpuBuffer, Opm::gpuistl::GpuView>; + using GpuProblemBuffer = Opm::GpuFlowProblem; + + GpuProblemBuffer gpuProblemBuffer(cpuProblem); + auto gpuProblemView = Opm::gpuistl::make_view(gpuProblemBuffer); + + const std::size_t numCells = cpuProblem.model().numGridDof(); + BOOST_REQUIRE_GT(numCells, 0u); + BOOST_TEST_MESSAGE("Real-deck test: numCells=" << numCells); + + using PrimaryVariablesCpu = Opm::GetPropType; + using PrimaryVariablesGpu = Opm::BlackOilPrimaryVariables; + using IntensiveQuantitiesCpu = Opm::BlackOilIntensiveQuantities; + using IntensiveQuantitiesGpu = Opm::BlackOilIntensiveQuantities; + + // Pull the EQUIL-initialized per-cell primary variables straight from + // the simulator's solution vector and convert them to the GPU + // PrimaryVariables type cell-by-cell. + const auto& cpuSolution = cpuProblem.model().solution(/*timeIdx=*/0); + BOOST_REQUIRE_EQUAL(cpuSolution.size(), numCells); + std::vector cpuPrimaryVariables(numCells); + std::vector hostPrimaryVariablesGpu; + hostPrimaryVariablesGpu.reserve(numCells); + for (std::size_t i = 0; i < numCells; ++i) { + cpuPrimaryVariables[i] = cpuSolution[i]; + hostPrimaryVariablesGpu.emplace_back(cpuPrimaryVariables[i]); + } + Opm::gpuistl::GpuBuffer primaryVariablesBuffer(hostPrimaryVariablesGpu); + + // Diagnostic: distribution of pressure-meaning values across cells. + { + std::array meaningCounts{0, 0, 0}; + for (std::size_t i = 0; i < numCells; ++i) { + const auto m = cpuPrimaryVariables[i].primaryVarsMeaningPressure(); + switch (m) { + case Opm::BlackOil::PressureMeaning::Pg: ++meaningCounts[0]; break; + case Opm::BlackOil::PressureMeaning::Po: ++meaningCounts[1]; break; + case Opm::BlackOil::PressureMeaning::Pw: ++meaningCounts[2]; break; + default: break; + } + } + BOOST_TEST_MESSAGE(std::format( + "PressureMeaning distribution (cells={}): Pg={} Po={} Pw={}", + numCells, meaningCounts[0], meaningCounts[1], meaningCounts[2])); + } + if (numCells > 0) { + const auto& pv0 = cpuPrimaryVariables[0]; + BOOST_TEST_MESSAGE(std::format( + "Cell 0 PV: meaning(P={},W={},G={}) pvtRegion={}", + static_cast(pv0.primaryVarsMeaningPressure()), + static_cast(pv0.primaryVarsMeaningWater()), + static_cast(pv0.primaryVarsMeaningGas()), + pv0.pvtRegionIndex())); + for (unsigned k = 0; k < pv0.size(); ++k) { + BOOST_TEST_MESSAGE(std::format(" pv[{}] = {}", k, double(pv0[k]))); + } + } + + + IntensiveQuantitiesCpu cpuIntensiveQuantitiesPrototype; + IntensiveQuantitiesGpu gpuIntensiveQuantitiesPrototype = + cpuIntensiveQuantitiesPrototype.template withOtherFluidSystem(*managedFluidSystemView); + std::vector hostIntensiveQuantities(numCells, gpuIntensiveQuantitiesPrototype); + Opm::gpuistl::GpuBuffer intensiveQuantitiesBuffer(hostIntensiveQuantities); + + const unsigned blockSize = 64u; + const unsigned gridSize = static_cast((numCells + blockSize - 1u) / blockSize); + + updateAllCellsKernel<<>>( + gpuProblemView, + Opm::gpuistl::GpuView( + primaryVariablesBuffer.data(), primaryVariablesBuffer.size()), + Opm::gpuistl::GpuView( + intensiveQuantitiesBuffer.data(), intensiveQuantitiesBuffer.size()), + numCells); + OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); + OPM_GPU_SAFE_CALL(cudaGetLastError()); + + // CPU reference: use the IntensiveQuantities the simulator itself + // computed during invalidateAndUpdateIntensiveQuantities(0). This is the + // exact CPU value that the production dispatcher path would overlay + // GPU-computed BlackOil fields onto, so it is a meaningful reference for + // the GPU comparison without requiring the (diffusion-incompatible) + // simpler IQ::update overload to compile here. + sim->model().invalidateAndUpdateIntensiveQuantities(/*timeIdx=*/0); + std::vector cpuIntensiveQuantities(numCells); + for (std::size_t i = 0; i < numCells; ++i) { + const auto* cached = sim->model().cachedIntensiveQuantities(static_cast(i), 0); + BOOST_REQUIRE(cached != nullptr); + cpuIntensiveQuantities[i] = *cached; + } + + std::vector gpuIntensiveQuantitiesHost(numCells, gpuIntensiveQuantitiesPrototype); + OPM_GPU_SAFE_CALL(cudaMemcpy(gpuIntensiveQuantitiesHost.data(), + intensiveQuantitiesBuffer.data(), + numCells * sizeof(IntensiveQuantitiesGpu), + cudaMemcpyDeviceToHost)); + + constexpr double tol = 1e-6; + constexpr unsigned waterPhaseIdx = FluidSystem::waterPhaseIdx; + constexpr unsigned gasPhaseIdx = FluidSystem::gasPhaseIdx; + const unsigned activePhases[] = {waterPhaseIdx, gasPhaseIdx}; + + std::size_t checked = 0; + std::size_t derivativeComparisons = 0; + std::size_t firstBadCell = static_cast(-1); + for (std::size_t i = 0; i < numCells; i += sampleStride) { + const auto& cpuIQ = cpuIntensiveQuantities[i]; + const auto& gpuIQ = gpuIntensiveQuantitiesHost[i]; + const auto& cpuFs = cpuIQ.fluidState(); + const auto& gpuFs = gpuIQ.fluidState(); + + // Probe non-finite values explicitly; report the first offender so + // we can debug the production simulator failure mode. + for (unsigned p : activePhases) { + const double cpuRho = asDouble(cpuFs.density(p)); + const double gpuRho = asDouble(gpuFs.density(p)); + const double cpuP = asDouble(cpuFs.pressure(p)); + const double gpuP = asDouble(gpuFs.pressure(p)); + const double cpuS = asDouble(cpuFs.saturation(p)); + const double gpuS = asDouble(gpuFs.saturation(p)); + if (!std::isfinite(gpuRho) || !std::isfinite(gpuP) || !std::isfinite(gpuS)) { + if (firstBadCell == static_cast(-1)) { + firstBadCell = i; + BOOST_TEST_MESSAGE(std::format( + "First non-finite GPU IQ at cell {} (phase {}): " + "S(cpu={}, gpu={}) P(cpu={}, gpu={}) rho(cpu={}, gpu={})", + i, p, cpuS, gpuS, cpuP, gpuP, cpuRho, gpuRho)); + } + } + } + + for (unsigned p : activePhases) { + checkValueAndDerivatives(cpuFs.saturation(p), gpuFs.saturation(p), + tol, /*checkDerivatives=*/false, "saturation", + derivativeComparisons); + checkValueAndDerivatives(cpuFs.pressure(p), gpuFs.pressure(p), + tol, /*checkDerivatives=*/false, "pressure", + derivativeComparisons); + checkValueAndDerivatives(cpuFs.density(p), gpuFs.density(p), + tol, /*checkDerivatives=*/false, "density", + derivativeComparisons); + checkValueAndDerivatives(cpuFs.invB(p), gpuFs.invB(p), + tol, /*checkDerivatives=*/false, "invB", + derivativeComparisons); + // mobility is intentionally NOT compared: the GPU dispatcher + // does not overlay it (left to the CPU-computed value in + // production). + } + checkValueAndDerivatives(cpuFs.Rsw(), gpuFs.Rsw(), + tol, /*checkDerivatives=*/false, "Rsw", + derivativeComparisons); + checkValueAndDerivatives(cpuFs.Rvw(), gpuFs.Rvw(), + tol, /*checkDerivatives=*/false, "Rvw", + derivativeComparisons); + BOOST_CHECK_EQUAL(static_cast(cpuFs.pvtRegionIndex()), + static_cast(gpuFs.pvtRegionIndex())); + checkValueAndDerivatives(cpuIQ.porosity(), gpuIQ.porosity(), + tol, /*checkDerivatives=*/false, "porosity", + derivativeComparisons); + BOOST_CHECK_CLOSE(static_cast(cpuIQ.referencePorosity()), + static_cast(gpuIQ.referencePorosity()), tol); + + // Thermal IQ fields: temperature in the fluid state, and the three + // BlackOilEnergyIntensiveQuantities scalars (rockInternalEnergy, + // totalThermalConductivity, rockFraction). For FullyImplicitThermal + // the GPU dispatcher writes all of these via overlayBlackOilFieldsFrom, + // so they should match the CPU value field-by-field. + checkValueAndDerivatives(cpuFs.temperature(/*phaseIdx=*/0), + gpuFs.temperature(/*phaseIdx=*/0), + tol, /*checkDerivatives=*/false, "temperature", + derivativeComparisons); + checkValueAndDerivatives(cpuIQ.rockInternalEnergy(), + gpuIQ.rockInternalEnergy(), + tol, /*checkDerivatives=*/false, "rockInternalEnergy", + derivativeComparisons); + checkValueAndDerivatives(cpuIQ.totalThermalConductivity(), + gpuIQ.totalThermalConductivity(), + tol, /*checkDerivatives=*/false, "totalThermalConductivity", + derivativeComparisons); + BOOST_CHECK_CLOSE(static_cast(cpuIQ.rockFraction()), + static_cast(gpuIQ.rockFraction()), tol); + for (unsigned p : activePhases) { + checkValueAndDerivatives(cpuFs.enthalpy(p), gpuFs.enthalpy(p), + tol, /*checkDerivatives=*/false, "enthalpy", + derivativeComparisons); + } + ++checked; + } + BOOST_TEST_MESSAGE("Real-deck per-cell GPU vs CPU IQ comparison: checked " + << checked << " / " << numCells << " cells"); +} + +BOOST_AUTO_TEST_CASE(TestRealDeckGpuVsCpuFromSimulatorSolution) +{ + // Drive the per-cell IQ update with the EQUIL-initialized primary + // variables of the production CO2STORE deck the user reports diverges + // when the dispatcher is enabled. This reproduces the exact code path + // taken by the dispatcher in production (per-cell PrimaryVariables + // upload + GPU update + readback) and compares every IQ field against + // a CPU reference, so we can identify which field becomes non-finite. + // + // NOTE: this test must run BEFORE the synthetic-deck tests because the + // BlackOilFluidSystem singleton is global state that gets re-set by + // every readDeck call; running this after the synthetic tests in the + // same process leaves the static FluidSystem in a state that is + // inconsistent with the per-cell GPU FluidSystem copy taken here. + const std::string deckPath = "/workspaces/opm/thecaseiwant/deck/THECASEIWANT.DATA"; + if (!std::filesystem::exists(deckPath)) { + BOOST_TEST_MESSAGE("Skipping: deck not found at " << deckPath); + return; + } + runIntensiveQuantitiesTestFromSimulatorSolution(deckPath, /*sampleStride=*/1u); +} + +BOOST_AUTO_TEST_CASE(TestInstantiateGpuFlowProblem) +{ + // Persist the in-source two-phase WATER+GAS+CO2STORE deck to a file so + // the standard Vanguard read path can ingest it. + const TemporaryFile tempFile("test_blackoilintensivequantities_gpu.DATA"); + { + std::ofstream f(tempFile.path); + f << deckString1; + } + runIntensiveQuantitiesTestForDeck(tempFile.path.string(), + /*expectedNumCells=*/27u, + /*sampleStride=*/1u, + /*measureTiming=*/false); +} + +BOOST_AUTO_TEST_CASE(TestPerformanceGpuVsCpuLargeGrid) +{ + // 100^3 = 1'000'000 cells. Build a CO2STORE WATER+GAS deck with a uniform + // unit-cube grid and EQUIL initialization, then time the per-cell IQ + // update on both GPU and CPU. Correctness is checked at every 1000th + // cell (1000 sample points) so the test stays reasonably fast. + // 64^3 = 262'144 cells. Build a CO2STORE WATER+GAS deck with a uniform + // unit-cube grid and EQUIL initialization, then time the per-cell IQ + // update on both GPU and CPU. Correctness is checked at every 256th cell + // (~1024 sample points) so the test stays reasonably fast. + // + // NOTE: the upper grid size here is bounded by the per-cell device + // allocation pattern in GpuEclMaterialLawManager (one cudaMalloc per + // piecewise-linear sample array per cell, i.e. ~12 cudaMalloc per cell). + // With ROCm's typical 4 KiB allocation granularity, ~16 GiB of VRAM + // limits us to a few hundred thousand cells; a 1M-cell deck would + // require consolidating per-cell sample buffers (left as a future + // optimisation, since for a homogeneous deck all cells share the same + // SGWFN table). + constexpr int dim = 64; + constexpr std::size_t expected = + static_cast(dim) * dim * dim; + const TemporaryFile tempFile("test_blackoilintensivequantities_gpu_1M.DATA"); + { + std::ofstream f(tempFile.path); + f << makeDeckString(dim, dim, dim); + } + runIntensiveQuantitiesTestForDeck(tempFile.path.string(), + /*expectedNumCells=*/expected, + /*sampleStride=*/256u, + /*measureTiming=*/true); +} + +BOOST_AUTO_TEST_CASE(TestGpuVsCpuIntensiveQuantitiesEvaluationDerivatives) +{ + // Re-runs the small (3x3x3) WATER+GAS+CO2STORE deck used by + // TestInstantiateGpuFlowProblem, but in addition to the value of every + // intensive-quantity field this test also compares every partial + // derivative of the underlying DenseAd::Evaluation between CPU and GPU. + // This guarantees that the GPU material-law / fluid-system evaluation + // chain is differentiated identically to the CPU one. + const TemporaryFile tempFile("test_blackoilintensivequantities_gpu_deriv.DATA"); + { + std::ofstream f(tempFile.path); + f << deckString1; + } + runIntensiveQuantitiesTestForDeck(tempFile.path.string(), + /*expectedNumCells=*/27u, + /*sampleStride=*/1u, + /*measureTiming=*/false, + /*checkDerivatives=*/true); +} + From d17e80137a33936e9dcef1a373e251583d415f47 Mon Sep 17 00:00:00 2001 From: Kjetil Olsen Lye Date: Thu, 30 Apr 2026 21:48:03 +0000 Subject: [PATCH 5/5] GpuBlackoilIntensiveQuantitiesDispatcher and FIBlackoilModel wiring Adds the GPU intensive-quantities dispatcher (GpuBlackoilIntensiveQuantitiesDispatcher.{hpp,cu}) and the supporting GpuFlowGasWaterEnergyTypeTags.hpp. FIBlackoilModel is updated in this branch (rather than earlier) to consume the dispatcher as the entry point used to push BlackOilIntensiveQuantities updates to the device. The .cu translation unit and the OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER macro are gated on HIP or CUDA >= 13.1. --- CMakeLists.txt | 12 + CMakeLists_files.cmake | 5 + opm/simulators/flow/FIBlackoilModel.hpp | 196 +++++++++ ...puBlackoilIntensiveQuantitiesDispatcher.cu | 377 ++++++++++++++++++ ...uBlackoilIntensiveQuantitiesDispatcher.hpp | 123 ++++++ .../gpuistl/GpuFlowGasWaterEnergyTypeTags.hpp | 243 +++++++++++ 6 files changed, 956 insertions(+) create mode 100644 opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.cu create mode 100644 opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.hpp create mode 100644 opm/simulators/linalg/gpuistl/GpuFlowGasWaterEnergyTypeTags.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e2a1baa07c..5e9c7e3978d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -424,6 +424,18 @@ macro(opm-simulators_config_hook) target_compile_definitions(opmsimulators PUBLIC HAVE_CUDA=1) endif() + # The BlackOil intensive-quantities GPU dispatcher requires either HIP or + # CUDA >= 13.1; the corresponding .cu source is only added to the build + # under the same condition (see CMakeLists_files.cmake). Expose a public + # macro so that callers (e.g. FIBlackoilModel) can guard their use of the + # dispatcher accordingly. + if(hip_FOUND OR CUDA_VERSION VERSION_GREATER_EQUAL 13.1) + target_compile_definitions(opmsimulators + PUBLIC + OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER=1 + ) + endif() + if(USE_GPU_BRIDGE) target_compile_definitions(opmsimulators PUBLIC COMPILE_GPU_BRIDGE=1) endif() diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 54253bf1e89..dcaaed7bb6b 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -337,6 +337,9 @@ if(CUDA_FOUND OR hip_FOUND) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/FlexibleSolverWrapper.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg FlexibleSolver_gpu_instantiate.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg PreconditionerFactory_gpu_instantiate.cpp) + if(hip_FOUND OR CUDA_VERSION VERSION_GREATER_EQUAL 13.1) + ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuBlackoilIntensiveQuantitiesDispatcher.cu) + endif() # HEADERS @@ -344,6 +347,8 @@ if(CUDA_FOUND OR hip_FOUND) list(APPEND PUBLIC_HEADER_FILES opm/simulators/flow/GpuEclMaterialLawManager.hpp) list(APPEND PUBLIC_HEADER_FILES opm/simulators/flow/GpuEclThermalLawManager.hpp) list(APPEND PUBLIC_HEADER_FILES opm/simulators/flow/GpuFlowProblem.hpp) + ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuBlackoilIntensiveQuantitiesDispatcher.hpp) + ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuFlowGasWaterEnergyTypeTags.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/autotuner.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/coloringAndReorderingUtils.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpu_safe_call.hpp) diff --git a/opm/simulators/flow/FIBlackoilModel.hpp b/opm/simulators/flow/FIBlackoilModel.hpp index 5a56ed016f8..3ec0c49b528 100644 --- a/opm/simulators/flow/FIBlackoilModel.hpp +++ b/opm/simulators/flow/FIBlackoilModel.hpp @@ -42,7 +42,20 @@ #include +#include + +#include + +#if HAVE_CUDA +#include +#include +#include +#include +#endif + +#include #include +#include #include #include @@ -79,14 +92,30 @@ class FIBlackOilModel : public BlackOilModel Dune::Partitions::all, ThreadManager::maxThreads()) { + initGpuIntensiveQuantitiesDispatcher_(); } void invalidateAndUpdateIntensiveQuantities(unsigned timeIdx) const { this->invalidateIntensiveQuantitiesCache(timeIdx); + + // When the experimental GPU dispatcher is active and the CPU has + // already populated the cache once (including the fields the GPU + // dispatcher does not overlay, e.g. mobility / energy), skip the + // expensive CPU loop and rely solely on the GPU dispatcher to + // refresh the BlackOil intensive-quantities fields. The remaining + // cached fields keep their previously computed values, which is + // the intended behaviour for this experimental GPU-only path. + if (gpuDispatcherActiveAndInitialized_()) { + markIntensiveQuantitiesCacheValid_(timeIdx); + maybeRunGpuIntensiveQuantitiesDispatcher_(timeIdx); + return; + } + if constexpr (gridIsUnchanging) { if constexpr (avoidElementContext) { updateCachedIntQuants(timeIdx); + cpuIntensiveQuantitiesInitialized_ = true; return; } OPM_BEGIN_PARALLEL_TRY_CATCH(); @@ -102,6 +131,11 @@ class FIBlackOilModel : public BlackOilModel } OPM_END_PARALLEL_TRY_CATCH("invalidateAndUpdateIntensiveQuantities: state error", this->simulator_.vanguard().grid().comm()); + // After all cells are CPU-updated and written into the cache, + // overlay the GPU-computed BlackOil fields in one batched call + // (no-op when the GPU dispatcher is unavailable or disabled). + maybeRunGpuIntensiveQuantitiesDispatcher_(timeIdx); + cpuIntensiveQuantitiesInitialized_ = true; } else { // Grid is possibly refined or otherwise changed between calls. ElementContext elemCtx(this->simulator_); @@ -262,6 +296,16 @@ class FIBlackOilModel : public BlackOilModel this->template updateSingleCachedIntQuantUnchecked(elementMapper.index(elem), timeIdx); } } + + // After the CPU per-cell update has populated all fields, optionally + // overlay the BlackOil intensive-quantities fields with their GPU + // counterparts via the experimental dispatcher. The dispatcher + // overwrites the subset of fields covered by + // BlackOilIntensiveQuantities::overlayBlackOilFieldsFrom; any field + // not handled there keeps the CPU-computed value. + // The call is a no-op when the GPU dispatcher is unavailable or the + // user has not enabled it. + maybeRunGpuIntensiveQuantitiesDispatcher_(timeIdx); } template @@ -276,6 +320,158 @@ class FIBlackOilModel : public BlackOilModel } ElementChunks element_chunks_; + + // ---------------------------------------------------------------------- + // GPU intensive-quantities dispatcher integration. + // + // All knowledge about whether the experimental GPU dispatcher is + // available, whether it is supported for the current TypeTag, and how + // to invoke it, is intentionally isolated to the few private members + // and helper methods below. The rest of the model only sees a single + // entry point: maybeRunGpuIntensiveQuantitiesDispatcher_(timeIdx). + // ---------------------------------------------------------------------- + + // Compile-time predicates. We use them to avoid sprinkling preprocessor + // conditionals throughout the rest of the class. + static constexpr bool gpuDispatcherCompiledIn_ = +#if HAVE_CUDA && OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + true; +#else + false; +#endif + + static constexpr bool gpuDispatcherSupportsTypeTag_ = +#if HAVE_CUDA && OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + Opm::gpuistl::GpuBlackoilIntensiveQuantitiesDispatcherSupport::value; +#else + false; +#endif + + bool useGpuIntensiveQuantitiesDispatcher_{false}; + + // Tracks whether the CPU intensive-quantities update has been run at + // least once. Used to ensure non-BlackOil cached fields (mobility, + // energy, ...) are populated before we switch to the GPU-only path + // when the experimental dispatcher is enabled. + mutable bool cpuIntensiveQuantitiesInitialized_{false}; + + // True iff the GPU dispatcher is compiled in, supported for the + // current TypeTag, enabled at runtime, and the CPU has already been + // run once to populate the cached non-BlackOil fields. + bool gpuDispatcherActiveAndInitialized_() const noexcept + { + if constexpr (gpuDispatcherCompiledIn_ && gpuDispatcherSupportsTypeTag_) { + return useGpuIntensiveQuantitiesDispatcher_ + && cpuIntensiveQuantitiesInitialized_; + } else { + return false; + } + } + + // Mark every entry of the intensive-quantity cache for the given time + // index as valid. Used on the GPU-only path where we skip the CPU + // update loop (which would normally mark each entry valid as a + // side-effect) and rely on the GPU dispatcher to refresh the + // BlackOil fields in-place. + void markIntensiveQuantitiesCacheValid_(const unsigned timeIdx) const + { + const std::size_t numCells = this->intensiveQuantityCache_[timeIdx].size(); + for (std::size_t i = 0; i < numCells; ++i) { + this->setIntensiveQuantitiesCacheEntryValidity(i, timeIdx, true); + } + } + +#if HAVE_CUDA && OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + using GpuDispatcherStorage = std::conditional_t< + Opm::gpuistl::GpuBlackoilIntensiveQuantitiesDispatcherSupport::value, + std::unique_ptr>, + std::monostate>; + mutable GpuDispatcherStorage gpuIntensiveQuantitiesDispatcher_{}; +#endif + + // Read the user-facing parameter and validate that the requested + // configuration is actually supported by this build / TypeTag. + // Throws (with a self-contained reason) when the user requests the GPU + // dispatcher but it is not available. + void initGpuIntensiveQuantitiesDispatcher_() + { + const bool requested = + Parameters::Get(); + if (!requested) { + useGpuIntensiveQuantitiesDispatcher_ = false; + return; + } + + if constexpr (!gpuDispatcherCompiledIn_) { + OPM_THROW(std::runtime_error, + "--experimental-compute-properties-on-gpu=true was " + "specified, but this binary was built without a " + "compatible GPU back-end. The GPU intensive-quantities " + "dispatcher requires either HIP or CUDA >= 13.1; " + "rebuild with HIP or with a sufficiently new CUDA " + "toolkit (see CMake option " + "OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER)."); + } else if constexpr (!gpuDispatcherSupportsTypeTag_) { + OPM_THROW(std::runtime_error, + "--experimental-compute-properties-on-gpu=true was " + "specified, but the active TypeTag is not supported " + "by the GPU BlackOil intensive-quantities dispatcher. " + "Only the CO2STORE-compatible TypeTag " + "FlowGasWaterEnergyProblem (gas+water+energy, no oil) " + "is currently supported by the experimental GPU " + "dispatcher."); + } else { + useGpuIntensiveQuantitiesDispatcher_ = true; + } + } + + // Single entry point that hides all dispatcher-related logic from the + // rest of the model. No-op when the dispatcher is either not compiled + // in, not supported for the current TypeTag, or not enabled at runtime. + void maybeRunGpuIntensiveQuantitiesDispatcher_(const unsigned timeIdx) const + { + if constexpr (gpuDispatcherCompiledIn_ && gpuDispatcherSupportsTypeTag_) { + if (useGpuIntensiveQuantitiesDispatcher_) { + const auto gpuStartTime = std::chrono::steady_clock::now(); + this->runGpuIntensiveQuantitiesDispatcher_(timeIdx); + const auto gpuDuration = + std::chrono::duration_cast( + std::chrono::steady_clock::now() - gpuStartTime); + Opm::OpmLog::info(std::format( + "GPU intensive-quantities dispatch took {} ms", + gpuDuration.count())); + } + } else { + (void)timeIdx; + } + } + +#if HAVE_CUDA && OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + void runGpuIntensiveQuantitiesDispatcher_(const unsigned timeIdx) const + { + if constexpr (Opm::gpuistl::GpuBlackoilIntensiveQuantitiesDispatcherSupport::value) { + if (!gpuIntensiveQuantitiesDispatcher_) { + gpuIntensiveQuantitiesDispatcher_ = + std::make_unique< + Opm::gpuistl::GpuBlackoilIntensiveQuantitiesDispatcher>(); + } + using PV = GetPropType; + const auto& sol = this->solution(timeIdx); + const std::size_t numCells = this->intensiveQuantityCache_[timeIdx].size(); + std::vector priVarsPtrs(numCells); + std::vector outIqPtrs(numCells); + for (std::size_t i = 0; i < numCells; ++i) { + priVarsPtrs[i] = &sol[i]; + outIqPtrs[i] = &this->intensiveQuantityCache_[timeIdx][i]; + } + gpuIntensiveQuantitiesDispatcher_->update( + this->simulator_.problem(), + priVarsPtrs.data(), + outIqPtrs.data(), + numCells); + } + } +#endif }; } // namespace Opm diff --git a/opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.cu b/opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.cu new file mode 100644 index 00000000000..ae9e738612c --- /dev/null +++ b/opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.cu @@ -0,0 +1,377 @@ +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#include + +// Tricks for older versions of ROCm that do not support alugrid or dune-fem +// (mirrors test_blackoilintensivequantities_gpu.cu). +#ifdef HAVE_DUNE_ALUGRID +#undef HAVE_DUNE_ALUGRID +#endif +#define HAVE_DUNE_ALUGRID 0 +#ifdef HAVE_DUNE_FEM +#undef HAVE_DUNE_FEM +#endif +#define HAVE_DUNE_FEM 0 + +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include + +#include +#include +#include +#include + +namespace { + +// Small helper to take a std::chrono::steady_clock timestamp. +inline auto dispatcherNow() +{ + return std::chrono::steady_clock::now(); +} + +inline double dispatcherElapsedMs(const std::chrono::steady_clock::time_point& start, + const std::chrono::steady_clock::time_point& end) +{ + return std::chrono::duration(end - start).count(); +} + +} // namespace + +namespace Opm::gpuistl { + +namespace { + +using DispatcherCpuTag = Opm::Properties::TTag::FlowGasWaterEnergyProblem; +using DispatcherGpuTag = Opm::Properties::TTag::FlowGasWaterEnergyDummyProblemGPU; + +using DispatcherScalar = Opm::GetPropType; + +using DispatcherCpuFluidSystem = Opm::BlackOilFluidSystem; +using DispatcherFluidSystemView + = Opm::BlackOilFluidSystemNonStatic; + +using DispatcherGpuPrimaryVariables + = Opm::BlackOilPrimaryVariables; +using DispatcherGpuIntensiveQuantities = Opm::BlackOilIntensiveQuantities; + +using DispatcherCpuMaterialLawManager = + typename Opm::GetProp::EclMaterialLawManager; +using DispatcherTraits = typename DispatcherCpuMaterialLawManager::MaterialLaw::Traits; +using DispatcherTwoPhaseTraits = + Opm::TwoPhaseMaterialTraits; +using DispatcherGpuPiecewiseLinearParamsBuf = + Opm::PiecewiseLinearTwoPhaseMaterialParams>; +using DispatcherGpuPiecewiseLinearLawBuf = + Opm::PiecewiseLinearTwoPhaseMaterial; +using DispatcherGpuMaterialLawParamsBuf = + Opm::EclTwoPhaseMaterialParams; +using DispatcherGpuMaterialLawBuf = + Opm::EclTwoPhaseMaterial; +using DispatcherGpuManagerBuf = + Opm::EclMaterialLaw::GpuManager; +using DispatcherGpuThermalManagerBuf = + Opm::EclThermalLaw::GpuManager; +using DispatcherGpuFlowProblemBuf = + Opm::GpuFlowProblem; +using DispatcherGpuFlowProblemView = + decltype(Opm::gpuistl::make_view(std::declval())); + +template +__global__ void +dispatcherUpdateAllCellsKernel(GpuProblem problem, + Opm::gpuistl::GpuView primaryVariables, + Opm::gpuistl::GpuView outIntensiveQuantities, + std::size_t numCells) +{ + const std::size_t i = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (i >= numCells) { + return; + } + IntensiveQuantitiesT iq = outIntensiveQuantities[i]; + iq.updateSaturations(primaryVariables[i], 0, Opm::LinearizationType{}); + iq.update(problem, primaryVariables[i], static_cast(i), 0); + iq.updateEnergyQuantities_(problem, static_cast(i), 0u); + outIntensiveQuantities[i] = iq; +} + +} // namespace + +// ============================================================================= +// Impl +// ============================================================================= +template +struct GpuBlackoilIntensiveQuantitiesDispatcher::Impl { + using ManagedFluidSystemViewPtr = + std::unique_ptr>; + + bool initialized = false; + std::unique_ptr problemBuf; + DispatcherGpuFlowProblemView problemView{}; + ManagedFluidSystemViewPtr managedFluidSystemView{}; + std::optional prototype; + std::size_t callCount = 0; + + // Cumulative per-stage timings (milliseconds, host-side). + double totalConvertMs = 0.0; + double totalH2DMs = 0.0; + double totalKernelMs = 0.0; + double totalD2HMs = 0.0; + double totalOverlayMs = 0.0; + double totalDofsProcessed = 0.0; + + ~Impl() + { + if (callCount > 0u) { + const double totalMs = + totalConvertMs + totalH2DMs + totalKernelMs + totalD2HMs + totalOverlayMs; + const double avgConvertMs = totalConvertMs / static_cast(callCount); + const double avgH2DMs = totalH2DMs / static_cast(callCount); + const double avgKernelMs = totalKernelMs / static_cast(callCount); + const double avgD2HMs = totalD2HMs / static_cast(callCount); + const double avgOverlayMs = totalOverlayMs / static_cast(callCount); + const double avgTotalMs = totalMs / static_cast(callCount); + const double avgDofs = totalDofsProcessed / static_cast(callCount); + Opm::OpmLog::info(std::format( + "[GpuBlackoilIntensiveQuantitiesDispatcher] aggregate over {} call(s)," + " {} dof-updates:\n" + " totals : convert={:.3f}ms h2d={:.3f}ms kernel={:.3f}ms" + " d2h={:.3f}ms overlay={:.3f}ms total={:.3f}ms\n" + " per-call: convert={:.3f}ms h2d={:.3f}ms kernel={:.3f}ms" + " d2h={:.3f}ms overlay={:.3f}ms total={:.3f}ms avg_dofs={:.1f}", + callCount, + static_cast(totalDofsProcessed), + totalConvertMs, totalH2DMs, totalKernelMs, totalD2HMs, totalOverlayMs, totalMs, + avgConvertMs, avgH2DMs, avgKernelMs, avgD2HMs, avgOverlayMs, avgTotalMs, + avgDofs)); + } + } + + Impl() = default; + Impl(const Impl&) = delete; + Impl& operator=(const Impl&) = delete; + Impl(Impl&&) = delete; + Impl& operator=(Impl&&) = delete; +}; + +template +GpuBlackoilIntensiveQuantitiesDispatcher::GpuBlackoilIntensiveQuantitiesDispatcher() + : impl_(std::make_unique()) +{ +} + +template +GpuBlackoilIntensiveQuantitiesDispatcher::~GpuBlackoilIntensiveQuantitiesDispatcher() + = default; + +template +void GpuBlackoilIntensiveQuantitiesDispatcher::update( + const Problem& cpuProblem, + const PrimaryVariables* const* cpuPriVars, + IntensiveQuantities* const* outIQ, + std::size_t numDof) +{ + if (numDof == 0u) { + return; + } + + if (!impl_->initialized) { + // Build the GPU FlowProblem from the CPU FlowProblem (one-time setup). + impl_->problemBuf = std::make_unique(cpuProblem); + impl_->problemView = Opm::gpuistl::make_view(*impl_->problemBuf); + + // Place the FluidSystemView in unified memory so its device pointer + // dereferences are valid both on host and device (mirrors the test). + auto& dynamicCpuFluidSystem = DispatcherCpuFluidSystem::getNonStaticInstance(); + // The fluid-system buffer is owning device memory; leak it intentionally + // for the lifetime of the process via a function-local static. + using FsBufferType = decltype(Opm::gpuistl::copy_to_gpu(dynamicCpuFluidSystem)); + static FsBufferType s_fsBuffer = Opm::gpuistl::copy_to_gpu(dynamicCpuFluidSystem); + auto fsView = Opm::gpuistl::make_view(s_fsBuffer); + + DispatcherFluidSystemView* managed = nullptr; + impl_->managedFluidSystemView = + Opm::gpuistl::make_gpu_managed_unique_ptr(fsView); + managed = impl_->managedFluidSystemView.get(); + + // Build a default IntensiveQuantities prototype for the GPU side. + Opm::BlackOilIntensiveQuantities cpuPrototype; + impl_->prototype = cpuPrototype.template withOtherFluidSystem( + *managed); + + impl_->initialized = true; + Opm::OpmLog::info(std::format( + "[GpuBlackoilIntensiveQuantitiesDispatcher] initialized for {} cells", + cpuProblem.model().numGridDof())); + } + + // ------------------------------------------------------------------- + // 1. Convert CPU primary variables to the dispatcher's GPU primary + // variables (host-side). + // ------------------------------------------------------------------- + const auto t0 = dispatcherNow(); + std::vector hostPriVars; + hostPriVars.reserve(numDof); + for (std::size_t i = 0; i < numDof; ++i) { + // The BlackOilPrimaryVariables converting copy ctor handles the + // TypeTag/storage difference. + hostPriVars.emplace_back(*cpuPriVars[i]); + } + std::vector hostIQ(numDof, *impl_->prototype); + const auto t1 = dispatcherNow(); + + // ------------------------------------------------------------------- + // 2. Upload host buffers to the device (host-to-device copies are + // embedded in the GpuBuffer ctors). + // ------------------------------------------------------------------- + Opm::gpuistl::GpuBuffer primaryVariablesBuffer(hostPriVars); + Opm::gpuistl::GpuBuffer intensiveQuantitiesBuffer(hostIQ); + OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); + const auto t2 = dispatcherNow(); + + // ------------------------------------------------------------------- + // 3. Launch the per-cell update kernel. + // ------------------------------------------------------------------- + const unsigned blockSize = 64u; + const unsigned gridSize = static_cast((numDof + blockSize - 1u) / blockSize); + + dispatcherUpdateAllCellsKernel<<>>( + impl_->problemView, + Opm::gpuistl::GpuView( + primaryVariablesBuffer.data(), primaryVariablesBuffer.size()), + Opm::gpuistl::GpuView( + intensiveQuantitiesBuffer.data(), intensiveQuantitiesBuffer.size()), + numDof); + OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); + OPM_GPU_SAFE_CALL(cudaGetLastError()); + const auto t3 = dispatcherNow(); + + // ------------------------------------------------------------------- + // 4. Read the GPU IntensiveQuantities back to host memory. + // ------------------------------------------------------------------- + OPM_GPU_SAFE_CALL(cudaMemcpy(hostIQ.data(), + intensiveQuantitiesBuffer.data(), + numDof * sizeof(DispatcherGpuIntensiveQuantities), + cudaMemcpyDeviceToHost)); + const auto t4 = dispatcherNow(); + + // ------------------------------------------------------------------- + // 5. Field-by-field overlay onto the caller's CPU IntensiveQuantities. + // The caller is expected to have run the CPU update first to + // populate any fields the dispatcher does not overwrite (e.g. + // \c mobility_, which the GPU relperm path currently leaves at 0). + // ------------------------------------------------------------------- + for (std::size_t i = 0; i < numDof; ++i) { + outIQ[i]->overlayBlackOilFieldsFrom(hostIQ[i]); + } + const auto t5 = dispatcherNow(); + + // ------------------------------------------------------------------- + // Per-call timings. + // ------------------------------------------------------------------- + const double convertMs = dispatcherElapsedMs(t0, t1); + const double h2dMs = dispatcherElapsedMs(t1, t2); + const double kernelMs = dispatcherElapsedMs(t2, t3); + const double d2hMs = dispatcherElapsedMs(t3, t4); + const double overlayMs = dispatcherElapsedMs(t4, t5); + + impl_->totalConvertMs += convertMs; + impl_->totalH2DMs += h2dMs; + impl_->totalKernelMs += kernelMs; + impl_->totalD2HMs += d2hMs; + impl_->totalOverlayMs += overlayMs; + impl_->totalDofsProcessed += static_cast(numDof); + + Opm::OpmLog::info(std::format( + "[GpuBlackoilIntensiveQuantitiesDispatcher] call={} dofs={}" + " convert={:.3f}ms h2d={:.3f}ms kernel={:.3f}ms d2h={:.3f}ms overlay={:.3f}ms total={:.3f}ms", + impl_->callCount, numDof, + convertMs, h2dMs, kernelMs, d2hMs, overlayMs, + convertMs + h2dMs + kernelMs + d2hMs + overlayMs)); + + ++impl_->callCount; +} + +// ============================================================================= +// Explicit instantiation for the user-facing CO2STORE simulation TypeTag, +// namely \c FlowGasWaterEnergyProblem (used by the Flow simulator binary +// when a CO2STORE deck with WATER+GAS+THERMAL is loaded). The GPU-specific +// property bindings live in \c GpuFlowGasWaterEnergyTypeTags.hpp. +// ============================================================================= +template class GpuBlackoilIntensiveQuantitiesDispatcher< + Opm::Properties::TTag::FlowGasWaterEnergyProblem>; + +} // namespace Opm::gpuistl diff --git a/opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.hpp b/opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.hpp new file mode 100644 index 00000000000..572bc80d4a9 --- /dev/null +++ b/opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.hpp @@ -0,0 +1,123 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#ifndef OPM_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER_HPP +#define OPM_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER_HPP + +// The dispatcher (and its underlying CUDA/HIP translation unit) requires +// either HIP (any version) or CUDA >= 13.1; older CUDA toolkits cannot +// compile the kernel translation unit. The build system gates inclusion of +// the .cu file (and the explicit instantiations therein) on the same +// condition, and exports the +// \c OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER compile +// definition iff the dispatcher is available. Callers should gate uses of +// the dispatcher on that macro (or, equivalently, on +// \c GpuBlackoilIntensiveQuantitiesDispatcherSupport, which is only +// specialized to \c true when the dispatcher is available). +#ifndef OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER +#define OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER 0 +#endif + +#if OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + +#include + +#include +#include + +namespace Opm::Properties::TTag { + struct FlowGasWaterEnergyProblem; +} + +namespace Opm::gpuistl { + +/// Compile-time predicate: does this CPU \c TypeTag describe the specific +/// CO2STORE configuration (FlowGasWaterEnergyProblem) that the GPU +/// intensive-quantities dispatcher currently supports? Restricted to a +/// single TypeTag so that explicit instantiations of the dispatcher +/// remain manageable for the experimental prototype. +template +struct GpuBlackoilIntensiveQuantitiesDispatcherSupport { + static constexpr bool value = false; +}; + +template <> +struct GpuBlackoilIntensiveQuantitiesDispatcherSupport< + Opm::Properties::TTag::FlowGasWaterEnergyProblem> { + static constexpr bool value = true; +}; + +/// Runs the BlackOil intensive-quantities update on the GPU for a given set +/// of degrees of freedom. The dispatcher is process-wide (one singleton per +/// CPU \c TypeTag). It lazily constructs the GPU-side \c GpuFlowProblem +/// from the supplied CPU problem on the first call. +/// +/// On every call, primary variables for the requested DoFs are uploaded to +/// the device, the per-cell update kernel from +/// \c test_blackoilintensivequantities_gpu.cu is launched (one thread per +/// DoF), and the resulting intensive quantities are read back to host +/// memory, as the design plan in \c PLAN.md requires. +/// +/// The class is a template on the CPU \c TypeTag and is explicitly +/// instantiated in the \c .cu translation unit. +template +class GpuBlackoilIntensiveQuantitiesDispatcher +{ +public: + using Problem = Opm::GetPropType; + using PrimaryVariables = Opm::GetPropType; + using IntensiveQuantities = Opm::GetPropType; + + GpuBlackoilIntensiveQuantitiesDispatcher(); + ~GpuBlackoilIntensiveQuantitiesDispatcher(); + + GpuBlackoilIntensiveQuantitiesDispatcher(const GpuBlackoilIntensiveQuantitiesDispatcher&) = delete; + GpuBlackoilIntensiveQuantitiesDispatcher& + operator=(const GpuBlackoilIntensiveQuantitiesDispatcher&) = delete; + GpuBlackoilIntensiveQuantitiesDispatcher(GpuBlackoilIntensiveQuantitiesDispatcher&&) = delete; + GpuBlackoilIntensiveQuantitiesDispatcher& + operator=(GpuBlackoilIntensiveQuantitiesDispatcher&&) = delete; + + /// Run the per-cell intensive-quantities update kernel on \p numDof + /// DoFs. \p cpuPriVars[i] points at the CPU primary variables for DoF + /// \c i. The GPU-computed BlackOil intensive quantities are written + /// onto \p outIQ[i] field-by-field via + /// \c BlackOilIntensiveQuantities::overlayBlackOilFieldsFrom, which + /// includes \c mobility_ (the GPU relperm path now populates it via + /// \c iq.update()). + /// + /// Per-call host/device timings are accumulated and printed every call + /// via \c Opm::OpmLog::info, prefixed with + /// \c "[GpuBlackoilIntensiveQuantitiesDispatcher]". + void update(const Problem& cpuProblem, + const PrimaryVariables* const* cpuPriVars, + IntensiveQuantities* const* outIQ, + std::size_t numDof); + +private: + struct Impl; + std::unique_ptr impl_; +}; + +} // namespace Opm::gpuistl + +#endif // OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + +#endif // OPM_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER_HPP diff --git a/opm/simulators/linalg/gpuistl/GpuFlowGasWaterEnergyTypeTags.hpp b/opm/simulators/linalg/gpuistl/GpuFlowGasWaterEnergyTypeTags.hpp new file mode 100644 index 00000000000..ad8cdf16594 --- /dev/null +++ b/opm/simulators/linalg/gpuistl/GpuFlowGasWaterEnergyTypeTags.hpp @@ -0,0 +1,243 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +/*! + * \file + * + * Shared GPU TypeTag derivations on top of the simulation-side + * \c FlowGasWaterEnergyProblem TypeTag (CO2STORE configuration). + * + * The intent is that GPU code (the dispatcher and the + * \c test_blackoilintensivequantities_gpu test) reuses the *exact* same + * TypeTag as the simulator and only overrides the small set of properties + * that have to change for the GPU build (storage container of the primary + * variables, the material law manager, the fluid system, the problem and + * the element context). + */ +#ifndef OPM_GPU_FLOW_GASWATER_ENERGY_TYPETAGS_HPP +#define OPM_GPU_FLOW_GASWATER_ENERGY_TYPETAGS_HPP + +#if HAVE_CUDA + +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace Opm::Properties { + +namespace TTag { + +/// The GPU sibling of \c FlowGasWaterEnergyProblem. Inherits all property +/// bindings from the simulation TypeTag and only overrides the storage of +/// the primary variables, the material law and the fluid system to their +/// GPU equivalents. +struct FlowGasWaterEnergyProblemGPU { + using InheritsFrom = std::tuple; +}; + +/// A second derived TypeTag used for the per-cell intensive-quantities +/// kernel: it additionally swaps in \c GpuFlowProblem and +/// \c FvBaseElementContextGpu so that the GPU device side has trivially +/// copyable \c Problem / \c ElementContext types. +struct FlowGasWaterEnergyDummyProblemGPU { + using InheritsFrom = std::tuple; +}; + +} // namespace TTag + +// ----------------------------------------------------------------------- +// Diffusion / dispersion: not yet implemented in the GPU IntensiveQuantities +// update path; force them off on the GPU TypeTag. +// ----------------------------------------------------------------------- +template +struct EnableDiffusion +{ + static constexpr bool value = false; +}; + +template +struct EnableDispersion +{ + static constexpr bool value = false; +}; + +// ----------------------------------------------------------------------- +// MaterialLaw: GPU-friendly EclTwoPhaseMaterial backed by GpuView storage. +// ----------------------------------------------------------------------- +template +struct MaterialLaw +{ +private: + using Scalar = GetPropType; + using FluidSystem = GetPropType; + using Traits = ThreePhaseMaterialTraits(), + getPropValue()>; + using TwoPhaseTraits = TwoPhaseMaterialTraits; + using TwoPhaseParams = ::Opm::PiecewiseLinearTwoPhaseMaterialParams< + TwoPhaseTraits, ::Opm::gpuistl::GpuView>; + using TwoPhaseLaw = ::Opm::PiecewiseLinearTwoPhaseMaterial; + using GpuMaterialLawParams = ::Opm::EclTwoPhaseMaterialParams< + Traits, TwoPhaseParams, TwoPhaseParams, TwoPhaseParams, ::Opm::gpuistl::ValueAsPointer>; + using GpuMaterialLaw = ::Opm::EclTwoPhaseMaterial< + Traits, TwoPhaseLaw, TwoPhaseLaw, TwoPhaseLaw, GpuMaterialLawParams>; + +public: + using EclMaterialLawManager = ::Opm::EclMaterialLaw::GpuManager< + Traits, TwoPhaseLaw, TwoPhaseLaw, ::Opm::VectorWithDefaultAllocator, GpuMaterialLaw>; + using type = typename EclMaterialLawManager::MaterialLaw; +}; + +// ----------------------------------------------------------------------- +// PrimaryVariables: GPU build uses MiniVector storage so the type is +// trivially copyable to the device. +// ----------------------------------------------------------------------- +template +struct PrimaryVariables +{ + using type = ::Opm::BlackOilPrimaryVariables; +}; + +// ----------------------------------------------------------------------- +// FluidSystem: non-static, GpuView-backed instantiation. +// ----------------------------------------------------------------------- +template +struct FluidSystem +{ + using type = ::Opm::BlackOilFluidSystemNonStatic< + GetPropType, + ::Opm::BlackOilDefaultFluidSystemIndices, + ::Opm::gpuistl::GpuView>; +}; + +template +struct FluidSystem +{ + using type = ::Opm::BlackOilFluidSystemNonStatic< + GetPropType, + ::Opm::BlackOilDefaultFluidSystemIndices, + ::Opm::gpuistl::GpuView>; +}; + +// ----------------------------------------------------------------------- +// SolidEnergyLaw / ThermalConductionLaw: bind to the GPU-portable laws +// (EclSpecrockLaw / EclThconrLaw). Both expose an +// \c EclThermalLawManager nested typedef matching the convention used by +// FlowBaseProblemProperties so existing GetProp<...>::EclThermalLawManager +// chains keep working. +// ----------------------------------------------------------------------- +template +struct SolidEnergyLaw +{ +private: + using Scalar = GetPropType; + using FluidSystem = GetPropType; + +public: + using EclThermalLawManager = ::Opm::EclThermalLaw::GpuManager< + Scalar, FluidSystem, ::Opm::gpuistl::GpuView, ::Opm::gpuistl::GpuView>; + + using type = ::Opm::EclSpecrockLaw< + Scalar, + ::Opm::EclSpecrockLawParams>; +}; + +template +struct ThermalConductionLaw +{ +private: + using Scalar = GetPropType; + using FluidSystem = GetPropType; + +public: + using EclThermalLawManager = ::Opm::EclThermalLaw::GpuManager< + Scalar, FluidSystem, ::Opm::gpuistl::GpuView, ::Opm::gpuistl::GpuView>; + + using type = ::Opm::EclThconrLaw; +}; + +// ----------------------------------------------------------------------- +// Problem: GpuFlowProblem (GpuView-backed) instead of FlowProblemBlackoil. +// ----------------------------------------------------------------------- +template +struct Problem +{ +private: + using ScalarT = GetPropType; + using FluidSystem = GetPropType; + using CpuMaterialLawManager = + typename Opm::GetProp::EclMaterialLawManager; + using GpuViewMaterialLawManager = + ::Opm::EclMaterialLaw::GpuManager; + using GpuViewThermalLawManager = ::Opm::EclThermalLaw::GpuManager< + ScalarT, FluidSystem, ::Opm::gpuistl::GpuView, ::Opm::gpuistl::GpuView>; + +public: + using type = ::Opm::GpuFlowProblem; +}; + +// ----------------------------------------------------------------------- +// ElementContext: trivially copyable GPU element context. +// ----------------------------------------------------------------------- +template +struct ElementContext +{ + using type = ::Opm::FvBaseElementContextGpu; +}; + +} // namespace Opm::Properties + +#endif // HAVE_CUDA + +#endif // OPM_GPU_FLOW_GASWATER_ENERGY_TYPETAGS_HPP