diff --git a/CMakeLists.txt b/CMakeLists.txt index a240cd865c2..5e9c7e3978d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -353,8 +353,10 @@ macro(opm-simulators_sources_hook) # cuda warns when constxpr functions are used in kernels. # since the entire stl is constexpr ..., we enable relaxed flag set_source_files_properties( + tests/gpuistl/test_blackoilintensivequantities_gpu.cu tests/gpuistl/test_blackoilfluidstategpu.cu tests/gpuistl/test_gpu_ad.cu + tests/gpuistl/test_gpu_ecl_thermal_law_manager.cu tests/gpuistl/test_gpu_linear_two_phase_material.cu tests/gpuistl/test_gpuPvt.cu tests/gpuistl/test_gpuBlackOilFluidSystem.cu @@ -422,6 +424,18 @@ macro(opm-simulators_config_hook) target_compile_definitions(opmsimulators PUBLIC HAVE_CUDA=1) endif() + # The BlackOil intensive-quantities GPU dispatcher requires either HIP or + # CUDA >= 13.1; the corresponding .cu source is only added to the build + # under the same condition (see CMakeLists_files.cmake). Expose a public + # macro so that callers (e.g. FIBlackoilModel) can guard their use of the + # dispatcher accordingly. + if(hip_FOUND OR CUDA_VERSION VERSION_GREATER_EQUAL 13.1) + target_compile_definitions(opmsimulators + PUBLIC + OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER=1 + ) + endif() + if(USE_GPU_BRIDGE) target_compile_definitions(opmsimulators PUBLIC COMPILE_GPU_BRIDGE=1) endif() @@ -479,6 +493,7 @@ macro(opm-simulators_tests_hook) cuVector_operations deviceBlockOperations gpu_ad + gpu_ecl_thermal_law_manager gpu_linear_two_phase_material gpu_resources gpu_safe_call @@ -504,6 +519,7 @@ macro(opm-simulators_tests_hook) primary_variables_gpu solver_adapter throw_macros_on_gpu + blackoilintensivequantities_gpu ) if(TEST ${test}) set_tests_properties(${test} @@ -670,6 +686,9 @@ macro(opm-simulators_targets_hook) target_sources(test_RestartSerialization PRIVATE $) target_sources(test_glift1 PRIVATE $) target_sources(test_tpsa_localresidual PRIVATE $) + if (TARGET test_blackoilintensivequantities_gpu) + target_sources(test_blackoilintensivequantities_gpu PRIVATE $) + endif() if(MPI_FOUND) target_sources(test_chopstep PRIVATE $) endif() diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index e16173f05db..dcaaed7bb6b 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -337,9 +337,18 @@ if(CUDA_FOUND OR hip_FOUND) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/FlexibleSolverWrapper.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg FlexibleSolver_gpu_instantiate.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg PreconditionerFactory_gpu_instantiate.cpp) + if(hip_FOUND OR CUDA_VERSION VERSION_GREATER_EQUAL 13.1) + ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuBlackoilIntensiveQuantitiesDispatcher.cu) + endif() # HEADERS + list(APPEND PUBLIC_HEADER_FILES opm/models/discretization/common/fvbaseelementcontextgpu.hh) + list(APPEND PUBLIC_HEADER_FILES opm/simulators/flow/GpuEclMaterialLawManager.hpp) + list(APPEND PUBLIC_HEADER_FILES opm/simulators/flow/GpuEclThermalLawManager.hpp) + list(APPEND PUBLIC_HEADER_FILES opm/simulators/flow/GpuFlowProblem.hpp) + ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuBlackoilIntensiveQuantitiesDispatcher.hpp) + ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuFlowGasWaterEnergyTypeTags.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/autotuner.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/coloringAndReorderingUtils.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpu_safe_call.hpp) @@ -570,6 +579,9 @@ if(CUDA_FOUND OR hip_FOUND) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_safe_conversion.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_solver_adapter.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_ad.cu) + if(hip_FOUND OR CUDA_VERSION VERSION_GREATER_EQUAL 13.1) + ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_ecl_thermal_law_manager.cu) + endif() ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_linear_two_phase_material.cu) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpuPvt.cu) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_smart_pointers.cu) @@ -587,6 +599,10 @@ if(CUDA_FOUND OR hip_FOUND) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_primary_variables_gpu.cu) endif() ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_MiniMatrix.cu) + if(hip_FOUND OR CUDA_VERSION VERSION_GREATER_EQUAL 13.1) + ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_blackoilintensivequantities_gpu.cu) + endif() + # Boost < 1.75 + nvcc = trouble in this test if(Boost_VERSION VERSION_GREATER 1.74) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_MiniVector.cu) @@ -707,6 +723,7 @@ list (APPEND TEST_DATA_FILES tests/data/test_stokes2c.dgf tests/data/test_stokes2cni.dgf tests/data/waterair.dgf + tests/very_simple_deck.DATA ) @@ -969,6 +986,7 @@ list (APPEND PUBLIC_HEADER_FILES opm/simulators/flow/FIPContainer.hpp opm/simulators/flow/FlowBaseProblemProperties.hpp opm/simulators/flow/FlowBaseVanguard.hpp + opm/simulators/flow/FlowGasWaterEnergyTypeTag.hpp opm/simulators/flow/FlowGenericProblem.hpp opm/simulators/flow/FlowGenericProblem_impl.hpp opm/simulators/flow/FlowGenericVanguard.hpp diff --git a/flowexperimental/BlackOilEnergyIntensiveQuantitiesGlobalIndex.hpp b/flowexperimental/BlackOilEnergyIntensiveQuantitiesGlobalIndex.hpp index bcb684800de..0c62cbbb26b 100644 --- a/flowexperimental/BlackOilEnergyIntensiveQuantitiesGlobalIndex.hpp +++ b/flowexperimental/BlackOilEnergyIntensiveQuantitiesGlobalIndex.hpp @@ -59,20 +59,20 @@ class BlackOilEnergyIntensiveQuantitiesGlobalIndex; public: - void updateTemperature_([[maybe_unused]] const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - [[maybe_unused]] unsigned globalSpaceIdx, - [[maybe_unused]] unsigned timeIdx) + OPM_HOST_DEVICE void updateTemperature_([[maybe_unused]] const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + [[maybe_unused]] unsigned globalSpaceIdx, + [[maybe_unused]] unsigned timeIdx) { auto& fs = this->asImp_().fluidState_; Scalar T = problem.temperature(globalSpaceIdx, timeIdx); fs.setTemperature(T); } - void updateEnergyQuantities_([[maybe_unused]] const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - [[maybe_unused]] unsigned globalSpaceIdx, - [[maybe_unused]] unsigned timeIdx, - const typename FluidSystem::template ParameterCache&) + OPM_HOST_DEVICE void updateEnergyQuantities_([[maybe_unused]] const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + [[maybe_unused]] unsigned globalSpaceIdx, + [[maybe_unused]] unsigned timeIdx, + const typename FluidSystem::template ParameterCache&) { } }; @@ -185,17 +187,17 @@ class BlackOilEnergyIntensiveQuantitiesGlobalIndex; public: - void updateTemperature_([[maybe_unused]] const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - [[maybe_unused]] unsigned globalSpaceIdx, - [[maybe_unused]] unsigned timeIdx) + OPM_HOST_DEVICE void updateTemperature_([[maybe_unused]] const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + [[maybe_unused]] unsigned globalSpaceIdx, + [[maybe_unused]] unsigned timeIdx) { } - void updateEnergyQuantities_([[maybe_unused]] const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - [[maybe_unused]] unsigned globalSpaceIdx, - [[maybe_unused]] unsigned timeIdx, - const typename FluidSystem::template ParameterCache&) + OPM_HOST_DEVICE void updateEnergyQuantities_([[maybe_unused]] const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + [[maybe_unused]] unsigned globalSpaceIdx, + [[maybe_unused]] unsigned timeIdx, + const typename FluidSystem::template ParameterCache&) { } }; diff --git a/opm/models/blackoil/blackoilbrinemodules.hh b/opm/models/blackoil/blackoilbrinemodules.hh index caa5daf74c2..6947429d8a4 100644 --- a/opm/models/blackoil/blackoilbrinemodules.hh +++ b/opm/models/blackoil/blackoilbrinemodules.hh @@ -98,7 +98,7 @@ public: static void registerParameters() {} - static bool primaryVarApplies(unsigned pvIdx) + static OPM_HOST_DEVICE bool primaryVarApplies(unsigned pvIdx) { if constexpr (enableBrine) { return pvIdx == saltConcentrationIdx; @@ -135,7 +135,7 @@ public: return static_cast(1.0); } - static bool eqApplies(unsigned eqIdx) + static OPM_HOST_DEVICE bool eqApplies(unsigned eqIdx) { if constexpr (enableBrine) { return eqIdx == contiBrineEqIdx; @@ -405,18 +405,18 @@ public: * primary variables * */ - void updateSaltConcentration_(const ElementContext& elemCtx, - unsigned dofIdx, - unsigned timeIdx) + OPM_HOST_DEVICE void updateSaltConcentration_(const ElementContext& elemCtx, + unsigned dofIdx, + unsigned timeIdx) { const PrimaryVariables& priVars = elemCtx.primaryVars(dofIdx, timeIdx); const LinearizationType lintype = elemCtx.linearizationType(); updateSaltConcentration_(priVars, timeIdx, lintype); } - void updateSaltConcentration_(const PrimaryVariables& priVars, - const unsigned timeIdx, - const LinearizationType lintype) + OPM_HOST_DEVICE void updateSaltConcentration_(const PrimaryVariables& priVars, + const unsigned timeIdx, + const LinearizationType lintype) { const unsigned pvtnumRegionIdx = priVars.pvtRegionIndex(); auto& fs = asImp_().fluidState_; @@ -442,9 +442,9 @@ public: } } - void saltPropertiesUpdate_([[maybe_unused]] const ElementContext& elemCtx, - [[maybe_unused]] unsigned dofIdx, - [[maybe_unused]] unsigned timeIdx) + OPM_HOST_DEVICE void saltPropertiesUpdate_([[maybe_unused]] const ElementContext& elemCtx, + [[maybe_unused]] unsigned dofIdx, + [[maybe_unused]] unsigned timeIdx) { if constexpr (enableSaltPrecipitation) { const Evaluation porosityFactor = min(1.0 - asImp_().fluidState_.saltSaturation(), 1.0); //phi/phi_0 @@ -455,20 +455,20 @@ public: } } - const Evaluation& brineRefDensity() const + OPM_HOST_DEVICE const Evaluation& brineRefDensity() const { return refDensity_; } - Scalar saltSolubility() const + OPM_HOST_DEVICE Scalar saltSolubility() const { return saltSolubility_; } - Scalar saltDensity() const + OPM_HOST_DEVICE Scalar saltDensity() const { return saltDensity_; } - const Evaluation& permFactor() const + OPM_HOST_DEVICE const Evaluation& permFactor() const { return permFactor_; } protected: - Implementation& asImp_() + OPM_HOST_DEVICE Implementation& asImp_() { return *static_cast(this); } Evaluation saltConcentration_; @@ -487,27 +487,27 @@ class BlackOilBrineIntensiveQuantities using Scalar = GetPropType; public: - void updateSaltConcentration_(const ElementContext&, - unsigned, - unsigned) + OPM_HOST_DEVICE void updateSaltConcentration_(const ElementContext&, + unsigned, + unsigned) {} - void saltPropertiesUpdate_(const ElementContext&, - unsigned, - unsigned) + OPM_HOST_DEVICE void saltPropertiesUpdate_(const ElementContext&, + unsigned, + unsigned) {} - const Evaluation& brineRefDensity() const - { throw std::runtime_error("brineRefDensity() called but brine are disabled"); } + OPM_HOST_DEVICE const Evaluation& brineRefDensity() const + { OPM_THROW(std::runtime_error, "brineRefDensity() called but brine are disabled"); } - const Scalar saltSolubility() const - { throw std::logic_error("saltSolubility() called but salt precipitation is disabled"); } + OPM_HOST_DEVICE const Scalar saltSolubility() const + { OPM_THROW(std::logic_error, "saltSolubility() called but salt precipitation is disabled"); } - const Scalar saltDensity() const - { throw std::logic_error("saltDensity() called but salt precipitation is disabled"); } + OPM_HOST_DEVICE const Scalar saltDensity() const + { OPM_THROW(std::logic_error, "saltDensity() called but salt precipitation is disabled"); } - const Evaluation& permFactor() const - { throw std::logic_error("permFactor() called but salt precipitation is disabled"); } + OPM_HOST_DEVICE const Evaluation& permFactor() const + { OPM_THROW(std::logic_error, "permFactor() called but salt precipitation is disabled"); } }; } // namespace Opm diff --git a/opm/models/blackoil/blackoilenergymodules.hh b/opm/models/blackoil/blackoilenergymodules.hh index 5d1aebf2fa4..e2cce9cd2a5 100644 --- a/opm/models/blackoil/blackoilenergymodules.hh +++ b/opm/models/blackoil/blackoilenergymodules.hh @@ -30,6 +30,7 @@ #include +#include #include #include @@ -104,7 +105,7 @@ public: } } - static bool primaryVarApplies(unsigned pvIdx) + static OPM_HOST_DEVICE bool primaryVarApplies(unsigned pvIdx) { if constexpr (enableFullyImplicitThermal) { return pvIdx == temperatureIdx; @@ -129,7 +130,7 @@ public: return static_cast(1.0); } - static bool eqApplies(unsigned eqIdx) + static OPM_HOST_DEVICE bool eqApplies(unsigned eqIdx) { if constexpr (enableFullyImplicitThermal) { return eqIdx == contiEnergyEqIdx; @@ -241,11 +242,11 @@ public: } template - static void addPhaseEnthalpyFlux_(RateVector& flux, - unsigned phaseIdx, - const ElementContext& elemCtx, - unsigned scvfIdx, - unsigned timeIdx) + OPM_HOST_DEVICE static void addPhaseEnthalpyFlux_(RateVector& flux, + unsigned phaseIdx, + const ElementContext& elemCtx, + unsigned scvfIdx, + unsigned timeIdx) { const auto& extQuants = elemCtx.extensiveQuantities(scvfIdx, timeIdx); const unsigned upIdx = extQuants.upstreamIndex(phaseIdx); @@ -258,8 +259,8 @@ public: fs); } - static void addToEnthalpyRate(RateVector& flux, - const Evaluation& hRate) + OPM_HOST_DEVICE static void addToEnthalpyRate(RateVector& flux, + const Evaluation& hRate) { if constexpr (enableFullyImplicitThermal) { flux[contiEnergyEqIdx] += hRate; @@ -270,8 +271,8 @@ public: * \brief Assign the energy specific primary variables to a PrimaryVariables object */ template - static void assignPrimaryVars(PrimaryVariables& priVars, - const FluidState& fluidState) + OPM_HOST_DEVICE static void assignPrimaryVars(PrimaryVariables& priVars, + const FluidState& fluidState) { if constexpr (enableFullyImplicitThermal) { priVars[temperatureIdx] = getValue(fluidState.temperature(/*phaseIdx=*/0)); @@ -281,9 +282,9 @@ public: /*! * \brief Do a Newton-Raphson update the primary variables of the energys. */ - static void updatePrimaryVars(PrimaryVariables& newPv, - const PrimaryVariables& oldPv, - const EqVector& delta) + OPM_HOST_DEVICE static void updatePrimaryVars(PrimaryVariables& newPv, + const PrimaryVariables& oldPv, + const EqVector& delta) { if constexpr (enableFullyImplicitThermal) { // do a plain unchopped Newton update @@ -294,8 +295,8 @@ public: /*! * \brief Return how much a Newton-Raphson update is considered an error */ - static Scalar computeUpdateError(const PrimaryVariables&, - const EqVector&) + OPM_HOST_DEVICE static Scalar computeUpdateError(const PrimaryVariables&, + const EqVector&) { // do not consider consider the cange of energy primary variables for // convergence @@ -306,7 +307,7 @@ public: /*! * \brief Return how much a residual is considered an error */ - static Scalar computeResidualError(const EqVector& resid) + OPM_HOST_DEVICE static Scalar computeResidualError(const EqVector& resid) { // do not weight the residual of energy when it comes to convergence return std::abs(scalarValue(resid[contiEnergyEqIdx])); @@ -385,9 +386,9 @@ public: * \brief Update the temperature of the intensive quantity's fluid state * */ - void updateTemperature_(const ElementContext& elemCtx, - unsigned dofIdx, - unsigned timeIdx) + OPM_HOST_DEVICE void updateTemperature_(const ElementContext& elemCtx, + unsigned dofIdx, + unsigned timeIdx) { auto& fs = asImp_().fluidState_; const auto& priVars = elemCtx.primaryVars(dofIdx, timeIdx); @@ -400,11 +401,11 @@ public: * \brief Update the temperature of the intensive quantity's fluid state * */ - void updateTemperature_([[maybe_unused]] const Problem& problem, - const PrimaryVariables& priVars, - [[maybe_unused]] unsigned globalDofIdx, - const unsigned timeIdx, - const LinearizationType& lintype) + OPM_HOST_DEVICE void updateTemperature_([[maybe_unused]] const Problem& problem, + const PrimaryVariables& priVars, + [[maybe_unused]] unsigned globalDofIdx, + const unsigned timeIdx, + const LinearizationType& lintype) { auto& fs = asImp_().fluidState_; fs.setTemperature(priVars.makeEvaluation(temperatureIdx, timeIdx, lintype)); @@ -414,27 +415,27 @@ public: * \brief Compute the intensive quantities needed to handle energy conservation * */ - void updateEnergyQuantities_(const ElementContext& elemCtx, - unsigned dofIdx, - unsigned timeIdx) + OPM_HOST_DEVICE void updateEnergyQuantities_(const ElementContext& elemCtx, + unsigned dofIdx, + unsigned timeIdx) { updateEnergyQuantities_(elemCtx.problem(), elemCtx.globalSpaceIndex(dofIdx, timeIdx), timeIdx); } - void updateEnergyQuantities_(const Problem& problem, - const unsigned globalSpaceIdx, - const unsigned timeIdx) + OPM_HOST_DEVICE void updateEnergyQuantities_(const Problem& problem, + const unsigned globalSpaceIdx, + const unsigned timeIdx) { auto& fs = asImp_().fluidState_; // compute the specific enthalpy of the fluids, the specific enthalpy of the rock // and the thermal conductivity coefficients for (int phaseIdx = 0; phaseIdx < numPhases; ++ phaseIdx) { - if (!FluidSystem::phaseIsActive(phaseIdx)) { + if (!asImp_().getFluidSystem().phaseIsActive(phaseIdx)) { continue; } - const auto& h = FluidSystem::enthalpy(fs, phaseIdx, problem.pvtRegionIndex(globalSpaceIdx)); + const auto& h = asImp_().getFluidSystem().enthalpy(fs, phaseIdx, problem.pvtRegionIndex(globalSpaceIdx)); fs.setEnthalpy(phaseIdx, h); } @@ -462,7 +463,7 @@ public: { return rockFraction_; } protected: - Implementation& asImp_() + OPM_HOST_DEVICE Implementation& asImp_() { return *static_cast(this); } Evaluation rockInternalEnergy_; @@ -484,51 +485,54 @@ class BlackOilEnergyIntensiveQuantities - void updateTemperature_(const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - unsigned globalDofIdx, - unsigned timeIdx, - [[maybe_unused]] const LinearizationType& lintype - ) + OPM_HOST_DEVICE void updateTemperature_(const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + unsigned globalDofIdx, + unsigned timeIdx, + [[maybe_unused]] const LinearizationType& lintype) { updateTemperature_(problem, globalDofIdx, timeIdx); } - void updateTemperature_(const Problem& problem, unsigned globalDofIdx, unsigned timeIdx) + OPM_HOST_DEVICE void updateTemperature_(const Problem& problem, + unsigned globalDofIdx, + unsigned timeIdx) { auto& fs = asImp_().fluidState_; const Scalar T = problem.temperature(globalDofIdx, timeIdx); fs.setTemperature(T); } - void updateEnergyQuantities_(const ElementContext&, - unsigned, - unsigned, - const typename FluidSystem::template ParameterCache&) + OPM_HOST_DEVICE void updateEnergyQuantities_(const ElementContext&, + unsigned, + unsigned, + const typename FluidSystem::template ParameterCache&) {} - const Evaluation& rockInternalEnergy() const + OPM_HOST_DEVICE const Evaluation& rockInternalEnergy() const { - throw std::logic_error("Requested the rock internal energy, which is " - "unavailable because energy is not conserved"); + OPM_THROW(std::logic_error, + "Requested the rock internal energy, which is " + "unavailable because energy is not conserved"); } - const Evaluation& totalThermalConductivity() const + OPM_HOST_DEVICE const Evaluation& totalThermalConductivity() const { - throw std::logic_error("Requested the total thermal conductivity, which is " - "unavailable because energy is not conserved"); + OPM_THROW(std::logic_error, + "Requested the total thermal conductivity, which is " + "unavailable because energy is not conserved"); } protected: - Implementation& asImp_() + OPM_HOST_DEVICE Implementation& asImp_() { return *static_cast(this); } }; @@ -548,26 +552,28 @@ class BlackOilEnergyIntensiveQuantities - void updateTemperature_(const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - unsigned globalDofIdx, - unsigned timeIdx, - [[maybe_unused]] const LinearizationType& lintype) + OPM_HOST_DEVICE void updateTemperature_(const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + unsigned globalDofIdx, + unsigned timeIdx, + [[maybe_unused]] const LinearizationType& lintype) { updateTemperature_(problem, globalDofIdx, timeIdx); } @@ -576,32 +582,34 @@ public: * \brief Compute the intensive quantities needed to handle energy conservation * */ - void updateEnergyQuantities_([[maybe_unused]] const ElementContext& elemCtx, - [[maybe_unused]] unsigned dofIdx, - [[maybe_unused]] unsigned timeIdx) + OPM_HOST_DEVICE void updateEnergyQuantities_([[maybe_unused]] const ElementContext& elemCtx, + [[maybe_unused]] unsigned dofIdx, + [[maybe_unused]] unsigned timeIdx) { } - void updateEnergyQuantities_([[maybe_unused]] const Problem& problem, - [[maybe_unused]] const unsigned globalSpaceIdx, - [[maybe_unused]] const unsigned timeIdx) + OPM_HOST_DEVICE void updateEnergyQuantities_([[maybe_unused]] const Problem& problem, + [[maybe_unused]] const unsigned globalSpaceIdx, + [[maybe_unused]] const unsigned timeIdx) { } - const Evaluation& rockInternalEnergy() const + OPM_HOST_DEVICE const Evaluation& rockInternalEnergy() const { - throw std::logic_error("Requested the rock internal energy, which is " - "unavailable because energy is not conserved"); + OPM_THROW(std::logic_error, + "Requested the rock internal energy, which is " + "unavailable because energy is not conserved"); } - const Evaluation& totalThermalConductivity() const + OPM_HOST_DEVICE const Evaluation& totalThermalConductivity() const { - throw std::logic_error("Requested the total thermal conductivity, which is " - "unavailable because energy is not conserved"); + OPM_THROW(std::logic_error, + "Requested the total thermal conductivity, which is " + "unavailable because energy is not conserved"); } protected: - Implementation& asImp_() + OPM_HOST_DEVICE Implementation& asImp_() { return *static_cast(this); } }; @@ -617,41 +625,43 @@ class BlackOilEnergyIntensiveQuantities using Problem = GetPropType; public: - void updateTemperature_([[maybe_unused]] const ElementContext& elemCtx, - [[maybe_unused]] unsigned dofIdx, - [[maybe_unused]] unsigned timeIdx) + OPM_HOST_DEVICE void updateTemperature_([[maybe_unused]] const ElementContext& elemCtx, + [[maybe_unused]] unsigned dofIdx, + [[maybe_unused]] unsigned timeIdx) { } template - void updateTemperature_([[maybe_unused]] const Problem& problem, - [[maybe_unused]] const PrimaryVariables& priVars, - [[maybe_unused]] unsigned globalDofIdx, - [[maybe_unused]] unsigned timeIdx, - [[maybe_unused]] const LinearizationType& lintype) + OPM_HOST_DEVICE void updateTemperature_([[maybe_unused]] const Problem& problem, + [[maybe_unused]] const PrimaryVariables& priVars, + [[maybe_unused]] unsigned globalDofIdx, + [[maybe_unused]] unsigned timeIdx, + [[maybe_unused]] const LinearizationType& lintype) { } - void updateEnergyQuantities_(const ElementContext&, - unsigned, - unsigned, - const typename FluidSystem::template ParameterCache&) + OPM_HOST_DEVICE void updateEnergyQuantities_(const ElementContext&, + unsigned, + unsigned, + const typename FluidSystem::template ParameterCache&) {} - const Evaluation& rockInternalEnergy() const + OPM_HOST_DEVICE const Evaluation& rockInternalEnergy() const { - throw std::logic_error("Requested the rock internal energy, which is " - "unavailable because energy is not conserved"); + OPM_THROW(std::logic_error, + "Requested the rock internal energy, which is " + "unavailable because energy is not conserved"); } - const Evaluation& totalThermalConductivity() const + OPM_HOST_DEVICE const Evaluation& totalThermalConductivity() const { - throw std::logic_error("Requested the total thermal conductivity, which is " - "unavailable because energy is not conserved"); + OPM_THROW(std::logic_error, + "Requested the total thermal conductivity, which is " + "unavailable because energy is not conserved"); } protected: - Implementation& asImp_() + OPM_HOST_DEVICE Implementation& asImp_() { return *static_cast(this); } }; @@ -873,8 +883,8 @@ public: const BoundaryFluidState& /*boundaryFs*/) {} - const Evaluation& energyFlux() const - { throw std::logic_error("Requested the energy flux, but energy is not conserved"); } + OPM_HOST_DEVICE const Evaluation& energyFlux() const + { OPM_THROW(std::logic_error, "Requested the energy flux, but energy is not conserved"); } }; template @@ -934,8 +944,8 @@ public: const BoundaryFluidState& /*boundaryFs*/) { } - const Evaluation& energyFlux() const - { throw std::logic_error("Requested the energy flux, but energy is not conserved"); } + OPM_HOST_DEVICE const Evaluation& energyFlux() const + { OPM_THROW(std::logic_error, "Requested the energy flux, but energy is not conserved"); } }; template @@ -982,8 +992,8 @@ public: const BoundaryFluidState& /*boundaryFs*/) {} - const Evaluation& energyFlux() const - { throw std::logic_error("Requested the energy flux, but energy is not conserved"); } + OPM_HOST_DEVICE const Evaluation& energyFlux() const + { OPM_THROW(std::logic_error, "Requested the energy flux, but energy is not conserved"); } }; } // namespace Opm diff --git a/opm/models/blackoil/blackoilfoammodules.hh b/opm/models/blackoil/blackoilfoammodules.hh index d90f453aff0..7fb156b091c 100644 --- a/opm/models/blackoil/blackoilfoammodules.hh +++ b/opm/models/blackoil/blackoilfoammodules.hh @@ -30,6 +30,7 @@ #include +#include #include #include @@ -184,7 +185,7 @@ public: Toolbox::template decay(intQuants.solventInverseFormationVolumeFactor()); } } else { - throw std::runtime_error("Transport phase is GAS/WATER/SOLVENT"); + OPM_THROW(std::runtime_error, "Transport phase is GAS/WATER/SOLVENT"); } // Avoid singular matrix if no gas is present. diff --git a/opm/models/blackoil/blackoilintensivequantities.hh b/opm/models/blackoil/blackoilintensivequantities.hh index c249bdd3b9f..b862cce00e3 100644 --- a/opm/models/blackoil/blackoilintensivequantities.hh +++ b/opm/models/blackoil/blackoilintensivequantities.hh @@ -175,7 +175,6 @@ public: fluidState_.setRsw(0.0); } } - BlackOilIntensiveQuantities(const BlackOilIntensiveQuantities& other) = default; BlackOilIntensiveQuantities& operator=(const BlackOilIntensiveQuantities& other) = default; @@ -184,6 +183,7 @@ public: // This ctor is used when switching to the GPU typetag which currently supports thermal effects and diffusion. template + requires (energyModuleType == EnergyModules::FullyImplicitThermal && (enableDiffusion != 0)) explicit BlackOilIntensiveQuantities( const BlackOilIntensiveQuantities& other, const FluidSystem& fsystem) : fluidState_(other.fluidState_.withOtherFluidSystem(fsystem)) @@ -206,6 +206,72 @@ public: static_assert(!enableDispersion); } + template + requires (energyModuleType == EnergyModules::FullyImplicitThermal && (enableDiffusion == 0)) + explicit BlackOilIntensiveQuantities( + const BlackOilIntensiveQuantities& other, const FluidSystem& fsystem) + : fluidState_(other.fluidState_.withOtherFluidSystem(fsystem)) + , BlackOilEnergyIntensiveQuantities( + other.rockInternalEnergy_, other.totalThermalConductivity_, other.rockFraction_) + , referencePorosity_(other.referencePorosity_) + , porosity_(other.porosity_) + , rockCompTransMultiplier_(other.rockCompTransMultiplier_) + , mobility_(other.mobility_) + , dirMob_(/*NOT YET SUPPORTED ON GPU*/) + { + static_assert(!enableSolvent); + static_assert(!enableExtbo); + static_assert(!enablePolymer); + static_assert(!enableFoam); + static_assert(!enableMICP); + static_assert(!enableBrine); + static_assert(!enableDispersion); + } + + template + requires (energyModuleType != EnergyModules::FullyImplicitThermal && (enableDiffusion != 0)) + explicit BlackOilIntensiveQuantities( + const BlackOilIntensiveQuantities& other, const FluidSystem& fsystem) + : fluidState_(other.fluidState_.withOtherFluidSystem(fsystem)) + , BlackOilDiffusionIntensiveQuantities( + other.tortuosities(), other.diffusionCoefficients()) + , referencePorosity_(other.referencePorosity_) + , porosity_(other.porosity_) + , rockCompTransMultiplier_(other.rockCompTransMultiplier_) + , mobility_(other.mobility_) + , dirMob_(/*NOT YET SUPPORTED ON GPU*/) + { + static_assert(!enableSolvent); + static_assert(!enableExtbo); + static_assert(!enablePolymer); + static_assert(!enableFoam); + static_assert(!enableMICP); + static_assert(!enableBrine); + static_assert(!enableDispersion); + } + + template + requires (energyModuleType != EnergyModules::FullyImplicitThermal && (enableDiffusion == 0)) + explicit BlackOilIntensiveQuantities( + const BlackOilIntensiveQuantities& other, const FluidSystem& fsystem) + : fluidState_(other.fluidState_.withOtherFluidSystem(fsystem)) + , referencePorosity_(other.referencePorosity_) + , porosity_(other.porosity_) + , rockCompTransMultiplier_(other.rockCompTransMultiplier_) + , mobility_(other.mobility_) + , dirMob_(/*NOT YET SUPPORTED ON GPU*/) + { + static_assert(!enableSolvent); + static_assert(!enableExtbo); + static_assert(!enablePolymer); + static_assert(!enableFoam); + static_assert(!enableMICP); + static_assert(!enableBrine); + static_assert(!enableDispersion); + } + + BlackOilIntensiveQuantities(const BlackOilIntensiveQuantities& other) = default; + /** * \brief Create a copy of this intensive quantities object * @@ -221,6 +287,45 @@ public: return newIntQuants; } + /*! + * \brief Field-by-field overlay of the BlackOil intensive-quantity values from + * another \c BlackOilIntensiveQuantities instantiation onto this one. + * + * Used by the experimental GPU intensive-quantities dispatcher to write the + * GPU-computed result onto a CPU-side \c IntensiveQuantities even when the + * two TypeTags are not value-compatible (e.g. when the CPU TypeTag enables + * dispersion or other modules that the GPU TypeTag does not). The fields + * actually computed by the dispatcher are copied here, including the + * \c mobility_ array which the GPU relperm path now populates correctly + * via \c iq.update() (see \c GpuBlackoilIntensiveQuantitiesDispatcher). + */ + template + OPM_HOST_DEVICE void overlayBlackOilFieldsFrom( + const BlackOilIntensiveQuantities& other) + { + // Full fluid-state copy (handles every stored field, including + // Rs/Rv/Rsw/Rvw, salt concentration, solvent fields, ...). This + // is intentionally chosen over a hand-picked field list so that + // the GPU dispatcher can be used as the *only* fill of the IQ + // cache (no CPU pre-pass required). + fluidState_.assign(other.fluidState_); + + for (unsigned phaseIdx = 0; phaseIdx < numPhases; ++phaseIdx) { + if (!FluidSystem::phaseIsActive(phaseIdx)) { + continue; + } + mobility_[phaseIdx] = other.mobility_[phaseIdx]; + } + if constexpr (energyModuleType == EnergyModules::FullyImplicitThermal) { + this->rockInternalEnergy_ = other.rockInternalEnergy_; + this->totalThermalConductivity_ = other.totalThermalConductivity_; + this->rockFraction_ = other.rockFraction_; + } + porosity_ = other.porosity_; + referencePorosity_ = other.referencePorosity_; + rockCompTransMultiplier_ = other.rockCompTransMultiplier_; + } + OPM_HOST_DEVICE void updateTempSalt(const Problem& problem, const PrimaryVariables& priVars, const unsigned globalSpaceIdx, @@ -430,11 +535,15 @@ public: } else { if (getFluidSystem().enableDissolvedGas()) { // Add So > 0? i.e. if only water set rs = 0) - const Evaluation& RsSat = enableExtbo ? asImp_().rs() : - getFluidSystem().saturatedDissolutionFactor(fluidState_, + Evaluation RsSat; + if constexpr (enableExtbo) { + RsSat = asImp_().rs(); + } else { + RsSat = getFluidSystem().saturatedDissolutionFactor(fluidState_, oilPhaseIdx, pvtRegionIdx, SoMax); + } fluidState_.setRs(min(RsMax, RsSat)); } else { @@ -448,11 +557,15 @@ public: } else { if (getFluidSystem().enableVaporizedOil() ) { // Add Sg > 0? i.e. if only water set rv = 0) - const Evaluation& RvSat = enableExtbo ? asImp_().rv() : - getFluidSystem().saturatedDissolutionFactor(fluidState_, + Evaluation RvSat; + if constexpr (enableExtbo) { + RvSat = asImp_().rv(); + } else { + RvSat = getFluidSystem().saturatedDissolutionFactor(fluidState_, gasPhaseIdx, pvtRegionIdx, SoMax); + } fluidState_.setRv(min(RvMax, RvSat)); } else { @@ -514,11 +627,16 @@ public: const auto [b, mu] = getFluidSystem().inverseFormationVolumeFactorAndViscosity(fluidState_, phaseIdx, pvtRegionIdx); fluidState_.setInvB(phaseIdx, b); for (int i = 0; i < nmobilities; ++i) { - if (enableExtbo && phaseIdx == oilPhaseIdx) { - (*mobilities[i])[phaseIdx] /= asImp_().oilViscosity(); - } - else if (enableExtbo && phaseIdx == gasPhaseIdx) { - (*mobilities[i])[phaseIdx] /= asImp_().gasViscosity(); + if constexpr (enableExtbo) { + if (phaseIdx == oilPhaseIdx) { + (*mobilities[i])[phaseIdx] /= asImp_().oilViscosity(); + } + else if (phaseIdx == gasPhaseIdx) { + (*mobilities[i])[phaseIdx] /= asImp_().gasViscosity(); + } + else { + (*mobilities[i])[phaseIdx] /= mu; + } } else { (*mobilities[i])[phaseIdx] /= mu; @@ -845,7 +963,7 @@ public: case Dir::ZPlus: return dirMob_->getArray(2)[phaseIdx]; default: - throw std::runtime_error("Unexpected face direction"); + OPM_THROW(std::runtime_error, "Unexpected face direction"); } } else{ @@ -906,7 +1024,7 @@ public: return BrineIntQua::permFactor(); } else { - throw std::logic_error("permFactor() called but salt precipitation or bioeffects are disabled"); + OPM_THROW(std::logic_error, "permFactor() called but salt precipitation or bioeffects are disabled"); } } diff --git a/opm/models/blackoil/blackoilprimaryvariables.hh b/opm/models/blackoil/blackoilprimaryvariables.hh index 1ed590b8c34..8eea0396474 100644 --- a/opm/models/blackoil/blackoilprimaryvariables.hh +++ b/opm/models/blackoil/blackoilprimaryvariables.hh @@ -1024,10 +1024,10 @@ private: template OPM_HOST_DEVICE void computeCapillaryPressures_(Container& result, - Scalar so, - Scalar sg, - Scalar sw, - const MaterialLawParams& matParams) const + Scalar so, + Scalar sg, + Scalar sw, + const MaterialLawParams& matParams) const { using SatOnlyFluidState = SimpleModularFluidStategetPressureScale()); } // NOTE: When adding new member variables, be sure to update the template copy constructor, diff --git a/opm/models/common/directionalmobility.hh b/opm/models/common/directionalmobility.hh index bcf2056cbe3..35f6a81865a 100644 --- a/opm/models/common/directionalmobility.hh +++ b/opm/models/common/directionalmobility.hh @@ -30,9 +30,11 @@ #include #include +#include +#include + #include #include -#include #include namespace Opm { @@ -54,16 +56,15 @@ public: : mobility_{mX, mY, mZ} {} - const array_type& getArray(unsigned index) const + OPM_HOST_DEVICE const array_type& getArray(unsigned index) const { if (index > 2) { - throw std::runtime_error("Unexpected mobility array index " + std::to_string(index)); + OPM_THROW(std::runtime_error, "Unexpected mobility array index"); } - return mobility_[index]; } - array_type& getArray(unsigned index) + OPM_HOST_DEVICE array_type& getArray(unsigned index) { return const_cast(std::as_const(*this).getArray(index)); } diff --git a/opm/models/discretization/common/fvbaseelementcontextgpu.hh b/opm/models/discretization/common/fvbaseelementcontextgpu.hh new file mode 100644 index 00000000000..f7237ea9eb0 --- /dev/null +++ b/opm/models/discretization/common/fvbaseelementcontextgpu.hh @@ -0,0 +1,309 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . + + Consult the COPYING file in the top-level source directory of this + module for the precise wording of the license and the list of + copyright holders. +*/ +/*! + * \file + * + * \brief GPU-compatible stub of FvBaseElementContext. + * + * All methods are annotated with OPM_HOST_DEVICE and return dummy/default + * values. This class is intended as a placeholder so that GPU kernels that + * receive an element context can compile; it does not implement any real + * behaviour. + * + * The heavy Dune/grid infrastructure present in FvBaseElementContext is + * intentionally absent here because those types are not usable inside a + * CUDA/HIP kernel. + */ +#ifndef OPM_FV_BASE_ELEMENT_CONTEXT_GPU_HH +#define OPM_FV_BASE_ELEMENT_CONTEXT_GPU_HH + +#include +#include +#include + +#include + +namespace Opm +{ + +/*! + * \ingroup FiniteVolumeDiscretizations + * + * \brief GPU-compatible stub mirroring the public interface of FvBaseElementContext. + * + * Uses the same TypeTag-based template parameter as FvBaseElementContext; all + * concrete types are extracted via GetPropType at compile time. + * + * \tparam TypeTag The type tag from which Scalar, PrimaryVariables, etc. are derived. + */ +template +class FvBaseElementContextGpu +{ +public: + using Scalar = GetPropType; + using PrimaryVariables = GetPropType; + using IntensiveQuantities = GetPropType; + using ExtensiveQuantities = GetPropType; + using Problem = GetPropType; + using Model = GetPropType; + using GradientCalculator = GetPropType; + + // ----------------------------------------------------------------------- + // Construction / assignment + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE FvBaseElementContextGpu() = default; + OPM_HOST_DEVICE ~FvBaseElementContextGpu() = default; + OPM_HOST_DEVICE FvBaseElementContextGpu(const FvBaseElementContextGpu&) = default; + OPM_HOST_DEVICE FvBaseElementContextGpu(FvBaseElementContextGpu&&) noexcept = default; + OPM_HOST_DEVICE FvBaseElementContextGpu& operator=(const FvBaseElementContextGpu&) = default; + OPM_HOST_DEVICE FvBaseElementContextGpu& operator=(FvBaseElementContextGpu&&) noexcept + = default; + + // ----------------------------------------------------------------------- + // Stencil / element update stubs + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE void updateAll() + { + } + OPM_HOST_DEVICE void updateStencil() + { + } + OPM_HOST_DEVICE void updatePrimaryStencil() + { + } + OPM_HOST_DEVICE void updateStencilTopology() + { + } + OPM_HOST_DEVICE void updateAllIntensiveQuantities() + { + } + OPM_HOST_DEVICE void updateIntensiveQuantities(unsigned /*timeIdx*/) + { + } + OPM_HOST_DEVICE void updatePrimaryIntensiveQuantities(unsigned /*timeIdx*/) + { + } + OPM_HOST_DEVICE void updateIntensiveQuantities(const PrimaryVariables& /*priVars*/, + unsigned /*dofIdx*/, + unsigned /*timeIdx*/) + { + } + OPM_HOST_DEVICE void updateAllExtensiveQuantities() + { + } + OPM_HOST_DEVICE void updateExtensiveQuantities(unsigned /*timeIdx*/) + { + } + + // ----------------------------------------------------------------------- + // Focus DOF + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE void setFocusDofIndex(unsigned dofIdx) + { + focusDofIdx_ = static_cast(dofIdx); + } + OPM_HOST_DEVICE unsigned focusDofIndex() const + { + return focusDofIdx_; + } + + // ----------------------------------------------------------------------- + // Linearization type + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE LinearizationType linearizationType() const + { + return LinearizationType {}; + } + + // ----------------------------------------------------------------------- + // Problem / model accessors — return references to dummy stored objects + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE const Problem& problem() const + { + return problem_; + } + OPM_HOST_DEVICE const Model& model() const + { + return model_; + } + + // ----------------------------------------------------------------------- + // DOF / face counts + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE std::size_t numDof(unsigned /*timeIdx*/) const + { + return 0; + } + OPM_HOST_DEVICE std::size_t numPrimaryDof(unsigned /*timeIdx*/) const + { + return 0; + } + OPM_HOST_DEVICE std::size_t numInteriorFaces(unsigned /*timeIdx*/) const + { + return 0; + } + OPM_HOST_DEVICE std::size_t numBoundaryFaces(unsigned /*timeIdx*/) const + { + return 0; + } + + // ----------------------------------------------------------------------- + // Global space index / volume + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE unsigned globalSpaceIndex(unsigned /*dofIdx*/, unsigned /*timeIdx*/) const + { + return 0; + } + + OPM_HOST_DEVICE Scalar dofVolume(unsigned /*dofIdx*/, unsigned /*timeIdx*/) const + { + return Scalar {0}; + } + + OPM_HOST_DEVICE Scalar dofTotalVolume(unsigned /*dofIdx*/, unsigned /*timeIdx*/) const + { + return Scalar {0}; + } + + // ----------------------------------------------------------------------- + // Boundary + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE bool onBoundary() const + { + return false; + } + + // ----------------------------------------------------------------------- + // Intensive quantities + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE const IntensiveQuantities& intensiveQuantities(unsigned /*dofIdx*/, + unsigned /*timeIdx*/) const + { + return intensiveQuantitiesStashed_; + } + + OPM_HOST_DEVICE IntensiveQuantities& intensiveQuantities(unsigned /*dofIdx*/, + unsigned /*timeIdx*/) + { + return intensiveQuantitiesStashed_; + } + + OPM_HOST_DEVICE const IntensiveQuantities* thermodynamicHint(unsigned /*dofIdx*/, + unsigned /*timeIdx*/) const + { + return nullptr; + } + + // ----------------------------------------------------------------------- + // Primary variables + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE const PrimaryVariables& primaryVars(unsigned /*dofIdx*/, + unsigned /*timeIdx*/) const + { + return priVarsStashed_; + } + + // ----------------------------------------------------------------------- + // Stash / restore + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE bool haveStashedIntensiveQuantities() const + { + return stashedDofIdx_ != -1; + } + + OPM_HOST_DEVICE int stashedDofIdx() const + { + return stashedDofIdx_; + } + + OPM_HOST_DEVICE void stashIntensiveQuantities(unsigned dofIdx) + { + stashedDofIdx_ = static_cast(dofIdx); + } + + OPM_HOST_DEVICE void restoreIntensiveQuantities(unsigned /*dofIdx*/) + { + stashedDofIdx_ = -1; + } + + // ----------------------------------------------------------------------- + // Gradient calculator + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE const GradientCalculator& gradientCalculator() const + { + return gradientCalculator_; + } + + // ----------------------------------------------------------------------- + // Extensive quantities + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE const ExtensiveQuantities& extensiveQuantities(unsigned /*fluxIdx*/, + unsigned /*timeIdx*/) const + { + return extensiveQuantitiesStashed_; + } + + // ----------------------------------------------------------------------- + // Storage cache flag + // ----------------------------------------------------------------------- + + OPM_HOST_DEVICE bool enableStorageCache() const + { + return enableStorageCache_; + } + OPM_HOST_DEVICE void setEnableStorageCache(bool yesno) + { + enableStorageCache_ = yesno; + } + +private: + // Dummy stored objects returned by reference accessors. + // On device these are default-constructed and carry no meaningful data. + Problem problem_ {}; + Model model_ {}; + GradientCalculator gradientCalculator_ {}; + IntensiveQuantities intensiveQuantitiesStashed_ {}; + ExtensiveQuantities extensiveQuantitiesStashed_ {}; + PrimaryVariables priVarsStashed_ {}; + + int stashedDofIdx_ {-1}; + int focusDofIdx_ {-1}; + bool enableStorageCache_ {false}; +}; + +} // namespace Opm + +#endif // OPM_FV_BASE_ELEMENT_CONTEXT_GPU_HH diff --git a/opm/models/discretization/common/tpfalinearizer.hh b/opm/models/discretization/common/tpfalinearizer.hh index 94f6267663c..333657d7f07 100644 --- a/opm/models/discretization/common/tpfalinearizer.hh +++ b/opm/models/discretization/common/tpfalinearizer.hh @@ -98,7 +98,7 @@ struct FullDomain }; #if HAVE_CUDA && OPM_IS_COMPILING_WITH_GPU_COMPILER - FullDomain> copy_to_gpu(FullDomain<> CPUDomain) + inline FullDomain> copy_to_gpu(FullDomain<> CPUDomain) { if (CPUDomain.cells.size() == 0) { OPM_THROW(std::runtime_error, "Cannot copy empty full domain to GPU."); @@ -108,7 +108,7 @@ struct FullDomain }; }; - FullDomain> make_view(FullDomain>& buffer) + inline FullDomain> make_view(FullDomain>& buffer) { if (buffer.cells.size() == 0) { OPM_THROW(std::runtime_error, "Cannot make view of empty full domain buffer."); diff --git a/opm/simulators/flow/FIBlackoilModel.hpp b/opm/simulators/flow/FIBlackoilModel.hpp index 5a56ed016f8..3ec0c49b528 100644 --- a/opm/simulators/flow/FIBlackoilModel.hpp +++ b/opm/simulators/flow/FIBlackoilModel.hpp @@ -42,7 +42,20 @@ #include +#include + +#include + +#if HAVE_CUDA +#include +#include +#include +#include +#endif + +#include #include +#include #include #include @@ -79,14 +92,30 @@ class FIBlackOilModel : public BlackOilModel Dune::Partitions::all, ThreadManager::maxThreads()) { + initGpuIntensiveQuantitiesDispatcher_(); } void invalidateAndUpdateIntensiveQuantities(unsigned timeIdx) const { this->invalidateIntensiveQuantitiesCache(timeIdx); + + // When the experimental GPU dispatcher is active and the CPU has + // already populated the cache once (including the fields the GPU + // dispatcher does not overlay, e.g. mobility / energy), skip the + // expensive CPU loop and rely solely on the GPU dispatcher to + // refresh the BlackOil intensive-quantities fields. The remaining + // cached fields keep their previously computed values, which is + // the intended behaviour for this experimental GPU-only path. + if (gpuDispatcherActiveAndInitialized_()) { + markIntensiveQuantitiesCacheValid_(timeIdx); + maybeRunGpuIntensiveQuantitiesDispatcher_(timeIdx); + return; + } + if constexpr (gridIsUnchanging) { if constexpr (avoidElementContext) { updateCachedIntQuants(timeIdx); + cpuIntensiveQuantitiesInitialized_ = true; return; } OPM_BEGIN_PARALLEL_TRY_CATCH(); @@ -102,6 +131,11 @@ class FIBlackOilModel : public BlackOilModel } OPM_END_PARALLEL_TRY_CATCH("invalidateAndUpdateIntensiveQuantities: state error", this->simulator_.vanguard().grid().comm()); + // After all cells are CPU-updated and written into the cache, + // overlay the GPU-computed BlackOil fields in one batched call + // (no-op when the GPU dispatcher is unavailable or disabled). + maybeRunGpuIntensiveQuantitiesDispatcher_(timeIdx); + cpuIntensiveQuantitiesInitialized_ = true; } else { // Grid is possibly refined or otherwise changed between calls. ElementContext elemCtx(this->simulator_); @@ -262,6 +296,16 @@ class FIBlackOilModel : public BlackOilModel this->template updateSingleCachedIntQuantUnchecked(elementMapper.index(elem), timeIdx); } } + + // After the CPU per-cell update has populated all fields, optionally + // overlay the BlackOil intensive-quantities fields with their GPU + // counterparts via the experimental dispatcher. The dispatcher + // overwrites the subset of fields covered by + // BlackOilIntensiveQuantities::overlayBlackOilFieldsFrom; any field + // not handled there keeps the CPU-computed value. + // The call is a no-op when the GPU dispatcher is unavailable or the + // user has not enabled it. + maybeRunGpuIntensiveQuantitiesDispatcher_(timeIdx); } template @@ -276,6 +320,158 @@ class FIBlackOilModel : public BlackOilModel } ElementChunks element_chunks_; + + // ---------------------------------------------------------------------- + // GPU intensive-quantities dispatcher integration. + // + // All knowledge about whether the experimental GPU dispatcher is + // available, whether it is supported for the current TypeTag, and how + // to invoke it, is intentionally isolated to the few private members + // and helper methods below. The rest of the model only sees a single + // entry point: maybeRunGpuIntensiveQuantitiesDispatcher_(timeIdx). + // ---------------------------------------------------------------------- + + // Compile-time predicates. We use them to avoid sprinkling preprocessor + // conditionals throughout the rest of the class. + static constexpr bool gpuDispatcherCompiledIn_ = +#if HAVE_CUDA && OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + true; +#else + false; +#endif + + static constexpr bool gpuDispatcherSupportsTypeTag_ = +#if HAVE_CUDA && OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + Opm::gpuistl::GpuBlackoilIntensiveQuantitiesDispatcherSupport::value; +#else + false; +#endif + + bool useGpuIntensiveQuantitiesDispatcher_{false}; + + // Tracks whether the CPU intensive-quantities update has been run at + // least once. Used to ensure non-BlackOil cached fields (mobility, + // energy, ...) are populated before we switch to the GPU-only path + // when the experimental dispatcher is enabled. + mutable bool cpuIntensiveQuantitiesInitialized_{false}; + + // True iff the GPU dispatcher is compiled in, supported for the + // current TypeTag, enabled at runtime, and the CPU has already been + // run once to populate the cached non-BlackOil fields. + bool gpuDispatcherActiveAndInitialized_() const noexcept + { + if constexpr (gpuDispatcherCompiledIn_ && gpuDispatcherSupportsTypeTag_) { + return useGpuIntensiveQuantitiesDispatcher_ + && cpuIntensiveQuantitiesInitialized_; + } else { + return false; + } + } + + // Mark every entry of the intensive-quantity cache for the given time + // index as valid. Used on the GPU-only path where we skip the CPU + // update loop (which would normally mark each entry valid as a + // side-effect) and rely on the GPU dispatcher to refresh the + // BlackOil fields in-place. + void markIntensiveQuantitiesCacheValid_(const unsigned timeIdx) const + { + const std::size_t numCells = this->intensiveQuantityCache_[timeIdx].size(); + for (std::size_t i = 0; i < numCells; ++i) { + this->setIntensiveQuantitiesCacheEntryValidity(i, timeIdx, true); + } + } + +#if HAVE_CUDA && OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + using GpuDispatcherStorage = std::conditional_t< + Opm::gpuistl::GpuBlackoilIntensiveQuantitiesDispatcherSupport::value, + std::unique_ptr>, + std::monostate>; + mutable GpuDispatcherStorage gpuIntensiveQuantitiesDispatcher_{}; +#endif + + // Read the user-facing parameter and validate that the requested + // configuration is actually supported by this build / TypeTag. + // Throws (with a self-contained reason) when the user requests the GPU + // dispatcher but it is not available. + void initGpuIntensiveQuantitiesDispatcher_() + { + const bool requested = + Parameters::Get(); + if (!requested) { + useGpuIntensiveQuantitiesDispatcher_ = false; + return; + } + + if constexpr (!gpuDispatcherCompiledIn_) { + OPM_THROW(std::runtime_error, + "--experimental-compute-properties-on-gpu=true was " + "specified, but this binary was built without a " + "compatible GPU back-end. The GPU intensive-quantities " + "dispatcher requires either HIP or CUDA >= 13.1; " + "rebuild with HIP or with a sufficiently new CUDA " + "toolkit (see CMake option " + "OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER)."); + } else if constexpr (!gpuDispatcherSupportsTypeTag_) { + OPM_THROW(std::runtime_error, + "--experimental-compute-properties-on-gpu=true was " + "specified, but the active TypeTag is not supported " + "by the GPU BlackOil intensive-quantities dispatcher. " + "Only the CO2STORE-compatible TypeTag " + "FlowGasWaterEnergyProblem (gas+water+energy, no oil) " + "is currently supported by the experimental GPU " + "dispatcher."); + } else { + useGpuIntensiveQuantitiesDispatcher_ = true; + } + } + + // Single entry point that hides all dispatcher-related logic from the + // rest of the model. No-op when the dispatcher is either not compiled + // in, not supported for the current TypeTag, or not enabled at runtime. + void maybeRunGpuIntensiveQuantitiesDispatcher_(const unsigned timeIdx) const + { + if constexpr (gpuDispatcherCompiledIn_ && gpuDispatcherSupportsTypeTag_) { + if (useGpuIntensiveQuantitiesDispatcher_) { + const auto gpuStartTime = std::chrono::steady_clock::now(); + this->runGpuIntensiveQuantitiesDispatcher_(timeIdx); + const auto gpuDuration = + std::chrono::duration_cast( + std::chrono::steady_clock::now() - gpuStartTime); + Opm::OpmLog::info(std::format( + "GPU intensive-quantities dispatch took {} ms", + gpuDuration.count())); + } + } else { + (void)timeIdx; + } + } + +#if HAVE_CUDA && OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + void runGpuIntensiveQuantitiesDispatcher_(const unsigned timeIdx) const + { + if constexpr (Opm::gpuistl::GpuBlackoilIntensiveQuantitiesDispatcherSupport::value) { + if (!gpuIntensiveQuantitiesDispatcher_) { + gpuIntensiveQuantitiesDispatcher_ = + std::make_unique< + Opm::gpuistl::GpuBlackoilIntensiveQuantitiesDispatcher>(); + } + using PV = GetPropType; + const auto& sol = this->solution(timeIdx); + const std::size_t numCells = this->intensiveQuantityCache_[timeIdx].size(); + std::vector priVarsPtrs(numCells); + std::vector outIqPtrs(numCells); + for (std::size_t i = 0; i < numCells; ++i) { + priVarsPtrs[i] = &sol[i]; + outIqPtrs[i] = &this->intensiveQuantityCache_[timeIdx][i]; + } + gpuIntensiveQuantitiesDispatcher_->update( + this->simulator_.problem(), + priVarsPtrs.data(), + outIqPtrs.data(), + numCells); + } + } +#endif }; } // namespace Opm diff --git a/opm/simulators/flow/FlowGasWaterEnergyTypeTag.hpp b/opm/simulators/flow/FlowGasWaterEnergyTypeTag.hpp new file mode 100644 index 00000000000..1c91e08b05f --- /dev/null +++ b/opm/simulators/flow/FlowGasWaterEnergyTypeTag.hpp @@ -0,0 +1,99 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +/*! + * \file + * + * Shared TypeTag declaration for the gas+water+energy (CO2STORE-style) + * blackoil flow problem. Originally inlined into + * \c flow/flow_gaswater_energy.cpp; extracted into a header so that other + * translation units (e.g. the GPU intensive-quantities dispatcher) can + * obtain matching property bindings via \c GetPropType. + */ +#ifndef OPM_FLOW_GASWATER_ENERGY_TYPETAG_HPP +#define OPM_FLOW_GASWATER_ENERGY_TYPETAG_HPP + +#include + +#include +#include +#include + +#include + +namespace Opm::Properties { +namespace TTag { +struct FlowGasWaterEnergyProblem { + using InheritsFrom = std::tuple; +}; +} // namespace TTag + +template +struct Linearizer { using type = TpfaLinearizer; }; + +template +struct LocalResidual { using type = BlackOilLocalResidualTPFA; }; + +template +struct EnableDiffusion { static constexpr bool value = true; }; + +template +struct EnableDispersion { static constexpr bool value = true; }; + +template +struct EnergyModuleType +{ static constexpr EnergyModules value = EnergyModules::FullyImplicitThermal; }; + +template +struct EnableDisgasInWater { + static constexpr bool value = true; +}; + +template +struct EnableVapwat { + static constexpr bool value = true; +}; + +//! The indices required by the model +template +struct Indices +{ +private: + // it is unfortunately not possible to simply use 'TypeTag' here because + // this leads to cyclic definitions of some properties. + using BaseTypeTag = TTag::FlowProblem; + using FluidSystem = GetPropType; + static constexpr EnergyModules energyModuleType = getPropValue(); + static constexpr int numEnergyVars = (energyModuleType == EnergyModules::FullyImplicitThermal) ? 1 : 0; +public: + using type = BlackOilTwoPhaseIndices(), + getPropValue(), + getPropValue(), + numEnergyVars, + getPropValue(), + getPropValue(), + /*PVOffset=*/0, + /*disabledCompIdx=*/FluidSystem::oilCompIdx, + getPropValue()>; +}; + +} // namespace Opm::Properties + +#endif // OPM_FLOW_GASWATER_ENERGY_TYPETAG_HPP diff --git a/opm/simulators/flow/FlowProblemParameters.cpp b/opm/simulators/flow/FlowProblemParameters.cpp index 2f5f29668da..1daa0a09b2a 100644 --- a/opm/simulators/flow/FlowProblemParameters.cpp +++ b/opm/simulators/flow/FlowProblemParameters.cpp @@ -81,6 +81,24 @@ void registerFlowProblemParameters() ("Conserve inner energy and not enthalpy " "even if THERMAL is used."); +#if HAVE_CUDA && OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + Parameters::Register + ("Experimental: compute BlackOilIntensiveQuantities on the GPU " + "via the GpuBlackoilIntensiveQuantitiesDispatcher. Only takes " + "effect for CO2STORE-compatible TypeTags; ignored otherwise."); +#else + // Always register so that the run fails with an explanatory error + // message (rather than an "unknown parameter" message) when this binary + // has been built without the GPU intensive-quantities dispatcher (i.e. + // without CUDA/HIP support, or with a CUDA toolkit older than 13.1) + // but the user still requests the GPU dispatcher. + Parameters::Register + ("Experimental: compute BlackOilIntensiveQuantities on the GPU. " + "This binary was built without the GPU intensive-quantities " + "dispatcher (no CUDA/HIP support, or CUDA<13.1), so enabling " + "this option will cause the run to fail at start-up."); +#endif + // By default, stop it after the universe will probably have stopped // to exist. (the ECL problem will finish the simulation explicitly // after it simulated the last episode specified in the deck.) diff --git a/opm/simulators/flow/FlowProblemParameters.hpp b/opm/simulators/flow/FlowProblemParameters.hpp index 9908026b63e..0c2c307723d 100644 --- a/opm/simulators/flow/FlowProblemParameters.hpp +++ b/opm/simulators/flow/FlowProblemParameters.hpp @@ -69,6 +69,15 @@ struct UseHybridNewton { static constexpr bool value = false; }; // Conserve inner energy instead of enthalpy even if THERMAL is used struct ConserveInnerEnergyThermal { static constexpr bool value = false; }; +// Experimental: route the per-element BlackOilIntensiveQuantities update +// through the GPU dispatcher instead of computing it on the CPU. Only takes +// effect for CO2STORE-compatible TypeTags (gas+water, no oil). The parameter +// is always declared so that the run can fail with an explanatory error +// message when the option is requested in a binary that was built without +// CUDA/HIP support (or without a sufficiently new toolkit), or when the +// active TypeTag is incompatible with the GPU dispatcher. +struct ExperimentalComputePropertiesOnGpu { static constexpr bool value = false; }; + } // namespace Opm::Parameters namespace Opm { diff --git a/opm/simulators/flow/GpuEclMaterialLawManager.hpp b/opm/simulators/flow/GpuEclMaterialLawManager.hpp new file mode 100644 index 00000000000..46611f4c688 --- /dev/null +++ b/opm/simulators/flow/GpuEclMaterialLawManager.hpp @@ -0,0 +1,461 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +/*! + * \file + * \brief GPU-compatible, simplified version of Opm::EclMaterialLaw::Manager. + * + * This class only supports the actions needed by FlowProblem, + * FlowProblemBlackoil and the BlackOilIntensiveQuantities. It re-uses the + * generic two-phase material types from opm-common + * (\c Opm::EclTwoPhaseMaterial and \c Opm::EclTwoPhaseMaterialParams) with + * a value-storage policy (\c Opm::gpuistl::ValueAsPointer) so the per-cell + * material law parameters are trivially copyable to the device. + * + * The chain of multiplexers used by the CPU manager + * (\c EclMultiplexerMaterial \f$\to\f$ \c EclEpsTwoPhaseLaw \f$\to\f$ + * \c EclHysteresisTwoPhaseLaw \f$\to\f$ \c SatCurveMultiplexer) is bypassed: + * the GPU manager unwraps the CPU multiplexer parameters down to the + * underlying \c PiecewiseLinearTwoPhaseMaterialParams and uploads only the + * piecewise-linear sample tables. + */ +#ifndef OPM_GPU_ECL_MATERIAL_LAW_MANAGER_HPP +#define OPM_GPU_ECL_MATERIAL_LAW_MANAGER_HPP + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace Opm::EclMaterialLaw +{ + +namespace detail +{ + + /*! + * \brief Walk down a CPU material-law parameter object until the enclosed + * \c PiecewiseLinearTwoPhaseMaterialParams is reached. + * + * Supports the standard layering used by the CPU \c EclMaterialLaw::Manager: + * \c EclHysteresisTwoPhaseLawParams \f$\to\f$ \c EclEpsTwoPhaseLawParams + * \f$\to\f$ \c SatCurveMultiplexerParams + * \f$\to\f$ \c PiecewiseLinearTwoPhaseMaterialParams. + */ + template + const auto& extractCpuPlParams(const CpuParams& params) + { + if constexpr (requires { params.drainageParams(); }) { + return extractCpuPlParams(params.drainageParams()); + } else if constexpr (requires { params.effectiveLawParams(); }) { + return extractCpuPlParams(params.effectiveLawParams()); + } else if constexpr (requires { + params.template getRealParams< + ::Opm::SatCurveMultiplexerApproach::PiecewiseLinear>(); + }) { + return params + .template getRealParams<::Opm::SatCurveMultiplexerApproach::PiecewiseLinear>(); + } else { + return params; + } + } + + /*! + * \brief Optional host-side base used by \c GpuManager when its storage is + * owning device memory: keeps the per-cell sample buffers alive so + * that the \c GpuView pointers stored inside the cells' + * \c PiecewiseLinearTwoPhaseMaterialParams remain valid for as long + * as the manager exists. Empty for any non-owning storage so that + * \c GpuView based managers stay device-trivially-copyable. + */ + template + struct GpuPiecewiseLinearSampleHolder { + }; + + template + struct GpuPiecewiseLinearSampleHolder { + std::vector<::Opm::gpuistl::GpuBuffer> sampleBuffers {}; + }; + +} // namespace detail + +/*! + * \brief A minimal, GPU-compatible material-law manager. + * + * Only the EclDefaultMaterial three-phase law and the EclTwoPhaseMaterial + * two-phase law are supported. The two-phase gas/oil and oil/water sub-law + * types are template parameters so the caller can choose any GPU compatible + * implementation (e.g. PiecewiseLinear). + * + * \tparam TraitsT Three-phase material traits. + * \tparam GasOilLawT Two-phase gas/oil law type (must implement the + * saturation-only API). + * \tparam OilWaterLawT Two-phase oil/water law type (must implement the + * saturation-only API). + * \tparam Storage Storage container template; defaults to a CPU vector. + * Use \c Opm::gpuistl::GpuBuffer for owning GPU storage + * and \c Opm::gpuistl::GpuView for non-owning GPU + * storage. + * \tparam MaterialLawT The three-phase material-law type to use; defaults to + * \c Opm::EclDefaultMaterial. + */ +template class Storage = ::Opm::VectorWithDefaultAllocator, + class MaterialLawT = ::Opm::EclDefaultMaterial> +class GpuManager : private detail::GpuPiecewiseLinearSampleHolder< + typename TraitsT::Scalar, + std::is_same_v, ::Opm::gpuistl::GpuBuffer>> +{ +public: + using Traits = TraitsT; + using Scalar = typename Traits::Scalar; + using GasOilLaw = GasOilLawT; + using OilWaterLaw = OilWaterLawT; + using GasOilParams = typename GasOilLaw::Params; + using OilWaterParams = typename OilWaterLaw::Params; + + /*! + * \brief The actual material-law type used by the IQ. + * + * Defaults to the three-phase \c EclDefaultMaterial. Pass an + * \c Opm::EclTwoPhaseMaterial<...> instantiation (with a value-storage + * \c EclTwoPhaseMaterialParams, e.g. one using + * \c Opm::gpuistl::ValueAsPointer) to instead use the two-phase law. + * Only the GasWater sub-approach is currently supported by the + * from-CPU constructors below; this matches the CO2STORE setup. + */ + using MaterialLaw = MaterialLawT; + using MaterialLawParams = typename MaterialLaw::Params; + + static constexpr int waterPhaseIdx = Traits::wettingPhaseIdx; + static constexpr int oilPhaseIdx = Traits::nonWettingPhaseIdx; + static constexpr int gasPhaseIdx = Traits::gasPhaseIdx; + static constexpr int numPhases = Traits::numPhases; + +private: + /*! \brief Trait: true if the material-law parameters are an + * \c EclTwoPhaseMaterialParams (i.e. expose a nested + * \c GasWaterParams type). */ + template + struct IsTwoPhaseMaterialLawParams : std::false_type { + }; + + template + struct IsTwoPhaseMaterialLawParams> + : std::true_type { + }; + + static constexpr bool isTwoPhase = IsTwoPhaseMaterialLawParams::value; + +public: + OPM_HOST_DEVICE GpuManager() = default; + + OPM_HOST_DEVICE GpuManager(Storage materialLawParams, + Storage satnumRegionArray) + : materialLawParams_(std::move(materialLawParams)) + , satnumRegionArray_(std::move(satnumRegionArray)) + { + } + + /*! + * \brief Construct from a CPU \c Opm::EclMaterialLaw::Manager directly + * into device-resident \c GpuBuffer storage. + * + * For each cell, the per-cell piecewise-linear sample arrays are + * uploaded to the GPU as individual \c GpuBuffer instances kept + * alive by this manager (via its private + * \c detail::GpuPiecewiseLinearSampleHolder base). The cell's + * \c MaterialLawParams is populated with \c GpuView views + * referencing those device buffers and is then bulk-copied to the + * device alongside the satnum array. + * + * Only enabled when this manager itself uses \c GpuBuffer storage and + * the two-phase laws use \c GpuView sample storage. + */ + template , + class GasOilParamsArg = GasOilParams, + class OilWaterParamsArg = OilWaterParams, + class IntegerStorage = Storage, + class MaterialLawParamsStorageT = Storage, + std::enable_if_t> + && std::is_same_v> + && std::is_same_v> + && std::is_same_v>, + int> + = 0> + explicit GpuManager(const CpuManager& cpu, std::size_t numElements) + : detail::GpuPiecewiseLinearSampleHolder {} + , materialLawParams_( + buildHostMaterialLawParamsForGpu(cpu, numElements, this->sampleBuffers)) + , satnumRegionArray_(buildHostSatnumRegionArray(cpu, numElements)) + { + } + + /*! \brief Material-law parameters of an active cell. */ + OPM_HOST_DEVICE MaterialLawParams materialLawParams(unsigned elemIdx) const + { + // Return by value: GpuView::operator[] const already returns by + // value, so binding a reference to materialLawParams_[idx] inside + // this function would dangle. Returning by value lets the caller + // (which typically does `const auto& mp = ...`) safely extend the + // temporary's lifetime. + return materialLawParams_[elemIdx]; + } + + OPM_HOST_DEVICE MaterialLawParams& materialLawParams(unsigned elemIdx) + { + return materialLawParams_[elemIdx]; + } + + /*! \brief Saturation-region index of an active cell. */ + OPM_HOST_DEVICE int satnumRegionIdx(unsigned elemIdx) const + { + return satnumRegionArray_[elemIdx]; + } + + /*! \brief Direct access to the underlying storages + * (used by copy_to_gpu / make_view). */ + const Storage& materialLawParamsStorage() const + { + return materialLawParams_; + } + Storage& materialLawParamsStorage() + { + return materialLawParams_; + } + + const Storage& satnumRegionArrayStorage() const + { + return satnumRegionArray_; + } + Storage& satnumRegionArrayStorage() + { + return satnumRegionArray_; + } + +private: + /*! + * \brief Build the per-cell host-side \c MaterialLawParams vector for + * the GpuBuffer-storage from-CPU constructor. Allocates one + * \c GpuBuffer per per-cell sample array and wraps each + * in a \c GpuView stored inside the cell's MLP. + * The owning device buffers are pushed into \p sampleBuffers, + * which is the manager's holder member. + */ + template + static std::vector + buildHostMaterialLawParamsForGpu(const CpuManager& cpu, + std::size_t numElements, + std::vector<::Opm::gpuistl::GpuBuffer>& sampleBuffers) + { + // The CPU manager produces one piecewise-linear sample table per + // sub-law per cell. For a three-phase EclDefaultMaterial that is up + // to twelve buffers per cell (six per gas/oil and oil/water sub-law); + // for the two-phase EclTwoPhaseMaterial it is six (a single + // gas/water sub-law). Reserving the upper bound avoids reallocation. + sampleBuffers.reserve(numElements * 12); + std::vector materialLawParams(numElements); + + auto pushSampleBuffer = [&](const auto& sampleVector) { + std::vector hostCopy(sampleVector.begin(), sampleVector.end()); + sampleBuffers.emplace_back(hostCopy); + const auto& deviceBuffer = sampleBuffers.back(); + return ::Opm::gpuistl::GpuView(deviceBuffer.data(), deviceBuffer.size()); + }; + + for (std::size_t i = 0; i < numElements; ++i) { + const auto& cpuMaterialParams = cpu.materialLawParams(static_cast(i)); + + if constexpr (isTwoPhase) { + buildTwoPhaseCellParams(cpuMaterialParams, materialLawParams[i], pushSampleBuffer); + } else { + buildThreePhaseCellParams( + cpuMaterialParams, materialLawParams[i], pushSampleBuffer); + } + } + return materialLawParams; + } + + /*! + * \brief Build a single cell's two-phase \c EclTwoPhaseMaterialParams + * instance with GPU-resident piecewise-linear sample tables. + * + * Only the \c GasWater sub-approach is currently supported, since this + * is the only configuration produced by CO2STORE-style decks. + */ + template + static void buildTwoPhaseCellParams(const CpuMaterialParams& cpuMaterialParams, + MaterialLawParams& cellParams, + PushBuffer& pushSampleBuffer) + { + if (cpuMaterialParams.approach() != ::Opm::EclMultiplexerApproach::TwoPhase) { + throw std::logic_error("Opm::EclMaterialLaw::GpuManager configured for the two-phase " + "material law requires a CPU material-law parameter set that " + "uses EclMultiplexerApproach::TwoPhase."); + } + const auto& cpuTwoPhaseParams + = cpuMaterialParams.template getRealParams<::Opm::EclMultiplexerApproach::TwoPhase>(); + if (cpuTwoPhaseParams.approach() != ::Opm::EclTwoPhaseApproach::GasWater) { + throw std::logic_error("Opm::EclMaterialLaw::GpuManager only supports the GasWater " + "two-phase sub-approach."); + } + + using GasWaterParams = typename MaterialLawParams::GasWaterParams; + const auto& cpuGasWaterPiecewiseLinear + = detail::extractCpuPlParams(cpuTwoPhaseParams.gasWaterParams()); + GasWaterParams gasWaterParams(pushSampleBuffer(cpuGasWaterPiecewiseLinear.SwPcwnSamples()), + pushSampleBuffer(cpuGasWaterPiecewiseLinear.pcwnSamples()), + pushSampleBuffer(cpuGasWaterPiecewiseLinear.SwKrwSamples()), + pushSampleBuffer(cpuGasWaterPiecewiseLinear.krwSamples()), + pushSampleBuffer(cpuGasWaterPiecewiseLinear.SwKrnSamples()), + pushSampleBuffer(cpuGasWaterPiecewiseLinear.krnSamples())); + + cellParams.setGasWaterParams( + typename MaterialLawParams::GasWaterParamsStorage(std::move(gasWaterParams))); + cellParams.setApproach(::Opm::EclTwoPhaseApproach::GasWater); + cellParams.finalize(); + } + + /*! + * \brief Build a single cell's three-phase \c EclDefaultMaterialParams + * instance with GPU-resident piecewise-linear sample tables. + */ + template + static void buildThreePhaseCellParams(const CpuMaterialParams& cpuMaterialParams, + MaterialLawParams& cellParams, + PushBuffer& pushSampleBuffer) + { + if (cpuMaterialParams.approach() != ::Opm::EclMultiplexerApproach::Default) { + throw std::logic_error("Opm::EclMaterialLaw::GpuManager only supports the Default " + "three-phase material law approach."); + } + const auto& cpuDefaultParams + = cpuMaterialParams.template getRealParams<::Opm::EclMultiplexerApproach::Default>(); + const auto& cpuGasOilPiecewiseLinear + = detail::extractCpuPlParams(cpuDefaultParams.gasOilParams()); + const auto& cpuOilWaterPiecewiseLinear + = detail::extractCpuPlParams(cpuDefaultParams.oilWaterParams()); + + GasOilParams gasOilParams(pushSampleBuffer(cpuGasOilPiecewiseLinear.SwPcwnSamples()), + pushSampleBuffer(cpuGasOilPiecewiseLinear.pcwnSamples()), + pushSampleBuffer(cpuGasOilPiecewiseLinear.SwKrwSamples()), + pushSampleBuffer(cpuGasOilPiecewiseLinear.krwSamples()), + pushSampleBuffer(cpuGasOilPiecewiseLinear.SwKrnSamples()), + pushSampleBuffer(cpuGasOilPiecewiseLinear.krnSamples())); + OilWaterParams oilWaterParams(pushSampleBuffer(cpuOilWaterPiecewiseLinear.SwPcwnSamples()), + pushSampleBuffer(cpuOilWaterPiecewiseLinear.pcwnSamples()), + pushSampleBuffer(cpuOilWaterPiecewiseLinear.SwKrwSamples()), + pushSampleBuffer(cpuOilWaterPiecewiseLinear.krwSamples()), + pushSampleBuffer(cpuOilWaterPiecewiseLinear.SwKrnSamples()), + pushSampleBuffer(cpuOilWaterPiecewiseLinear.krnSamples())); + + cellParams.setGasOilParams(std::make_shared(std::move(gasOilParams))); + cellParams.setOilWaterParams(std::make_shared(std::move(oilWaterParams))); + cellParams.setSwl(cpuDefaultParams.Swl()); + cellParams.finalize(); + } + + template + static std::vector buildHostSatnumRegionArray(const CpuManager& cpu, + std::size_t numElements) + { + std::vector satnumRegionArray(numElements); + for (std::size_t i = 0; i < numElements; ++i) { + satnumRegionArray[i] = static_cast(cpu.satnumRegionIdx(static_cast(i))); + } + return satnumRegionArray; + } + + Storage materialLawParams_ {}; + Storage satnumRegionArray_ {}; +}; + +} // namespace Opm::EclMaterialLaw + +namespace Opm::gpuistl +{ + +/*! + * \brief Copy a CPU GpuManager to GPU-resident GpuBuffer storage. + * + * The MaterialLawParams element type is assumed to be the same on the CPU + * and the GPU, i.e. the caller is responsible for making the GasOilLaw and + * OilWaterLaw GPU-compatible (typically by templating their parameter type + * on a GPU storage). + */ +template +::Opm::EclMaterialLaw::GpuManager +copy_to_gpu(const ::Opm::EclMaterialLaw::GpuManager& cpu) +{ + using GpuManagerBuffer = ::Opm::EclMaterialLaw:: + GpuManager; + using MaterialLawParams = typename GpuManagerBuffer::MaterialLawParams; + return GpuManagerBuffer(GpuBuffer(cpu.materialLawParamsStorage()), + GpuBuffer(cpu.satnumRegionArrayStorage())); +} + +/*! + * \brief Make a non-owning GpuView based GpuManager from an owning GpuBuffer + * based GpuManager. + */ +template +::Opm::EclMaterialLaw::GpuManager +make_view( + ::Opm::EclMaterialLaw::GpuManager& + buf) +{ + using GpuManagerView = ::Opm::EclMaterialLaw:: + GpuManager; + using MaterialLawParams = typename GpuManagerView::MaterialLawParams; + return GpuManagerView( + GpuView(buf.materialLawParamsStorage().data(), + buf.materialLawParamsStorage().size()), + GpuView(buf.satnumRegionArrayStorage().data(), buf.satnumRegionArrayStorage().size())); +} + +} // namespace Opm::gpuistl + +#endif // OPM_GPU_ECL_MATERIAL_LAW_MANAGER_HPP diff --git a/opm/simulators/flow/GpuEclThermalLawManager.hpp b/opm/simulators/flow/GpuEclThermalLawManager.hpp new file mode 100644 index 00000000000..bc87a3c9884 --- /dev/null +++ b/opm/simulators/flow/GpuEclThermalLawManager.hpp @@ -0,0 +1,367 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +/*! + * \file + * + * GPU-portable, simplified version of \c Opm::EclThermalLawManager. + * + * Mirrors the design of \c Opm::EclMaterialLaw::GpuManager: + * + * - Templated on the outer storage container (per-cell / per-region + * bulk arrays of params): \c VectorWithDefaultAllocator on the CPU, + * \c GpuBuffer for owning device memory, \c GpuView for non-owning + * device storage usable from a kernel. + * - Templated on the inner storage used by the per-region SPECROCK + * sample tables (one tiny array per region). + * - The CPU \c Opm::EclThermalLawManager is treated as a *builder*: a + * dedicated constructor extracts the per-region SPECROCK sample + * tables and the per-cell THCONR coefficients and uploads them to + * device memory. + * + * Currently only the SPECROCK solid-energy approach and the THCONR + * thermal-conduction approach are supported by the builder, since these + * are the only ones used by the CO2STORE+THERMAL setup that the GPU + * dispatcher targets. The builder throws via \c OPM_THROW for any other + * approach. + */ +#ifndef OPM_GPU_ECL_THERMAL_LAW_MANAGER_HPP +#define OPM_GPU_ECL_THERMAL_LAW_MANAGER_HPP + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace Opm::EclThermalLaw { + +namespace detail { + + /*! + * \brief Holder for the per-region SPECROCK sample buffers when the + * manager owns its device memory (i.e. the outer storage is + * \c GpuBuffer). Each per-region + * \c EclSpecrockLawParams stored in the + * bulk array references one of the buffers in this holder, so + * the holder must outlive every kernel invocation. + * + * For non-owning outer storage (host or GpuView), the holder + * is empty. + */ + template + struct SpecrockSampleHolder { + }; + + template + struct SpecrockSampleHolder { + std::vector<::Opm::gpuistl::GpuBuffer> sampleBuffers {}; + }; + +} // namespace detail + +/*! + * \brief Minimal, GPU-portable thermal-law manager. + * + * \tparam ScalarT Floating point type used for the per-cell and + * per-region sample tables. + * \tparam FluidSystemT Fluid system associated with this manager. + * \tparam OuterStorage Container used for the per-cell / per-region + * bulk arrays of params and the SATNUM index map. + * \tparam SampleStorage Container used for the per-region SPECROCK + * internal-energy / temperature sample tables. + * + * Typical instantiations: + * - CPU-only: \c GpuManager + * - GPU owning (built from CPU): \c GpuManager + * - GPU non-owning (kernel arg): \c GpuManager + */ +template class OuterStorage = ::Opm::VectorWithDefaultAllocator, + template class SampleStorage = ::Opm::VectorWithDefaultAllocator> +class GpuManager + : private detail::SpecrockSampleHolder< + ScalarT, + std::is_same_v, ::Opm::gpuistl::GpuBuffer>> +{ +public: + using Scalar = ScalarT; + using FluidSystem = FluidSystemT; + using SolidEnergyLawParams = ::Opm::EclSpecrockLawParams; + using ThermalConductionLawParams = ::Opm::EclThconrLawParams; + + static constexpr bool isOwningGpu + = std::is_same_v, ::Opm::gpuistl::GpuBuffer>; + + OPM_HOST_DEVICE GpuManager() = default; + + OPM_HOST_DEVICE + GpuManager(OuterStorage solidEnergyParams, + OuterStorage elementToSolidRegionIdx, + OuterStorage thermalConductionParams) + : solidEnergyParams_(std::move(solidEnergyParams)) + , elementToSolidRegionIdx_(std::move(elementToSolidRegionIdx)) + , thermalConductionParams_(std::move(thermalConductionParams)) + { + } + + /*! + * \brief Owning constructor that also takes ownership of the + * per-region SPECROCK sample buffers (only available for the + * owning GpuBuffer outer-storage instantiation). + */ + template = 0> + GpuManager(std::vector<::Opm::gpuistl::GpuBuffer> sampleBuffers, + OuterStorage solidEnergyParams, + OuterStorage elementToSolidRegionIdx, + OuterStorage thermalConductionParams) + : detail::SpecrockSampleHolder{std::move(sampleBuffers)} + , solidEnergyParams_(std::move(solidEnergyParams)) + , elementToSolidRegionIdx_(std::move(elementToSolidRegionIdx)) + , thermalConductionParams_(std::move(thermalConductionParams)) + { + } + + /*! \brief Solid-energy law parameters for an active cell. */ + OPM_HOST_DEVICE SolidEnergyLawParams solidEnergyLawParams(unsigned elemIdx) const + { + const int regionIdx = elementToSolidRegionIdx_[elemIdx]; + return solidEnergyParams_[regionIdx]; + } + + /*! \brief Thermal-conduction law parameters for an active cell. */ + OPM_HOST_DEVICE ThermalConductionLawParams + thermalConductionLawParams(unsigned elemIdx) const + { + return thermalConductionParams_[elemIdx]; + } + + /*! \brief Direct storage accessors used by copy_to_gpu / make_view. */ + const OuterStorage& solidEnergyParamsStorage() const + { + return solidEnergyParams_; + } + OuterStorage& solidEnergyParamsStorage() + { + return solidEnergyParams_; + } + + const OuterStorage& elementToSolidRegionIdxStorage() const + { + return elementToSolidRegionIdx_; + } + OuterStorage& elementToSolidRegionIdxStorage() + { + return elementToSolidRegionIdx_; + } + + const OuterStorage& thermalConductionParamsStorage() const + { + return thermalConductionParams_; + } + OuterStorage& thermalConductionParamsStorage() + { + return thermalConductionParams_; + } + + /*! \brief Mutable accessor for the per-region SPECROCK sample + * buffer holder; used by \c copy_to_gpu to populate the + * owning device buffers. Only available when this manager + * owns its device memory. + */ + template = 0> + std::vector<::Opm::gpuistl::GpuBuffer>& specrockSampleBuffers() + { + return this->sampleBuffers; + } + +private: + OuterStorage solidEnergyParams_ {}; + OuterStorage elementToSolidRegionIdx_ {}; + OuterStorage thermalConductionParams_ {}; +}; + +/*! + * \brief Build a CPU \c GpuManager from a CPU \c Opm::FlowProblem. + * + * Iterates over the active cells, querying the CPU problem for each + * cell's \c SolidEnergyLawParams (multiplexer) and + * \c ThermalConductionLawParams (multiplexer). The per-cell SPECROCK + * multiplexer references are deduplicated by address to recover the + * underlying region index (so the resulting GPU manager stores one + * \c SolidEnergyLawParams per region rather than one per cell). + * + * Throws via \c OPM_THROW for any solid-energy approach other than + * SPECROCK or any thermal-conduction approach other than THCONR. + */ +template +GpuManager +buildCpuManagerFromFlowProblem(const CpuFlowProblemT& cpu, std::size_t numElements) +{ + using ManagerCpu = GpuManager; + using SolidEnergyLawParams = typename ManagerCpu::SolidEnergyLawParams; + using ThermalConductionLawParams = typename ManagerCpu::ThermalConductionLawParams; + + std::vector regionMultiplexerAddress; + std::vector hostElementToSolidRegionIdx(numElements); + std::vector hostSolidEnergyParams; + std::vector hostThermalConductionParams(numElements); + + for (std::size_t i = 0; i < numElements; ++i) { + const auto& cpuSolidMultiplexer + = cpu.solidEnergyLawParams(static_cast(i), 0u); + if (cpuSolidMultiplexer.solidEnergyApproach() + != ::Opm::EclSolidEnergyApproach::Specrock) { + OPM_THROW(std::logic_error, + "Opm::EclThermalLaw::GpuManager only supports the SPECROCK " + "solid-energy approach."); + } + const void* address = static_cast(&cpuSolidMultiplexer); + int regionIdx = -1; + for (std::size_t r = 0; r < regionMultiplexerAddress.size(); ++r) { + if (regionMultiplexerAddress[r] == address) { + regionIdx = static_cast(r); + break; + } + } + if (regionIdx < 0) { + const auto& cpuSpecrock = cpuSolidMultiplexer.template getRealParams< + ::Opm::EclSolidEnergyApproach::Specrock>(); + const auto& xValues = cpuSpecrock.temperatureSamples(); + const auto& yValues = cpuSpecrock.internalEnergySamples(); + std::vector temperatureSamples(xValues.begin(), xValues.end()); + std::vector internalEnergySamples(yValues.begin(), yValues.end()); + SolidEnergyLawParams params; + params.setSamples(temperatureSamples, internalEnergySamples); + regionIdx = static_cast(hostSolidEnergyParams.size()); + hostSolidEnergyParams.emplace_back(std::move(params)); + regionMultiplexerAddress.push_back(address); + } + hostElementToSolidRegionIdx[i] = regionIdx; + + const auto& cpuThermalMultiplexer + = cpu.thermalConductionLawParams(static_cast(i), 0u); + if (cpuThermalMultiplexer.thermalConductionApproach() + != ::Opm::EclThermalConductionApproach::Thconr) { + OPM_THROW(std::logic_error, + "Opm::EclThermalLaw::GpuManager only supports the THCONR " + "thermal-conduction approach."); + } + const auto& cpuThconr = cpuThermalMultiplexer.template getRealParams< + ::Opm::EclThermalConductionApproach::Thconr>(); + hostThermalConductionParams[i].setReferenceTotalThermalConductivity( + cpuThconr.referenceTotalThermalConductivity()); + hostThermalConductionParams[i].setDTotalThermalConductivity_dSg( + cpuThconr.dTotalThermalConductivity_dSg()); + hostThermalConductionParams[i].finalize(); + } + + return ManagerCpu(std::move(hostSolidEnergyParams), + std::move(hostElementToSolidRegionIdx), + std::move(hostThermalConductionParams)); +} + +} // namespace Opm::EclThermalLaw + +namespace Opm::gpuistl { + +/*! + * \brief Copy a CPU \c GpuManager (plain host storage) to GPU-resident + * \c GpuBuffer storage. + * + * Each per-region SPECROCK sample table is uploaded to its own + * \c GpuBuffer; those buffers are owned by the returned + * manager's \c SpecrockSampleHolder base. The corresponding per-region + * \c EclSpecrockLawParams objects are then assembled + * on the host and uploaded as a single bulk + * \c GpuBuffer. + */ +template +::Opm::EclThermalLaw::GpuManager +copy_to_gpu(const ::Opm::EclThermalLaw::GpuManager& cpu) +{ + using ManagerBuf + = ::Opm::EclThermalLaw::GpuManager; + using SolidEnergyLawParamsView = typename ManagerBuf::SolidEnergyLawParams; + using ThermalConductionLawParams = typename ManagerBuf::ThermalConductionLawParams; + + std::vector> sampleBuffers; + sampleBuffers.reserve(2u * cpu.solidEnergyParamsStorage().size()); + + std::vector hostStaging; + hostStaging.reserve(cpu.solidEnergyParamsStorage().size()); + + for (const auto& cpuRegion : cpu.solidEnergyParamsStorage()) { + sampleBuffers.emplace_back(GpuBuffer(cpuRegion.temperatureSamples())); + auto& tBuf = sampleBuffers.back(); + sampleBuffers.emplace_back(GpuBuffer(cpuRegion.internalEnergySamples())); + auto& eBuf = sampleBuffers.back(); + SolidEnergyLawParamsView params(GpuView(tBuf.data(), tBuf.size()), + GpuView(eBuf.data(), eBuf.size())); + hostStaging.emplace_back(std::move(params)); + } + + return ManagerBuf(std::move(sampleBuffers), + GpuBuffer(hostStaging), + GpuBuffer(cpu.elementToSolidRegionIdxStorage()), + GpuBuffer(cpu.thermalConductionParamsStorage())); +} + +/*! + * \brief Make a non-owning \c GpuView based \c GpuManager from an owning + * \c GpuBuffer based \c GpuManager. The per-region solid-energy + * params element type is unchanged (\c GpuView sample storage in + * both cases). + */ +template +::Opm::EclThermalLaw::GpuManager +make_view(::Opm::EclThermalLaw::GpuManager& buf) +{ + using ManagerView + = ::Opm::EclThermalLaw::GpuManager; + using SolidEnergyLawParamsView = typename ManagerView::SolidEnergyLawParams; + using ThermalConductionLawParams = typename ManagerView::ThermalConductionLawParams; + return ManagerView(GpuView(buf.solidEnergyParamsStorage().data(), + buf.solidEnergyParamsStorage().size()), + GpuView(buf.elementToSolidRegionIdxStorage().data(), + buf.elementToSolidRegionIdxStorage().size()), + GpuView( + buf.thermalConductionParamsStorage().data(), + buf.thermalConductionParamsStorage().size())); +} + +} // namespace Opm::gpuistl + +#endif // OPM_GPU_ECL_THERMAL_LAW_MANAGER_HPP diff --git a/opm/simulators/flow/GpuFlowProblem.hpp b/opm/simulators/flow/GpuFlowProblem.hpp new file mode 100644 index 00000000000..73d88ee2f3a --- /dev/null +++ b/opm/simulators/flow/GpuFlowProblem.hpp @@ -0,0 +1,587 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +/*! + * \file + * \brief GPU-compatible, simplified version of Opm::FlowProblem. + * + * Only exposes the subset of the FlowProblem / FlowProblemBlackoil interface + * that BlackOilIntensiveQuantities needs in order to call ::update(). Anything + * that is not used by the intensive quantities (well model, aquifers, source + * terms, boundary conditions, ...) is intentionally absent. + */ +#ifndef OPM_GPU_FLOW_PROBLEM_HPP +#define OPM_GPU_FLOW_PROBLEM_HPP + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +namespace Opm::gpuistl +{ +template +class GpuBuffer; +template +class GpuView; +} // namespace Opm::gpuistl + +namespace Opm { + +/*! + * \brief No-op thermal-law manager used as the default 4th template arg of + * \c GpuFlowProblem. + * + * Carries no data and exposes the minimum surface needed by the + * \c GpuFlowProblem builder/copy/view machinery: a \c FluidSystem alias + * fixed to \c void (so the \c hasThermal compile-time switch evaluates + * to false), trivial empty params types, and trivial accessors that + * return default-constructed values. + * + * Intentionally not callable on the device when actually queried: it is + * only used by non-thermal \c GpuFlowProblem instantiations whose dispatch + * paths never call \c solidEnergyLawParams or \c thermalConductionLawParams. + */ +struct NoThermalLawManager +{ + using FluidSystem = void; + struct SolidEnergyLawParams {}; + struct ThermalConductionLawParams {}; + + OPM_HOST_DEVICE SolidEnergyLawParams solidEnergyLawParams(unsigned /*elemIdx*/) const + { + return SolidEnergyLawParams{}; + } + OPM_HOST_DEVICE ThermalConductionLawParams + thermalConductionLawParams(unsigned /*elemIdx*/) const + { + return ThermalConductionLawParams{}; + } +}; + +} // namespace Opm + +namespace Opm::gpuistl { + +/*! \brief copy_to_gpu overload for the no-op thermal manager. */ +inline ::Opm::NoThermalLawManager +copy_to_gpu(const ::Opm::NoThermalLawManager& /*cpu*/) +{ + return ::Opm::NoThermalLawManager{}; +} + +/*! \brief make_view overload for the no-op thermal manager. */ +inline ::Opm::NoThermalLawManager +make_view(::Opm::NoThermalLawManager& /*buf*/) +{ + return ::Opm::NoThermalLawManager{}; +} + +} // namespace Opm::gpuistl + +namespace Opm { + +/*! + * \brief Minimal, GPU-compatible problem class. + * + * Provides exactly the interface required by BlackOilIntensiveQuantities to + * compute a primary-variable update on the device. All cell-wise rock / + * fluid bookkeeping data is kept in templated Storage so the same class can + * be used as a CPU object (default \c std::vector) or as a GPU object + * (\c GpuBuffer / \c GpuView). + * + * The material law manager is held by value (so it is trivially copyable + * into a CUDA kernel together with the rest of the problem state). + * + * \tparam ScalarT Floating point type used for rock data. + * \tparam MaterialLawManagerT GPU material law manager type (e.g. + * \c Opm::EclMaterialLaw::GpuManager). + * \tparam Storage Container template used for per-cell data. + * \tparam ThermalLawManagerT GPU thermal-law manager type (e.g. + * \c Opm::EclThermalLaw::GpuManager). Defaults + * to \c NoThermalLawManager, a no-op stub used + * by non-thermal callers. + */ +template class Storage = ::Opm::VectorWithDefaultAllocator, + class ThermalLawManagerT = ::Opm::NoThermalLawManager> +class GpuFlowProblem +{ +public: + using Scalar = ScalarT; + using EclMaterialLawManager = MaterialLawManagerT; + using MaterialLawParams = typename EclMaterialLawManager::MaterialLawParams; + using EclThermalLawManager = ThermalLawManagerT; + using SolidEnergyLawParams = typename EclThermalLawManager::SolidEnergyLawParams; + using ThermalConductionLawParams = typename EclThermalLawManager::ThermalConductionLawParams; + + /*! \brief True iff the thermal-law manager carries a real fluid-system + * tag (anything other than \c void) and therefore the + * thermal-extraction code paths should be enabled. */ + static constexpr bool hasThermal + = !std::is_same_v; + + /*! \brief Trivial nested model() helper that satisfies the + * \c problem.model().linearizer().getLinearizationType() chain + * that BlackOilIntensiveQuantities walks. */ + struct ModelView + { + struct LinearizerView + { + OPM_HOST_DEVICE LinearizationType getLinearizationType() const + { + return LinearizationType{}; + } + }; + + OPM_HOST_DEVICE LinearizerView linearizer() const + { + return LinearizerView{}; + } + }; + + OPM_HOST_DEVICE GpuFlowProblem() = default; + + OPM_HOST_DEVICE GpuFlowProblem(EclMaterialLawManager materialLawManager, + Storage porosity, + Storage rockCompressibility, + Storage rockReferencePressure, + Storage maxOilSaturation, + Storage maxOilVaporizationFactor, + Storage maxGasDissolutionFactor, + EclThermalLawManager thermalLawManager, + Storage rockFraction, + Storage pvtRegionIndex) + : materialLawManager_(std::move(materialLawManager)) + , porosity_(std::move(porosity)) + , rockCompressibility_(std::move(rockCompressibility)) + , rockReferencePressure_(std::move(rockReferencePressure)) + , maxOilSaturation_(std::move(maxOilSaturation)) + , maxOilVaporizationFactor_(std::move(maxOilVaporizationFactor)) + , maxGasDissolutionFactor_(std::move(maxGasDissolutionFactor)) + , thermalLawManager_(std::move(thermalLawManager)) + , rockFraction_(std::move(rockFraction)) + , pvtRegionIndex_(std::move(pvtRegionIndex)) + { + } + + /*! + * \brief Construct from any CPU \c FlowProblem (or \c FlowProblemBlackoil). + * + * Iterates over all grid degrees of freedom and copies the rock / + * mixing-control fields used by \c BlackOilIntensiveQuantities into local + * vectors. The inner material-law manager is constructed from + * \c *cpu.materialLawManager(), so the GPU manager type held by this + * problem must in turn be constructible from + * \c Opm::EclMaterialLaw::Manager (i.e. use CPU vector storage and + * piecewise-linear vector-storage two-phase laws). + * + * Only enabled when this problem itself uses CPU vector storage. + */ + template , + std::enable_if_t< + std::is_same_v>, + int> = 0> + explicit GpuFlowProblem(const CpuProblem& cpu) + : materialLawManager_(*cpu.materialLawManager(), cpu.model().numGridDof()) + { + const std::size_t n = cpu.model().numGridDof(); + porosity_.resize(n); + rockCompressibility_.resize(n); + rockReferencePressure_.resize(n); + maxOilSaturation_.resize(n); + maxOilVaporizationFactor_.resize(n); + maxGasDissolutionFactor_.resize(n); + for (std::size_t i = 0; i < n; ++i) { + const unsigned u = static_cast(i); + porosity_[i] = cpu.porosity(u, 0u); + rockCompressibility_[i] = cpu.rockCompressibility(u); + rockReferencePressure_[i] = cpu.rockReferencePressure(u); + maxOilSaturation_[i] = cpu.maxOilSaturation(u); + maxOilVaporizationFactor_[i] = cpu.maxOilVaporizationFactor(0u, u); + maxGasDissolutionFactor_[i] = cpu.maxGasDissolutionFactor(0u, u); + } + if constexpr (hasThermal) { + using FluidSystemTag = typename EclThermalLawManager::FluidSystem; + thermalLawManager_ + = ::Opm::EclThermalLaw::buildCpuManagerFromFlowProblem( + cpu, n); + rockFraction_.resize(n); + pvtRegionIndex_.resize(n); + for (std::size_t i = 0; i < n; ++i) { + const unsigned u = static_cast(i); + rockFraction_[i] = cpu.rockFraction(u, 0u); + pvtRegionIndex_[i] = static_cast(cpu.pvtRegionIndex(u)); + } + } + } + + /*! + * \brief Construct directly into device-resident \c GpuBuffer storage + * from any CPU \c FlowProblem (or \c FlowProblemBlackoil). + * + * Internally builds the per-cell rock/mixing data on the host as + * \c std::vector, then uploads each as a single + * \c GpuBuffer. The inner GPU material law manager is constructed + * from \c *cpu.materialLawManager() via its own GpuBuffer-storage + * constructor, which uploads the per-cell sample arrays. + * + * Only enabled when this problem itself uses \c GpuBuffer storage. + */ + template , + std::enable_if_t< + std::is_same_v>, + int> = 0> + explicit GpuFlowProblem(const CpuProblem& cpu) + : materialLawManager_(*cpu.materialLawManager(), cpu.model().numGridDof()) + , porosity_(extractRockField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.porosity(u, 0u)); + })) + , rockCompressibility_(extractRockField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.rockCompressibility(u)); + })) + , rockReferencePressure_(extractRockField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.rockReferencePressure(u)); + })) + , maxOilSaturation_(extractRockField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.maxOilSaturation(u)); + })) + , maxOilVaporizationFactor_(extractRockField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.maxOilVaporizationFactor(0u, u)); + })) + , maxGasDissolutionFactor_(extractRockField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.maxGasDissolutionFactor(0u, u)); + })) + , thermalLawManager_(buildGpuThermalManager(cpu)) + , rockFraction_(extractThermalScalarField(cpu, [](const CpuProblem& p, unsigned u) { + return Scalar(p.rockFraction(u, 0u)); + })) + , pvtRegionIndex_(extractThermalIntField(cpu, [](const CpuProblem& p, unsigned u) { + return static_cast(p.pvtRegionIndex(u)); + })) + { + } + + OPM_HOST_DEVICE ModelView model() const + { + return ModelView{}; + } + + OPM_HOST_DEVICE int satnumRegionIndex(std::size_t elemIdx) const + { + return materialLawManager_.satnumRegionIdx(static_cast(elemIdx)); + } + + OPM_HOST_DEVICE MaterialLawParams materialLawParams(std::size_t elemIdx) const + { + return materialLawManager_.materialLawParams(static_cast(elemIdx)); + } + + OPM_HOST_DEVICE Scalar rockCompressibility(std::size_t elemIdx) const + { + return rockCompressibility_.size() == 0 ? Scalar(0) : rockCompressibility_[elemIdx]; + } + + OPM_HOST_DEVICE Scalar rockReferencePressure(std::size_t elemIdx) const + { + return rockReferencePressure_.size() == 0 ? Scalar(0) : rockReferencePressure_[elemIdx]; + } + + OPM_HOST_DEVICE Scalar porosity(std::size_t elemIdx, unsigned /*timeIdx*/) const + { + return porosity_.size() == 0 ? Scalar(0) : porosity_[elemIdx]; + } + + OPM_HOST_DEVICE Scalar maxOilVaporizationFactor(unsigned /*timeIdx*/, std::size_t elemIdx) const + { + return maxOilVaporizationFactor_.size() == 0 ? Scalar(0) : maxOilVaporizationFactor_[elemIdx]; + } + + OPM_HOST_DEVICE Scalar maxGasDissolutionFactor(unsigned /*timeIdx*/, std::size_t elemIdx) const + { + return maxGasDissolutionFactor_.size() == 0 ? Scalar(0) : maxGasDissolutionFactor_[elemIdx]; + } + + OPM_HOST_DEVICE Scalar maxOilSaturation(std::size_t elemIdx) const + { + return maxOilSaturation_.size() == 0 ? Scalar(0) : maxOilSaturation_[elemIdx]; + } + + /*! \brief Per-cell PVT region index. Returns 0 when the GpuFlowProblem + * instantiation has no thermal support. */ + OPM_HOST_DEVICE unsigned pvtRegionIndex(std::size_t elemIdx) const + { + return pvtRegionIndex_.size() == 0 + ? 0u + : static_cast(pvtRegionIndex_[elemIdx]); + } + + /*! \brief Per-cell rock fraction (1 - effective porosity). Returns 0 + * when the GpuFlowProblem instantiation has no thermal + * support. */ + OPM_HOST_DEVICE Scalar rockFraction(std::size_t elemIdx, unsigned /*timeIdx*/) const + { + return rockFraction_.size() == 0 ? Scalar(0) : rockFraction_[elemIdx]; + } + + /*! \brief Solid-energy law parameters for a single cell. Forwards to + * the embedded thermal-law manager. */ + OPM_HOST_DEVICE SolidEnergyLawParams + solidEnergyLawParams(std::size_t elemIdx, unsigned /*timeIdx*/) const + { + return thermalLawManager_.solidEnergyLawParams(static_cast(elemIdx)); + } + + /*! \brief Thermal-conduction law parameters for a single cell. + * Forwards to the embedded thermal-law manager. */ + OPM_HOST_DEVICE ThermalConductionLawParams + thermalConductionLawParams(std::size_t elemIdx, unsigned /*timeIdx*/) const + { + return thermalLawManager_.thermalConductionLawParams(static_cast(elemIdx)); + } + + /*! \brief Default rock-pore-volume multiplier (1 if no compressibility). */ + template + OPM_HOST_DEVICE Evaluation rockCompPoroMultiplier(const auto& /*intQuants*/, std::size_t /*elemIdx*/) const + { + return Evaluation(1.0); + } + + /*! \brief Default rock-trans multiplier (1 if no compressibility). */ + template + OPM_HOST_DEVICE Evaluation rockCompTransMultiplier(const auto& /*intQuants*/, std::size_t /*elemIdx*/) const + { + return Evaluation(1.0); + } + + /*! + * \brief Update the relative permeabilities of all phases for a single + * cell, in the same way as the CPU \c FlowProblem does. + * + * Calls \c MaterialLaw::relativePermeabilities on the cell's + * \c MaterialLawParams. The \c BlackOilIntensiveQuantities later divides + * the result by the phase viscosity to obtain the mobility. + * + * Directional relative permeabilities are not supported by the GPU + * problem, so the \p dirMob output is left untouched. + */ + template + OPM_HOST_DEVICE void updateRelperms(auto& mobility, + auto& /*dirMob*/, + FluidState& fluidState, + unsigned globalSpaceIdx) const + { + using ContainerT = std::decay_t; + const auto materialParams = materialLawParams(globalSpaceIdx); + EclMaterialLawManager::MaterialLaw::template relativePermeabilities< + ContainerT, FluidState, Args...>(mobility, materialParams, fluidState); + } + + /*! \name Direct accessors for serialization to the GPU. */ + //!\{ + const EclMaterialLawManager& materialLawManager() const { return materialLawManager_; } + EclMaterialLawManager& materialLawManager() { return materialLawManager_; } + const Storage& porosityStorage() const { return porosity_; } + Storage& porosityStorage() { return porosity_; } + const Storage& rockCompressibilityStorage() const { return rockCompressibility_; } + Storage& rockCompressibilityStorage() { return rockCompressibility_; } + const Storage& rockReferencePressureStorage() const { return rockReferencePressure_; } + Storage& rockReferencePressureStorage() { return rockReferencePressure_; } + const Storage& maxOilSaturationStorage() const { return maxOilSaturation_; } + Storage& maxOilSaturationStorage() { return maxOilSaturation_; } + const Storage& maxOilVaporizationFactorStorage() const { return maxOilVaporizationFactor_; } + Storage& maxOilVaporizationFactorStorage() { return maxOilVaporizationFactor_; } + const Storage& maxGasDissolutionFactorStorage() const { return maxGasDissolutionFactor_; } + Storage& maxGasDissolutionFactorStorage() { return maxGasDissolutionFactor_; } + const EclThermalLawManager& thermalLawManager() const { return thermalLawManager_; } + EclThermalLawManager& thermalLawManager() { return thermalLawManager_; } + const Storage& rockFractionStorage() const { return rockFraction_; } + Storage& rockFractionStorage() { return rockFraction_; } + const Storage& pvtRegionIndexStorage() const { return pvtRegionIndex_; } + Storage& pvtRegionIndexStorage() { return pvtRegionIndex_; } + //!\} + +private: + template + static Storage extractRockField(const CpuProblem& cpu, F f) + { + const std::size_t n = cpu.model().numGridDof(); + std::vector v(n); + for (std::size_t i = 0; i < n; ++i) { + v[i] = f(cpu, static_cast(i)); + } + if constexpr (std::is_same_v, + ::Opm::VectorWithDefaultAllocator>) { + return Storage(v.begin(), v.end()); + } else { + return Storage(v); + } + } + + /*! \brief Build the per-cell rockFraction storage; empty when the + * GpuFlowProblem instantiation has no thermal support. */ + template + static Storage extractThermalScalarField(const CpuProblem& cpu, F f) + { + if constexpr (!hasThermal) { + return Storage{}; + } else { + return extractRockField(cpu, f); + } + } + + /*! \brief Build the per-cell pvtRegionIndex storage; empty when the + * GpuFlowProblem instantiation has no thermal support. */ + template + static Storage extractThermalIntField(const CpuProblem& cpu, F f) + { + if constexpr (!hasThermal) { + return Storage{}; + } else { + const std::size_t n = cpu.model().numGridDof(); + std::vector v(n); + for (std::size_t i = 0; i < n; ++i) { + v[i] = f(cpu, static_cast(i)); + } + if constexpr (std::is_same_v, + ::Opm::VectorWithDefaultAllocator>) { + return Storage(v.begin(), v.end()); + } else { + return Storage(v); + } + } + } + + /*! \brief Build the GPU thermal-law manager from a CPU FlowProblem. + * Returns a default-constructed (empty) manager when this + * GpuFlowProblem instantiation has no thermal support. */ + template + static EclThermalLawManager buildGpuThermalManager(const CpuProblem& cpu) + { + if constexpr (!hasThermal) { + return EclThermalLawManager{}; + } else { + using FluidSystemTag = typename EclThermalLawManager::FluidSystem; + auto cpuMgr + = ::Opm::EclThermalLaw::buildCpuManagerFromFlowProblem( + cpu, cpu.model().numGridDof()); + return ::Opm::gpuistl::copy_to_gpu(cpuMgr); + } + } + + EclMaterialLawManager materialLawManager_{}; + Storage porosity_{}; + Storage rockCompressibility_{}; + Storage rockReferencePressure_{}; + Storage maxOilSaturation_{}; + Storage maxOilVaporizationFactor_{}; + Storage maxGasDissolutionFactor_{}; + EclThermalLawManager thermalLawManager_{}; + Storage rockFraction_{}; + Storage pvtRegionIndex_{}; +}; + +} // namespace Opm + +namespace Opm::gpuistl { + +/*! + * \brief Copy a CPU GpuFlowProblem to GPU-resident GpuBuffer storage. + * + * The material law manager is also moved to the GPU (via copy_to_gpu). + */ +template +auto copy_to_gpu( + const ::Opm::GpuFlowProblem& cpu) +{ + using GpuMaterialLawManagerBuffer + = decltype(::Opm::gpuistl::copy_to_gpu(cpu.materialLawManager())); + using GpuThermalLawManagerBuffer + = decltype(::Opm::gpuistl::copy_to_gpu(cpu.thermalLawManager())); + using GpuProblemType = ::Opm::GpuFlowProblem; + + return GpuProblemType(::Opm::gpuistl::copy_to_gpu(cpu.materialLawManager()), + GpuBuffer(cpu.porosityStorage()), + GpuBuffer(cpu.rockCompressibilityStorage()), + GpuBuffer(cpu.rockReferencePressureStorage()), + GpuBuffer(cpu.maxOilSaturationStorage()), + GpuBuffer(cpu.maxOilVaporizationFactorStorage()), + GpuBuffer(cpu.maxGasDissolutionFactorStorage()), + ::Opm::gpuistl::copy_to_gpu(cpu.thermalLawManager()), + GpuBuffer(cpu.rockFractionStorage()), + GpuBuffer(cpu.pvtRegionIndexStorage())); +} + +/*! + * \brief Make a non-owning GpuView based GpuFlowProblem from an owning + * GpuBuffer based GpuFlowProblem. + */ +template +auto make_view(::Opm::GpuFlowProblem& buf) +{ + using GpuMaterialLawManagerView + = decltype(::Opm::gpuistl::make_view(buf.materialLawManager())); + using GpuThermalLawManagerView + = decltype(::Opm::gpuistl::make_view(buf.thermalLawManager())); + using GpuProblemView = ::Opm::GpuFlowProblem; + + auto toView = [](auto& storage) { + using T = typename std::decay_t::value_type; + return GpuView(storage.data(), storage.size()); + }; + + return GpuProblemView(::Opm::gpuistl::make_view(buf.materialLawManager()), + toView(buf.porosityStorage()), + toView(buf.rockCompressibilityStorage()), + toView(buf.rockReferencePressureStorage()), + toView(buf.maxOilSaturationStorage()), + toView(buf.maxOilVaporizationFactorStorage()), + toView(buf.maxGasDissolutionFactorStorage()), + ::Opm::gpuistl::make_view(buf.thermalLawManager()), + toView(buf.rockFractionStorage()), + toView(buf.pvtRegionIndexStorage())); +} + +} // namespace Opm::gpuistl + +#endif // OPM_GPU_FLOW_PROBLEM_HPP diff --git a/opm/simulators/flow/NewTranFluxModule.hpp b/opm/simulators/flow/NewTranFluxModule.hpp index ecfcfa05d7b..83811e38cf9 100644 --- a/opm/simulators/flow/NewTranFluxModule.hpp +++ b/opm/simulators/flow/NewTranFluxModule.hpp @@ -34,6 +34,7 @@ #include #include +#include #include #include @@ -142,7 +143,7 @@ class NewTranExtensiveQuantities */ OPM_HOST_DEVICE const DimMatrix& intrinsicPermeability() const { - throw std::invalid_argument("The ECL transmissibility module does not provide an explicit intrinsic permeability"); + OPM_THROW(std::invalid_argument, "The ECL transmissibility module does not provide an explicit intrinsic permeability"); } /*! @@ -153,7 +154,7 @@ class NewTranExtensiveQuantities */ OPM_HOST_DEVICE const EvalDimVector& potentialGrad(unsigned) const { - throw std::invalid_argument("The ECL transmissibility module does not provide explicit potential gradients"); + OPM_THROW(std::invalid_argument, "The ECL transmissibility module does not provide explicit potential gradients"); } /*! @@ -173,7 +174,7 @@ class NewTranExtensiveQuantities */ OPM_HOST_DEVICE const EvalDimVector& filterVelocity(unsigned) const { - throw std::invalid_argument("The ECL transmissibility module does not provide explicit filter velocities"); + OPM_THROW(std::invalid_argument, "The ECL transmissibility module does not provide explicit filter velocities"); } /*! diff --git a/opm/simulators/flow/TemperatureModel.hpp b/opm/simulators/flow/TemperatureModel.hpp index f387ecc3d4c..704a7cd5c0b 100644 --- a/opm/simulators/flow/TemperatureModel.hpp +++ b/opm/simulators/flow/TemperatureModel.hpp @@ -29,6 +29,7 @@ #define OPM_TEMPERATURE_MODEL_HPP #include +#include #include #include @@ -97,15 +98,17 @@ class BlackOilEnergyIntensiveQuantitiesTemp - void updateTemperature_(const Problem& problem, unsigned globalDofIdx, unsigned timeIdx) + OPM_HOST_DEVICE void updateTemperature_(const Problem& problem, + unsigned globalDofIdx, + unsigned timeIdx) { const EvaluationTemp T = EvaluationTemp::createVariable(problem.temperature(globalDofIdx, timeIdx), 0); fluidState_.setTemperature(T); } - void updateEnergyQuantities_(const Problem& problem, - const unsigned globalSpaceIdx, - const unsigned timeIdx) + OPM_HOST_DEVICE void updateEnergyQuantities_(const Problem& problem, + const unsigned globalSpaceIdx, + const unsigned timeIdx) { // compute the specific enthalpy of the fluids, the specific enthalpy of the rock // and the thermal conductivity coefficients @@ -132,20 +135,20 @@ class BlackOilEnergyIntensiveQuantitiesTemp rockFraction_ = problem.rockFraction(globalSpaceIdx, timeIdx); } - const EvaluationTemp& rockInternalEnergy() const + OPM_HOST_DEVICE const EvaluationTemp& rockInternalEnergy() const { return rockInternalEnergy_; } - const EvaluationTemp& totalThermalConductivity() const + OPM_HOST_DEVICE const EvaluationTemp& totalThermalConductivity() const { return totalThermalConductivity_; } - const Scalar& rockFraction() const + OPM_HOST_DEVICE const Scalar& rockFraction() const { return rockFraction_; } - const FluidStateTemp& fluidStateTemp() const + OPM_HOST_DEVICE const FluidStateTemp& fluidStateTemp() const { return fluidState_; } template - void setFluidState(const FluidState& fs) + OPM_HOST_DEVICE void setFluidState(const FluidState& fs) { // copy the needed part of the fluid state fluidState_.setPvtRegionIndex(fs.pvtRegionIndex()); diff --git a/opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.cu b/opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.cu new file mode 100644 index 00000000000..ae9e738612c --- /dev/null +++ b/opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.cu @@ -0,0 +1,377 @@ +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#include + +// Tricks for older versions of ROCm that do not support alugrid or dune-fem +// (mirrors test_blackoilintensivequantities_gpu.cu). +#ifdef HAVE_DUNE_ALUGRID +#undef HAVE_DUNE_ALUGRID +#endif +#define HAVE_DUNE_ALUGRID 0 +#ifdef HAVE_DUNE_FEM +#undef HAVE_DUNE_FEM +#endif +#define HAVE_DUNE_FEM 0 + +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include + +#include +#include +#include +#include + +namespace { + +// Small helper to take a std::chrono::steady_clock timestamp. +inline auto dispatcherNow() +{ + return std::chrono::steady_clock::now(); +} + +inline double dispatcherElapsedMs(const std::chrono::steady_clock::time_point& start, + const std::chrono::steady_clock::time_point& end) +{ + return std::chrono::duration(end - start).count(); +} + +} // namespace + +namespace Opm::gpuistl { + +namespace { + +using DispatcherCpuTag = Opm::Properties::TTag::FlowGasWaterEnergyProblem; +using DispatcherGpuTag = Opm::Properties::TTag::FlowGasWaterEnergyDummyProblemGPU; + +using DispatcherScalar = Opm::GetPropType; + +using DispatcherCpuFluidSystem = Opm::BlackOilFluidSystem; +using DispatcherFluidSystemView + = Opm::BlackOilFluidSystemNonStatic; + +using DispatcherGpuPrimaryVariables + = Opm::BlackOilPrimaryVariables; +using DispatcherGpuIntensiveQuantities = Opm::BlackOilIntensiveQuantities; + +using DispatcherCpuMaterialLawManager = + typename Opm::GetProp::EclMaterialLawManager; +using DispatcherTraits = typename DispatcherCpuMaterialLawManager::MaterialLaw::Traits; +using DispatcherTwoPhaseTraits = + Opm::TwoPhaseMaterialTraits; +using DispatcherGpuPiecewiseLinearParamsBuf = + Opm::PiecewiseLinearTwoPhaseMaterialParams>; +using DispatcherGpuPiecewiseLinearLawBuf = + Opm::PiecewiseLinearTwoPhaseMaterial; +using DispatcherGpuMaterialLawParamsBuf = + Opm::EclTwoPhaseMaterialParams; +using DispatcherGpuMaterialLawBuf = + Opm::EclTwoPhaseMaterial; +using DispatcherGpuManagerBuf = + Opm::EclMaterialLaw::GpuManager; +using DispatcherGpuThermalManagerBuf = + Opm::EclThermalLaw::GpuManager; +using DispatcherGpuFlowProblemBuf = + Opm::GpuFlowProblem; +using DispatcherGpuFlowProblemView = + decltype(Opm::gpuistl::make_view(std::declval())); + +template +__global__ void +dispatcherUpdateAllCellsKernel(GpuProblem problem, + Opm::gpuistl::GpuView primaryVariables, + Opm::gpuistl::GpuView outIntensiveQuantities, + std::size_t numCells) +{ + const std::size_t i = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (i >= numCells) { + return; + } + IntensiveQuantitiesT iq = outIntensiveQuantities[i]; + iq.updateSaturations(primaryVariables[i], 0, Opm::LinearizationType{}); + iq.update(problem, primaryVariables[i], static_cast(i), 0); + iq.updateEnergyQuantities_(problem, static_cast(i), 0u); + outIntensiveQuantities[i] = iq; +} + +} // namespace + +// ============================================================================= +// Impl +// ============================================================================= +template +struct GpuBlackoilIntensiveQuantitiesDispatcher::Impl { + using ManagedFluidSystemViewPtr = + std::unique_ptr>; + + bool initialized = false; + std::unique_ptr problemBuf; + DispatcherGpuFlowProblemView problemView{}; + ManagedFluidSystemViewPtr managedFluidSystemView{}; + std::optional prototype; + std::size_t callCount = 0; + + // Cumulative per-stage timings (milliseconds, host-side). + double totalConvertMs = 0.0; + double totalH2DMs = 0.0; + double totalKernelMs = 0.0; + double totalD2HMs = 0.0; + double totalOverlayMs = 0.0; + double totalDofsProcessed = 0.0; + + ~Impl() + { + if (callCount > 0u) { + const double totalMs = + totalConvertMs + totalH2DMs + totalKernelMs + totalD2HMs + totalOverlayMs; + const double avgConvertMs = totalConvertMs / static_cast(callCount); + const double avgH2DMs = totalH2DMs / static_cast(callCount); + const double avgKernelMs = totalKernelMs / static_cast(callCount); + const double avgD2HMs = totalD2HMs / static_cast(callCount); + const double avgOverlayMs = totalOverlayMs / static_cast(callCount); + const double avgTotalMs = totalMs / static_cast(callCount); + const double avgDofs = totalDofsProcessed / static_cast(callCount); + Opm::OpmLog::info(std::format( + "[GpuBlackoilIntensiveQuantitiesDispatcher] aggregate over {} call(s)," + " {} dof-updates:\n" + " totals : convert={:.3f}ms h2d={:.3f}ms kernel={:.3f}ms" + " d2h={:.3f}ms overlay={:.3f}ms total={:.3f}ms\n" + " per-call: convert={:.3f}ms h2d={:.3f}ms kernel={:.3f}ms" + " d2h={:.3f}ms overlay={:.3f}ms total={:.3f}ms avg_dofs={:.1f}", + callCount, + static_cast(totalDofsProcessed), + totalConvertMs, totalH2DMs, totalKernelMs, totalD2HMs, totalOverlayMs, totalMs, + avgConvertMs, avgH2DMs, avgKernelMs, avgD2HMs, avgOverlayMs, avgTotalMs, + avgDofs)); + } + } + + Impl() = default; + Impl(const Impl&) = delete; + Impl& operator=(const Impl&) = delete; + Impl(Impl&&) = delete; + Impl& operator=(Impl&&) = delete; +}; + +template +GpuBlackoilIntensiveQuantitiesDispatcher::GpuBlackoilIntensiveQuantitiesDispatcher() + : impl_(std::make_unique()) +{ +} + +template +GpuBlackoilIntensiveQuantitiesDispatcher::~GpuBlackoilIntensiveQuantitiesDispatcher() + = default; + +template +void GpuBlackoilIntensiveQuantitiesDispatcher::update( + const Problem& cpuProblem, + const PrimaryVariables* const* cpuPriVars, + IntensiveQuantities* const* outIQ, + std::size_t numDof) +{ + if (numDof == 0u) { + return; + } + + if (!impl_->initialized) { + // Build the GPU FlowProblem from the CPU FlowProblem (one-time setup). + impl_->problemBuf = std::make_unique(cpuProblem); + impl_->problemView = Opm::gpuistl::make_view(*impl_->problemBuf); + + // Place the FluidSystemView in unified memory so its device pointer + // dereferences are valid both on host and device (mirrors the test). + auto& dynamicCpuFluidSystem = DispatcherCpuFluidSystem::getNonStaticInstance(); + // The fluid-system buffer is owning device memory; leak it intentionally + // for the lifetime of the process via a function-local static. + using FsBufferType = decltype(Opm::gpuistl::copy_to_gpu(dynamicCpuFluidSystem)); + static FsBufferType s_fsBuffer = Opm::gpuistl::copy_to_gpu(dynamicCpuFluidSystem); + auto fsView = Opm::gpuistl::make_view(s_fsBuffer); + + DispatcherFluidSystemView* managed = nullptr; + impl_->managedFluidSystemView = + Opm::gpuistl::make_gpu_managed_unique_ptr(fsView); + managed = impl_->managedFluidSystemView.get(); + + // Build a default IntensiveQuantities prototype for the GPU side. + Opm::BlackOilIntensiveQuantities cpuPrototype; + impl_->prototype = cpuPrototype.template withOtherFluidSystem( + *managed); + + impl_->initialized = true; + Opm::OpmLog::info(std::format( + "[GpuBlackoilIntensiveQuantitiesDispatcher] initialized for {} cells", + cpuProblem.model().numGridDof())); + } + + // ------------------------------------------------------------------- + // 1. Convert CPU primary variables to the dispatcher's GPU primary + // variables (host-side). + // ------------------------------------------------------------------- + const auto t0 = dispatcherNow(); + std::vector hostPriVars; + hostPriVars.reserve(numDof); + for (std::size_t i = 0; i < numDof; ++i) { + // The BlackOilPrimaryVariables converting copy ctor handles the + // TypeTag/storage difference. + hostPriVars.emplace_back(*cpuPriVars[i]); + } + std::vector hostIQ(numDof, *impl_->prototype); + const auto t1 = dispatcherNow(); + + // ------------------------------------------------------------------- + // 2. Upload host buffers to the device (host-to-device copies are + // embedded in the GpuBuffer ctors). + // ------------------------------------------------------------------- + Opm::gpuistl::GpuBuffer primaryVariablesBuffer(hostPriVars); + Opm::gpuistl::GpuBuffer intensiveQuantitiesBuffer(hostIQ); + OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); + const auto t2 = dispatcherNow(); + + // ------------------------------------------------------------------- + // 3. Launch the per-cell update kernel. + // ------------------------------------------------------------------- + const unsigned blockSize = 64u; + const unsigned gridSize = static_cast((numDof + blockSize - 1u) / blockSize); + + dispatcherUpdateAllCellsKernel<<>>( + impl_->problemView, + Opm::gpuistl::GpuView( + primaryVariablesBuffer.data(), primaryVariablesBuffer.size()), + Opm::gpuistl::GpuView( + intensiveQuantitiesBuffer.data(), intensiveQuantitiesBuffer.size()), + numDof); + OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); + OPM_GPU_SAFE_CALL(cudaGetLastError()); + const auto t3 = dispatcherNow(); + + // ------------------------------------------------------------------- + // 4. Read the GPU IntensiveQuantities back to host memory. + // ------------------------------------------------------------------- + OPM_GPU_SAFE_CALL(cudaMemcpy(hostIQ.data(), + intensiveQuantitiesBuffer.data(), + numDof * sizeof(DispatcherGpuIntensiveQuantities), + cudaMemcpyDeviceToHost)); + const auto t4 = dispatcherNow(); + + // ------------------------------------------------------------------- + // 5. Field-by-field overlay onto the caller's CPU IntensiveQuantities. + // The caller is expected to have run the CPU update first to + // populate any fields the dispatcher does not overwrite (e.g. + // \c mobility_, which the GPU relperm path currently leaves at 0). + // ------------------------------------------------------------------- + for (std::size_t i = 0; i < numDof; ++i) { + outIQ[i]->overlayBlackOilFieldsFrom(hostIQ[i]); + } + const auto t5 = dispatcherNow(); + + // ------------------------------------------------------------------- + // Per-call timings. + // ------------------------------------------------------------------- + const double convertMs = dispatcherElapsedMs(t0, t1); + const double h2dMs = dispatcherElapsedMs(t1, t2); + const double kernelMs = dispatcherElapsedMs(t2, t3); + const double d2hMs = dispatcherElapsedMs(t3, t4); + const double overlayMs = dispatcherElapsedMs(t4, t5); + + impl_->totalConvertMs += convertMs; + impl_->totalH2DMs += h2dMs; + impl_->totalKernelMs += kernelMs; + impl_->totalD2HMs += d2hMs; + impl_->totalOverlayMs += overlayMs; + impl_->totalDofsProcessed += static_cast(numDof); + + Opm::OpmLog::info(std::format( + "[GpuBlackoilIntensiveQuantitiesDispatcher] call={} dofs={}" + " convert={:.3f}ms h2d={:.3f}ms kernel={:.3f}ms d2h={:.3f}ms overlay={:.3f}ms total={:.3f}ms", + impl_->callCount, numDof, + convertMs, h2dMs, kernelMs, d2hMs, overlayMs, + convertMs + h2dMs + kernelMs + d2hMs + overlayMs)); + + ++impl_->callCount; +} + +// ============================================================================= +// Explicit instantiation for the user-facing CO2STORE simulation TypeTag, +// namely \c FlowGasWaterEnergyProblem (used by the Flow simulator binary +// when a CO2STORE deck with WATER+GAS+THERMAL is loaded). The GPU-specific +// property bindings live in \c GpuFlowGasWaterEnergyTypeTags.hpp. +// ============================================================================= +template class GpuBlackoilIntensiveQuantitiesDispatcher< + Opm::Properties::TTag::FlowGasWaterEnergyProblem>; + +} // namespace Opm::gpuistl diff --git a/opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.hpp b/opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.hpp new file mode 100644 index 00000000000..572bc80d4a9 --- /dev/null +++ b/opm/simulators/linalg/gpuistl/GpuBlackoilIntensiveQuantitiesDispatcher.hpp @@ -0,0 +1,123 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#ifndef OPM_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER_HPP +#define OPM_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER_HPP + +// The dispatcher (and its underlying CUDA/HIP translation unit) requires +// either HIP (any version) or CUDA >= 13.1; older CUDA toolkits cannot +// compile the kernel translation unit. The build system gates inclusion of +// the .cu file (and the explicit instantiations therein) on the same +// condition, and exports the +// \c OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER compile +// definition iff the dispatcher is available. Callers should gate uses of +// the dispatcher on that macro (or, equivalently, on +// \c GpuBlackoilIntensiveQuantitiesDispatcherSupport, which is only +// specialized to \c true when the dispatcher is available). +#ifndef OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER +#define OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER 0 +#endif + +#if OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + +#include + +#include +#include + +namespace Opm::Properties::TTag { + struct FlowGasWaterEnergyProblem; +} + +namespace Opm::gpuistl { + +/// Compile-time predicate: does this CPU \c TypeTag describe the specific +/// CO2STORE configuration (FlowGasWaterEnergyProblem) that the GPU +/// intensive-quantities dispatcher currently supports? Restricted to a +/// single TypeTag so that explicit instantiations of the dispatcher +/// remain manageable for the experimental prototype. +template +struct GpuBlackoilIntensiveQuantitiesDispatcherSupport { + static constexpr bool value = false; +}; + +template <> +struct GpuBlackoilIntensiveQuantitiesDispatcherSupport< + Opm::Properties::TTag::FlowGasWaterEnergyProblem> { + static constexpr bool value = true; +}; + +/// Runs the BlackOil intensive-quantities update on the GPU for a given set +/// of degrees of freedom. The dispatcher is process-wide (one singleton per +/// CPU \c TypeTag). It lazily constructs the GPU-side \c GpuFlowProblem +/// from the supplied CPU problem on the first call. +/// +/// On every call, primary variables for the requested DoFs are uploaded to +/// the device, the per-cell update kernel from +/// \c test_blackoilintensivequantities_gpu.cu is launched (one thread per +/// DoF), and the resulting intensive quantities are read back to host +/// memory, as the design plan in \c PLAN.md requires. +/// +/// The class is a template on the CPU \c TypeTag and is explicitly +/// instantiated in the \c .cu translation unit. +template +class GpuBlackoilIntensiveQuantitiesDispatcher +{ +public: + using Problem = Opm::GetPropType; + using PrimaryVariables = Opm::GetPropType; + using IntensiveQuantities = Opm::GetPropType; + + GpuBlackoilIntensiveQuantitiesDispatcher(); + ~GpuBlackoilIntensiveQuantitiesDispatcher(); + + GpuBlackoilIntensiveQuantitiesDispatcher(const GpuBlackoilIntensiveQuantitiesDispatcher&) = delete; + GpuBlackoilIntensiveQuantitiesDispatcher& + operator=(const GpuBlackoilIntensiveQuantitiesDispatcher&) = delete; + GpuBlackoilIntensiveQuantitiesDispatcher(GpuBlackoilIntensiveQuantitiesDispatcher&&) = delete; + GpuBlackoilIntensiveQuantitiesDispatcher& + operator=(GpuBlackoilIntensiveQuantitiesDispatcher&&) = delete; + + /// Run the per-cell intensive-quantities update kernel on \p numDof + /// DoFs. \p cpuPriVars[i] points at the CPU primary variables for DoF + /// \c i. The GPU-computed BlackOil intensive quantities are written + /// onto \p outIQ[i] field-by-field via + /// \c BlackOilIntensiveQuantities::overlayBlackOilFieldsFrom, which + /// includes \c mobility_ (the GPU relperm path now populates it via + /// \c iq.update()). + /// + /// Per-call host/device timings are accumulated and printed every call + /// via \c Opm::OpmLog::info, prefixed with + /// \c "[GpuBlackoilIntensiveQuantitiesDispatcher]". + void update(const Problem& cpuProblem, + const PrimaryVariables* const* cpuPriVars, + IntensiveQuantities* const* outIQ, + std::size_t numDof); + +private: + struct Impl; + std::unique_ptr impl_; +}; + +} // namespace Opm::gpuistl + +#endif // OPM_HAVE_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER + +#endif // OPM_GPU_BLACKOIL_INTENSIVE_QUANTITIES_DISPATCHER_HPP diff --git a/opm/simulators/linalg/gpuistl/GpuFlowGasWaterEnergyTypeTags.hpp b/opm/simulators/linalg/gpuistl/GpuFlowGasWaterEnergyTypeTags.hpp new file mode 100644 index 00000000000..ad8cdf16594 --- /dev/null +++ b/opm/simulators/linalg/gpuistl/GpuFlowGasWaterEnergyTypeTags.hpp @@ -0,0 +1,243 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +/*! + * \file + * + * Shared GPU TypeTag derivations on top of the simulation-side + * \c FlowGasWaterEnergyProblem TypeTag (CO2STORE configuration). + * + * The intent is that GPU code (the dispatcher and the + * \c test_blackoilintensivequantities_gpu test) reuses the *exact* same + * TypeTag as the simulator and only overrides the small set of properties + * that have to change for the GPU build (storage container of the primary + * variables, the material law manager, the fluid system, the problem and + * the element context). + */ +#ifndef OPM_GPU_FLOW_GASWATER_ENERGY_TYPETAGS_HPP +#define OPM_GPU_FLOW_GASWATER_ENERGY_TYPETAGS_HPP + +#if HAVE_CUDA + +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace Opm::Properties { + +namespace TTag { + +/// The GPU sibling of \c FlowGasWaterEnergyProblem. Inherits all property +/// bindings from the simulation TypeTag and only overrides the storage of +/// the primary variables, the material law and the fluid system to their +/// GPU equivalents. +struct FlowGasWaterEnergyProblemGPU { + using InheritsFrom = std::tuple; +}; + +/// A second derived TypeTag used for the per-cell intensive-quantities +/// kernel: it additionally swaps in \c GpuFlowProblem and +/// \c FvBaseElementContextGpu so that the GPU device side has trivially +/// copyable \c Problem / \c ElementContext types. +struct FlowGasWaterEnergyDummyProblemGPU { + using InheritsFrom = std::tuple; +}; + +} // namespace TTag + +// ----------------------------------------------------------------------- +// Diffusion / dispersion: not yet implemented in the GPU IntensiveQuantities +// update path; force them off on the GPU TypeTag. +// ----------------------------------------------------------------------- +template +struct EnableDiffusion +{ + static constexpr bool value = false; +}; + +template +struct EnableDispersion +{ + static constexpr bool value = false; +}; + +// ----------------------------------------------------------------------- +// MaterialLaw: GPU-friendly EclTwoPhaseMaterial backed by GpuView storage. +// ----------------------------------------------------------------------- +template +struct MaterialLaw +{ +private: + using Scalar = GetPropType; + using FluidSystem = GetPropType; + using Traits = ThreePhaseMaterialTraits(), + getPropValue()>; + using TwoPhaseTraits = TwoPhaseMaterialTraits; + using TwoPhaseParams = ::Opm::PiecewiseLinearTwoPhaseMaterialParams< + TwoPhaseTraits, ::Opm::gpuistl::GpuView>; + using TwoPhaseLaw = ::Opm::PiecewiseLinearTwoPhaseMaterial; + using GpuMaterialLawParams = ::Opm::EclTwoPhaseMaterialParams< + Traits, TwoPhaseParams, TwoPhaseParams, TwoPhaseParams, ::Opm::gpuistl::ValueAsPointer>; + using GpuMaterialLaw = ::Opm::EclTwoPhaseMaterial< + Traits, TwoPhaseLaw, TwoPhaseLaw, TwoPhaseLaw, GpuMaterialLawParams>; + +public: + using EclMaterialLawManager = ::Opm::EclMaterialLaw::GpuManager< + Traits, TwoPhaseLaw, TwoPhaseLaw, ::Opm::VectorWithDefaultAllocator, GpuMaterialLaw>; + using type = typename EclMaterialLawManager::MaterialLaw; +}; + +// ----------------------------------------------------------------------- +// PrimaryVariables: GPU build uses MiniVector storage so the type is +// trivially copyable to the device. +// ----------------------------------------------------------------------- +template +struct PrimaryVariables +{ + using type = ::Opm::BlackOilPrimaryVariables; +}; + +// ----------------------------------------------------------------------- +// FluidSystem: non-static, GpuView-backed instantiation. +// ----------------------------------------------------------------------- +template +struct FluidSystem +{ + using type = ::Opm::BlackOilFluidSystemNonStatic< + GetPropType, + ::Opm::BlackOilDefaultFluidSystemIndices, + ::Opm::gpuistl::GpuView>; +}; + +template +struct FluidSystem +{ + using type = ::Opm::BlackOilFluidSystemNonStatic< + GetPropType, + ::Opm::BlackOilDefaultFluidSystemIndices, + ::Opm::gpuistl::GpuView>; +}; + +// ----------------------------------------------------------------------- +// SolidEnergyLaw / ThermalConductionLaw: bind to the GPU-portable laws +// (EclSpecrockLaw / EclThconrLaw). Both expose an +// \c EclThermalLawManager nested typedef matching the convention used by +// FlowBaseProblemProperties so existing GetProp<...>::EclThermalLawManager +// chains keep working. +// ----------------------------------------------------------------------- +template +struct SolidEnergyLaw +{ +private: + using Scalar = GetPropType; + using FluidSystem = GetPropType; + +public: + using EclThermalLawManager = ::Opm::EclThermalLaw::GpuManager< + Scalar, FluidSystem, ::Opm::gpuistl::GpuView, ::Opm::gpuistl::GpuView>; + + using type = ::Opm::EclSpecrockLaw< + Scalar, + ::Opm::EclSpecrockLawParams>; +}; + +template +struct ThermalConductionLaw +{ +private: + using Scalar = GetPropType; + using FluidSystem = GetPropType; + +public: + using EclThermalLawManager = ::Opm::EclThermalLaw::GpuManager< + Scalar, FluidSystem, ::Opm::gpuistl::GpuView, ::Opm::gpuistl::GpuView>; + + using type = ::Opm::EclThconrLaw; +}; + +// ----------------------------------------------------------------------- +// Problem: GpuFlowProblem (GpuView-backed) instead of FlowProblemBlackoil. +// ----------------------------------------------------------------------- +template +struct Problem +{ +private: + using ScalarT = GetPropType; + using FluidSystem = GetPropType; + using CpuMaterialLawManager = + typename Opm::GetProp::EclMaterialLawManager; + using GpuViewMaterialLawManager = + ::Opm::EclMaterialLaw::GpuManager; + using GpuViewThermalLawManager = ::Opm::EclThermalLaw::GpuManager< + ScalarT, FluidSystem, ::Opm::gpuistl::GpuView, ::Opm::gpuistl::GpuView>; + +public: + using type = ::Opm::GpuFlowProblem; +}; + +// ----------------------------------------------------------------------- +// ElementContext: trivially copyable GPU element context. +// ----------------------------------------------------------------------- +template +struct ElementContext +{ + using type = ::Opm::FvBaseElementContextGpu; +}; + +} // namespace Opm::Properties + +#endif // HAVE_CUDA + +#endif // OPM_GPU_FLOW_GASWATER_ENERGY_TYPETAGS_HPP diff --git a/opm/simulators/linalg/gpuistl/MiniVector.hpp b/opm/simulators/linalg/gpuistl/MiniVector.hpp index 921f3a9271a..7c2af6e23ea 100644 --- a/opm/simulators/linalg/gpuistl/MiniVector.hpp +++ b/opm/simulators/linalg/gpuistl/MiniVector.hpp @@ -78,6 +78,7 @@ class MiniVector fill(value); } + #if HAVE_DUNE_COMMON /** * @brief Conversion constructor from Dune::FieldVector. * @param fv Source FieldVector (must have dimension `Dimension`). @@ -88,6 +89,7 @@ class MiniVector data_[i] = fv[i]; } } + #endif /** * @brief Initializer‑list constructor. diff --git a/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp b/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp index ee7cb4950de..9fe166ace44 100644 --- a/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp +++ b/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp @@ -115,6 +115,96 @@ make_gpu_unique_ptr(const T& value) return ptr; } +/** + * @brief Deleter that releases a GPU array allocation made with \c cudaMalloc. + * + * Used by \c make_gpu_unique_ptr_array. Having a named deleter type (rather than a lambda) + * makes the resulting \c std::unique_ptr type stable across translation units, which is + * important when storing it as a class member. + */ +template +struct GpuArrayDeleter { + void operator()(T* ptr) const noexcept + { + OPM_GPU_WARN_IF_ERROR(cudaFree(ptr)); + } +}; + +/** + * @brief Deleter for objects living in unified (managed) GPU memory. + * + * Calls the destructor of the contained object before releasing the allocation with + * \c cudaFree. Used by \c make_gpu_managed_unique_ptr. + */ +template +struct GpuManagedDeleter { + void operator()(T* ptr) const noexcept + { + if (ptr != nullptr) { + ptr->~T(); + OPM_GPU_WARN_IF_ERROR(cudaFree(ptr)); + } + } +}; + +/** + * @brief Creates a unique pointer managing a GPU-allocated array of \p numElements elements. + * + * This function allocates memory on the GPU for \p numElements elements of type \c T using + * \c cudaMalloc. It returns a \c std::unique_ptr that automatically releases the GPU memory + * with \c cudaFree when destroyed. + * + * The returned smart pointer uses the \c T[] specialization, so element access via + * \c operator[] is well defined; however, dereferencing the returned pointer on the host is + * not safe because the underlying memory lives on the device. + * + * @tparam T The element type to allocate on the GPU. + * @param numElements The number of elements of type \c T to allocate. + * @return A \c std::unique_ptr to the GPU-allocated array. + */ +template +std::unique_ptr> +make_gpu_unique_ptr_array(std::size_t numElements) +{ + T* ptr = nullptr; + OPM_GPU_SAFE_CALL(cudaMalloc(&ptr, numElements * sizeof(T))); + return std::unique_ptr>(ptr); +} + +/** + * @brief Creates a unique pointer managing GPU unified (managed) memory for a single object. + * + * Allocates one \c T worth of unified memory with \c cudaMallocManaged, then constructs a + * \c T in-place using the supplied arguments via placement-new. The returned + * \c std::unique_ptr destroys the contained object and releases the unified memory with + * \c cudaFree when it goes out of scope. + * + * Unified memory is accessible from both host and device, so the returned pointer can be + * dereferenced directly on either side. This is useful when an object must be visible to + * GPU kernels but also needs to be touched from host code (e.g. when a device-side member + * holds a pointer into the same unified-memory region). + * + * @tparam T The type of the object to construct in unified memory. + * @tparam Args Argument types forwarded to \c T's constructor. + * @param args Arguments forwarded to \c T's constructor. + * @return A \c std::unique_ptr owning the unified-memory allocation. + */ +template +std::unique_ptr> +make_gpu_managed_unique_ptr(Args&&... args) +{ + void* raw = nullptr; + OPM_GPU_SAFE_CALL(cudaMallocManaged(&raw, sizeof(T))); + T* ptr = nullptr; + try { + ptr = new (raw) T(std::forward(args)...); + } catch (...) { + OPM_GPU_WARN_IF_ERROR(cudaFree(raw)); + throw; + } + return std::unique_ptr>(ptr); +} + /** * @brief Copies a value from GPU-allocated memory to the host. * @@ -336,7 +426,9 @@ class ValueAsPointer { public: using element_type = T; - ValueAsPointer(const T& t) : value(t) {} + OPM_HOST_DEVICE ValueAsPointer() = default; + + OPM_HOST_DEVICE explicit ValueAsPointer(const T& t) : value(t) {} OPM_HOST_DEVICE T* operator->() { return &value; diff --git a/tests/gpuistl/test_blackoilintensivequantities_gpu.cu b/tests/gpuistl/test_blackoilintensivequantities_gpu.cu new file mode 100644 index 00000000000..71ef4b067a7 --- /dev/null +++ b/tests/gpuistl/test_blackoilintensivequantities_gpu.cu @@ -0,0 +1,1007 @@ +/* + Copyright 2025 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#include + +// Tricks for older versions of ROCm that do not support alugrid +// or dune fem. Note that these changes are only valid for this test file. +#ifdef HAVE_DUNE_ALUGRID +#undef HAVE_DUNE_ALUGRID +#endif +#define HAVE_DUNE_ALUGRID 0 +#ifdef HAVE_DUNE_FEM +#undef HAVE_DUNE_FEM +#endif +#define HAVE_DUNE_FEM 0 + +#include +#include +#include +#define HAVE_ECL_INPUT 1 + + +#define BOOST_TEST_MODULE TestBlackOilIntensiveQuantitiesGPU + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + + +/// Build a CO2STORE WATER+GAS deck with the requested DIMENS. The grid is +/// a uniform unit-cube grid; PERMX/PORO are constant. EQUIL gives the +/// pressure profile so different cells (different depths) get different +/// fluid states, exercising the per-cell update on both CPU and GPU. +static std::string makeDeckString(int nx, int ny, int nz) +{ + const long total = static_cast(nx) * ny * nz; + const long areal = static_cast(nx) * ny; + std::ostringstream o; + o << "RUNSPEC\n" + << "DIMENS\n" << nx << ' ' << ny << ' ' << nz << " /\n" + << "EQLDIMS\n/\n" + << "TABDIMS\n/\n" + << "WATER\nGAS\nCO2STORE\nTHERMAL\nMETRIC\n" + << "GRID\n" + << "GRIDFILE\n0 0 /\n" + << "DX\n" << total << "*1 /\n" + << "DY\n" << total << "*1 /\n" + << "DZ\n" << total << "*1 /\n" + << "TOPS\n" << areal << "*0 /\n" + << "PERMX\n" << total << "*1013.25 /\n" + << "PORO\n" << total << "*0.25 /\n" + << "COPY\nPERMX PERMY /\nPERMX PERMZ /\n/\n" + << "THCONR\n" << total << "*1.5 /\n" + << "PROPS\n" + << "SGWFN\n" + << "0.000000E+00 0.000000E+00 1.000000E+00 3.060000E-02\n" + << "1.000000E+00 1.000000E+00 0.000000E+00 3.060000E-01 /\n" + << "SPECROCK\n20 2125.0 100 2125.0 /\n" + << "SOLUTION\n" + << "RPTRST\n'BASIC=0' /\n" + << "EQUIL\n0 300 100 0 0 0 1 1 0 /\n" + << "RTEMPVD\n0 36.12\n1000 70.0 /\n" + << "SCHEDULE\n" + << "RPTRST\n'BASIC=0' /\n" + << "TSTEP\n1 /"; + return o.str(); +} + + +static constexpr const char* deckString1 = "-- =============== RUNSPEC\n" + "RUNSPEC\n" + "DIMENS\n" + "3 3 3 /\n" + "EQLDIMS\n" + "/\n" + "TABDIMS\n" + "/\n" + "WATER\n" + "GAS\n" + "CO2STORE\n" + "THERMAL\n" + "METRIC\n" + "-- =============== GRID\n" + "GRID\n" + "GRIDFILE\n" + "0 0 /\n" + "DX\n" + "27*1 /\n" + "DY\n" + "27*1 /\n" + "DZ\n" + "27*1 /\n" + "TOPS\n" + "9*0 /\n" + "PERMX\n" + "27*1013.25 /\n" + "PORO\n" + "27*0.25 /\n" + "COPY\n" + "PERMX PERMY /\n" + "PERMX PERMZ /\n" + "/\n" + "THCONR\n" + "27*1.5 /\n" + "-- =============== PROPS\n" + "PROPS\n" + "SGWFN\n" + "0.000000E+00 0.000000E+00 1.000000E+00 3.060000E-02\n" + "1.000000E+00 1.000000E+00 0.000000E+00 3.060000E-01 /\n" + "SPECROCK\n" + "20 2125.0 100 2125.0 /\n" + "-- =============== SOLUTION\n" + "SOLUTION\n" + "RPTRST\n" + "'BASIC=0' /\n" + "EQUIL\n" + "0 300 100 0 0 0 1 1 0 /\n" + "RTEMPVD\n" + "0 36.12\n" + "1000 70.0 /\n" + "-- =============== SCHEDULE\n" + "SCHEDULE\n" + "RPTRST\n" + "'BASIC=0' /\n" + "TSTEP\n" + "1 /"; + +using ScalarToUse = Opm::GetPropType; + +using GpuB = Opm::gpuistl::GpuBuffer; +using GpuV = Opm::gpuistl::GpuView; +using FluidSystem = Opm::BlackOilFluidSystem; +using Evaluation = Opm::DenseAd::Evaluation; +using BlackOilFluidSystemView + = Opm::BlackOilFluidSystemNonStatic; + +template +OPM_HOST_DEVICE double asDouble(const Value& value) +{ + if constexpr (requires { value.value(); }) { + return static_cast(value.value()); + } + else { + return static_cast(value); + } +} + +/// Compare two scalar-or-Evaluation quantities. Always compares the value +/// part. When \p checkDerivatives is true and both operands are +/// `DenseAd::Evaluation`s, also checks every partial derivative using +/// \c BOOST_CHECK_CLOSE with the supplied tolerance and increments +/// \p derivativeComparisons by the number of partial derivatives compared. +/// If \p checkDerivatives is true but the operands are not Evaluations, +/// the test fails with \c BOOST_FAIL so a misconfigured derivative test +/// cannot silently pass on plain scalars. +template +void checkValueAndDerivatives(const CpuValue& cpuValue, + const GpuValue& gpuValue, + double tol, + bool checkDerivatives, + const char* label, + std::size_t& derivativeComparisons) +{ + BOOST_CHECK_CLOSE(asDouble(cpuValue), asDouble(gpuValue), tol); + + if (!checkDerivatives) { + return; + } + + if constexpr (requires { + CpuValue::numVars; + cpuValue.derivative(0); + GpuValue::numVars; + gpuValue.derivative(0); + }) + { + static_assert(static_cast(CpuValue::numVars) + == static_cast(GpuValue::numVars), + "CPU and GPU Evaluation types must have the same numVars"); + static_assert(static_cast(CpuValue::numVars) > 0, + "Derivative comparison requires at least one partial derivative"); + for (int derivIdx = 0; derivIdx < CpuValue::numVars; ++derivIdx) { + const double cpuDerivative = static_cast(cpuValue.derivative(derivIdx)); + const double gpuDerivative = static_cast(gpuValue.derivative(derivIdx)); + BOOST_CHECK_MESSAGE(std::abs(cpuDerivative - gpuDerivative) + <= tol * (1.0 + std::abs(cpuDerivative)), + label << " derivative[" << derivIdx + << "] cpu=" << cpuDerivative + << " gpu=" << gpuDerivative); + ++derivativeComparisons; + } + } + else { + BOOST_FAIL(std::string("checkDerivatives=true but ") + label + + " is not a DenseAd::Evaluation"); + } +} + +namespace Opm +{ +namespace Properties +{ +namespace TTag +{ + +/// CPU side mirror of the simulation TypeTag, with diffusion/dispersion +/// disabled because the simpler \c BlackOilIntensiveQuantities::update() +/// overload used in this test does not support them. All other property +/// bindings (Indices, MaterialLaw, FluidSystem, etc.) are inherited as-is +/// from \c FlowGasWaterEnergyProblem. +struct FlowGasWaterEnergyProblemTest { + using InheritsFrom = std::tuple; +}; + +/// CPU TypeTag used by the real-deck test: identical to the production +/// \c FlowGasWaterEnergyProblem (does not force-disable diffusion or +/// dispersion) so that decks enabling those keywords can construct the +/// simulator successfully. +struct FlowGasWaterEnergyProblemTestRealDeck { + using InheritsFrom = std::tuple; +}; + +} // namespace TTag + +template +struct EnableDiffusion +{ static constexpr bool value = false; }; + +template +struct EnableDispersion +{ static constexpr bool value = false; }; + +} // namespace Properties + +} // namespace Opm + +using TypeTag = Opm::Properties::TTag::FlowGasWaterEnergyProblemTest; +using TypeNacht = Opm::Properties::TTag::FlowGasWaterEnergyDummyProblemGPU; +using TypeTagGPU = Opm::Properties::TTag::FlowGasWaterEnergyProblemGPU; + +template +__global__ void +updateAllCellsKernel(GpuProblem problem, + Opm::gpuistl::GpuView primaryVariables, + Opm::gpuistl::GpuView outIntensiveQuantities, + std::size_t numCells) +{ + const std::size_t i = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (i >= numCells) { + return; + } + IntensiveQuantities intensiveQuantities = outIntensiveQuantities[i]; + intensiveQuantities.updateSaturations(primaryVariables[i], 0, Opm::LinearizationType{}); + intensiveQuantities.update(problem, primaryVariables[i], static_cast(i), 0); + intensiveQuantities.updateEnergyQuantities_(problem, static_cast(i), 0u); + outIntensiveQuantities[i] = intensiveQuantities; +} + +/// One-shot global initialization of MPI, communicator, and parameter +/// registration. Safe to call from multiple BOOST test cases; only the +/// first call performs the work. +static void initSimulatorOnce() +{ + static bool done = false; + if (done) { + return; + } + using namespace Opm; + int argc1 = boost::unit_test::framework::master_test_suite().argc; + char** argv1 = boost::unit_test::framework::master_test_suite().argv; +#if HAVE_DUNE_FEM + Dune::Fem::MPIManager::initialize(argc1, argv1); +#else + Dune::MPIHelper::instance(argc1, argv1); +#endif + FlowGenericVanguard::setCommunication(std::make_unique()); + Opm::ThreadManager::registerParameters(); + Opm::NewtonMethodParams::registerParameters(); + BlackoilModelParameters::registerParameters(); + AdaptiveTimeStepping::registerParameters(); + Parameters::Register( + "Dummy added for the well model to compile."); + registerAllParameters_(/*finalizeRegistration=*/false); + // Also register parameters for the RealDeck TypeTag here so that all + // tests in the same process share a single (still-open) registration + // pass. Once endRegistration() (called via setupParameters_) closes + // registration, no further parameters can be added globally. + using RealDeckTypeTag = Opm::Properties::TTag::FlowGasWaterEnergyProblemTestRealDeck; + AdaptiveTimeStepping::registerParameters(); + registerAllParameters_(/*finalizeRegistration=*/false); + Parameters::endRegistration(); + done = true; +} + +/// Run the per-cell intensive quantities update on both GPU and CPU for the +/// deck stored at \p deckPath, optionally measuring wall-clock time on both +/// sides. +/// +/// Correctness is checked by comparing CPU vs GPU intensive quantities +/// cell-by-cell at indices \p i = 0, \p sampleStride, 2*\p sampleStride, ... +/// +/// \param deckPath path to a CO2STORE WATER+GAS deck written to disk +/// \param expectedNumCells number of grid cells the deck describes +/// \param sampleStride stride for sampled correctness; 1 means full check +/// \param measureTiming if true, prints CPU and GPU wall-clock to stdout +static void runIntensiveQuantitiesTestForDeck(const std::string& deckPath, + std::size_t expectedNumCells, + std::size_t sampleStride, + bool measureTiming, + bool checkDerivatives = false) +{ + Opm::resetLocale(); + initSimulatorOnce(); + + using namespace Opm; + using Simulator = Opm::GetPropType; + + const auto filenameArg = std::string{"--ecl-deck-file-name="} + deckPath; + const char* argv2[] = { + "test_gpuflowproblem", + filenameArg.c_str(), + "--check-satfunc-consistency=false", + }; + Opm::setupParameters_(/*argc=*/sizeof(argv2) / sizeof(argv2[0]), + argv2, + /*registerParams=*/false, + /*allowUnused=*/true, + /*handleHelp=*/false, + /*myRank=*/0); + + Opm::FlowGenericVanguard::readDeck(deckPath); + auto sim = std::make_unique(); + + // The Simulator constructor only calls finishInit; the solution vector + // is not yet EQUIL-populated. Trigger initial-solution application so + // model().solution(0)[i] holds the real per-cell primary variables + // (otherwise zero-initialized PVs make the GPU update produce + // non-finite densities and trip the assertion in + // BlackOilIntensiveQuantities::assertFiniteMembers()). + sim->model().applyInitialSolution(); + + auto& cpuProblem = sim->problem(); + auto& dynamicFluidSystem = FluidSystem::getNonStaticInstance(); + + auto dynamicGpuFluidSystemBuffer = ::Opm::gpuistl::copy_to_gpu(dynamicFluidSystem); + auto dynamicGpuFluidSystemView = ::Opm::gpuistl::make_view(dynamicGpuFluidSystemBuffer); + + // Place the FluidSystemView in unified memory so that the device-side + // BlackOilFluidState pointer dereference is valid both on host and on + // device. + using FluidSystemViewType = std::decay_t; + FluidSystemViewType* managedFluidSystemView = nullptr; + auto managedFluidSystemViewOwner = + Opm::gpuistl::make_gpu_managed_unique_ptr(dynamicGpuFluidSystemView); + managedFluidSystemView = managedFluidSystemViewOwner.get(); + + using CpuMaterialLawManager = typename Opm::GetProp::EclMaterialLawManager; + using Traits = typename CpuMaterialLawManager::MaterialLaw::Traits; + using TwoPhaseTraits = Opm::TwoPhaseMaterialTraits; + using GpuPiecewiseLinearParams = Opm::PiecewiseLinearTwoPhaseMaterialParams< + TwoPhaseTraits, Opm::gpuistl::GpuView>; + using GpuPiecewiseLinearLaw = Opm::PiecewiseLinearTwoPhaseMaterial; + using GpuMaterialLawParams = Opm::EclTwoPhaseMaterialParams< + Traits, GpuPiecewiseLinearParams, GpuPiecewiseLinearParams, GpuPiecewiseLinearParams, + Opm::gpuistl::ValueAsPointer>; + using GpuMaterialLaw = Opm::EclTwoPhaseMaterial< + Traits, GpuPiecewiseLinearLaw, GpuPiecewiseLinearLaw, GpuPiecewiseLinearLaw, + GpuMaterialLawParams>; + using GpuManagerBuffer = Opm::EclMaterialLaw::GpuManager< + Traits, GpuPiecewiseLinearLaw, GpuPiecewiseLinearLaw, + Opm::gpuistl::GpuBuffer, GpuMaterialLaw>; + using GpuThermalManagerBuffer = Opm::EclThermalLaw::GpuManager< + ScalarToUse, FluidSystemViewType, + Opm::gpuistl::GpuBuffer, Opm::gpuistl::GpuView>; + using GpuProblemBuffer = Opm::GpuFlowProblem; + + GpuProblemBuffer gpuProblemBuffer(cpuProblem); + auto gpuProblemView = Opm::gpuistl::make_view(gpuProblemBuffer); + + const std::size_t numCells = cpuProblem.model().numGridDof(); + BOOST_REQUIRE_EQUAL(numCells, expectedNumCells); + BOOST_REQUIRE_GT(numCells, 0u); + + using PrimaryVariablesCpu = Opm::GetPropType; + using PrimaryVariablesGpu = Opm::BlackOilPrimaryVariables; + using IntensiveQuantitiesCpu = Opm::BlackOilIntensiveQuantities; + using IntensiveQuantitiesGpu = Opm::BlackOilIntensiveQuantities; + + // Pull the EQUIL-initialized per-cell primary variables straight from + // the simulator's solution vector. Using zero-initialized PVs makes the + // GPU update produce non-finite densities (e.g. for CO2STORE the PVT + // tables are not defined at p=0), which trips the assertion in + // BlackOilIntensiveQuantities::assertFiniteMembers(). + const auto& cpuSolution = cpuProblem.model().solution(/*timeIdx=*/0); + BOOST_REQUIRE_EQUAL(cpuSolution.size(), numCells); + std::vector cpuPrimaryVariables(numCells); + std::vector hostPrimaryVariablesGpu; + hostPrimaryVariablesGpu.reserve(numCells); + for (std::size_t i = 0; i < numCells; ++i) { + cpuPrimaryVariables[i] = cpuSolution[i]; + hostPrimaryVariablesGpu.emplace_back(cpuPrimaryVariables[i]); + } + Opm::gpuistl::GpuBuffer primaryVariablesBuffer(hostPrimaryVariablesGpu); + + IntensiveQuantitiesCpu cpuIntensiveQuantitiesPrototype; + IntensiveQuantitiesGpu gpuIntensiveQuantitiesPrototype = cpuIntensiveQuantitiesPrototype.template withOtherFluidSystem(*managedFluidSystemView); + if (measureTiming) { + std::cout << std::format("[timing] sizeof(IntensiveQuantitiesGpu)={} sizeof(IntensiveQuantitiesCpu)={} " + "sizeof(PrimaryVariablesGpu)={} cells={} " + "approx GPU mem for IQ buffer={:.2f} MiB\n", + sizeof(IntensiveQuantitiesGpu), sizeof(IntensiveQuantitiesCpu), + sizeof(PrimaryVariablesGpu), numCells, + (sizeof(IntensiveQuantitiesGpu) * numCells) / (1024.0 * 1024.0)); + } + std::vector hostIntensiveQuantities(numCells, gpuIntensiveQuantitiesPrototype); + Opm::gpuistl::GpuBuffer intensiveQuantitiesBuffer(hostIntensiveQuantities); + + const unsigned blockSize = 64u; + const unsigned gridSize = static_cast((numCells + blockSize - 1u) / blockSize); + + // Time the GPU kernel using CUDA events (excludes the host-to-device + // setup and device-to-host copy already done via GpuBuffer ctors above). + cudaEvent_t eventStart{}, eventStop{}; + OPM_GPU_SAFE_CALL(cudaEventCreate(&eventStart)); + OPM_GPU_SAFE_CALL(cudaEventCreate(&eventStop)); + OPM_GPU_SAFE_CALL(cudaEventRecord(eventStart)); + updateAllCellsKernel<<>>( + gpuProblemView, + Opm::gpuistl::GpuView(primaryVariablesBuffer.data(), primaryVariablesBuffer.size()), + Opm::gpuistl::GpuView(intensiveQuantitiesBuffer.data(), intensiveQuantitiesBuffer.size()), + numCells); + OPM_GPU_SAFE_CALL(cudaEventRecord(eventStop)); + OPM_GPU_SAFE_CALL(cudaEventSynchronize(eventStop)); + OPM_GPU_SAFE_CALL(cudaGetLastError()); + float gpuMilliseconds = 0.0f; + OPM_GPU_SAFE_CALL(cudaEventElapsedTime(&gpuMilliseconds, eventStart, eventStop)); + OPM_GPU_SAFE_CALL(cudaEventDestroy(eventStart)); + OPM_GPU_SAFE_CALL(cudaEventDestroy(eventStop)); + + // CPU reference: use the IntensiveQuantities the simulator itself + // computed during invalidateAndUpdateIntensiveQuantities(0). This keeps + // the CPU side consistent with the static FluidSystem state of the + // current simulator instance (which may otherwise diverge from the + // dynamic GPU FluidSystem when other tests have run before this one). + std::vector cpuIntensiveQuantities(numCells); + const auto cpuT0 = std::chrono::steady_clock::now(); + sim->model().invalidateAndUpdateIntensiveQuantities(/*timeIdx=*/0); + for (std::size_t i = 0; i < numCells; ++i) { + const auto* cached = sim->model().cachedIntensiveQuantities(static_cast(i), 0); + BOOST_REQUIRE(cached != nullptr); + cpuIntensiveQuantities[i] = *cached; + } + const auto cpuT1 = std::chrono::steady_clock::now(); + const double cpuMilliseconds = + std::chrono::duration(cpuT1 - cpuT0).count(); + + if (measureTiming) { + std::cout << std::format("[timing] cells={} CPU={:.3f} ms GPU(kernel)={:.3f} ms " + "speedup={:.2f}x\n", + numCells, cpuMilliseconds, gpuMilliseconds, + cpuMilliseconds / gpuMilliseconds); + } + + // Bring GPU intensive quantities back and compare cells (sampled by sampleStride). + std::vector gpuIntensiveQuantitiesHost(numCells, gpuIntensiveQuantitiesPrototype); + OPM_GPU_SAFE_CALL(cudaMemcpy(gpuIntensiveQuantitiesHost.data(), + intensiveQuantitiesBuffer.data(), + numCells * sizeof(IntensiveQuantitiesGpu), + cudaMemcpyDeviceToHost)); + BOOST_REQUIRE_EQUAL(gpuIntensiveQuantitiesHost.size(), numCells); + + constexpr double tol = 1e-6; + // Active phase indices for CO2STORE WATER+GAS (oil disabled). + constexpr unsigned waterPhaseIdx = FluidSystem::waterPhaseIdx; + constexpr unsigned gasPhaseIdx = FluidSystem::gasPhaseIdx; + const unsigned activePhases[] = {waterPhaseIdx, gasPhaseIdx}; + + std::size_t checked = 0; + std::size_t derivativeComparisons = 0; + for (std::size_t i = 0; i < numCells; i += sampleStride) { + const auto& cpuIQ = cpuIntensiveQuantities[i]; + const auto& gpuIQ = gpuIntensiveQuantitiesHost[i]; + const auto& cpuFluidState = cpuIQ.fluidState(); + const auto& gpuFluidState = gpuIQ.fluidState(); + + // Per-phase fluid state quantities for the active phases. Restricted + // to fields that are stored directly on the FluidState (no call into + // the underlying FluidSystem), since the GPU-side FluidState's + // FluidSystem holds device pointers (GpuView) that are not + // dereferenceable on the host. + for (unsigned p : activePhases) { + checkValueAndDerivatives(cpuFluidState.saturation(p), + gpuFluidState.saturation(p), + tol, checkDerivatives, "saturation", + derivativeComparisons); + checkValueAndDerivatives(cpuFluidState.pressure(p), + gpuFluidState.pressure(p), + tol, checkDerivatives, "pressure", + derivativeComparisons); + checkValueAndDerivatives(cpuFluidState.density(p), + gpuFluidState.density(p), + tol, checkDerivatives, "density", + derivativeComparisons); + checkValueAndDerivatives(cpuFluidState.invB(p), + gpuFluidState.invB(p), + tol, checkDerivatives, "invB", + derivativeComparisons); + // Mobility now goes through the opm-common EclTwoPhaseMaterial + // (relativePermeabilities) on both CPU and GPU, so the values + // must agree to the same tolerance as the rest of the IQ. + checkValueAndDerivatives(cpuIQ.mobility(p), + gpuIQ.mobility(p), + tol, checkDerivatives, "mobility", + derivativeComparisons); + } + + // Scalar / per-cell quantities. Both CPU and GPU FluidState configs + // have enableVapwat=true and enableDisgasInWater=true so Rsw/Rvw are + // stored on both sides. + checkValueAndDerivatives(cpuFluidState.Rsw(), gpuFluidState.Rsw(), + tol, checkDerivatives, "Rsw", + derivativeComparisons); + checkValueAndDerivatives(cpuFluidState.Rvw(), gpuFluidState.Rvw(), + tol, checkDerivatives, "Rvw", + derivativeComparisons); + BOOST_CHECK_EQUAL(static_cast(cpuFluidState.pvtRegionIndex()), + static_cast(gpuFluidState.pvtRegionIndex())); + + checkValueAndDerivatives(cpuIQ.porosity(), gpuIQ.porosity(), + tol, checkDerivatives, "porosity", + derivativeComparisons); + BOOST_CHECK_CLOSE(static_cast(cpuIQ.referencePorosity()), + static_cast(gpuIQ.referencePorosity()), tol); + checkValueAndDerivatives(cpuIQ.rockCompTransMultiplier(), + gpuIQ.rockCompTransMultiplier(), + tol, checkDerivatives, "rockCompTransMultiplier", + derivativeComparisons); + + ++checked; + } + BOOST_TEST_MESSAGE("Per-cell GPU vs CPU IQ comparison: checked " + << checked << " / " << numCells << " cells"); + if (checkDerivatives) { + BOOST_TEST_MESSAGE("Derivative comparisons performed: " << derivativeComparisons); + // Guard against silent regressions: if checkDerivatives is requested + // but the helper never actually compared a derivative (e.g. because + // the IQ types degraded to plain scalars), fail loudly. + BOOST_REQUIRE_GT(derivativeComparisons, 0u); + } +} + +/// RAII wrapper around a temporary file. The file is deleted when the +/// object goes out of scope. +struct TemporaryFile { + std::filesystem::path path; + explicit TemporaryFile(std::string_view filename) + : path(std::filesystem::temp_directory_path() / filename) {} + ~TemporaryFile() { std::filesystem::remove(path); } + TemporaryFile(const TemporaryFile&) = delete; + TemporaryFile& operator=(const TemporaryFile&) = delete; + TemporaryFile(TemporaryFile&&) = delete; + TemporaryFile& operator=(TemporaryFile&&) = delete; +}; + +/// Variant of \c runIntensiveQuantitiesTestForDeck that takes the per-cell +/// primary variables from the simulator's initial (EQUIL-driven) solution +/// vector instead of using a single uniform prototype. This exercises the +/// exact code path used by the dispatcher in production: +/// \c BlackOilPrimaryVariables(cpuPrimaryVariables[i]) -> GPU +/// kernel update -> compare CPU vs GPU IQ field-by-field. \p sampleStride +/// controls how often we check (1 means every cell). +static void runIntensiveQuantitiesTestFromSimulatorSolution(const std::string& deckPath, + std::size_t sampleStride) +{ + Opm::resetLocale(); + + using namespace Opm; + using LocalTypeTag = Opm::Properties::TTag::FlowGasWaterEnergyProblemTestRealDeck; + using LocalGpuTypeTag = Opm::Properties::TTag::FlowGasWaterEnergyDummyProblemGPU; + using Simulator = Opm::GetPropType; + + // Reuse the global init: it registers parameters for both TypeTags so + // this test works whether it runs first or after another test in the + // same process (parameter registration is closed after the first + // setupParameters_ call). + initSimulatorOnce(); + + const auto filenameArg = std::string{"--ecl-deck-file-name="} + deckPath; + const char* argv2[] = { + "test_gpuflowproblem", + filenameArg.c_str(), + "--check-satfunc-consistency=false", + "--enable-tuning=false", + }; + Opm::setupParameters_(/*argc=*/sizeof(argv2) / sizeof(argv2[0]), + argv2, + /*registerParams=*/false, + /*allowUnused=*/true, + /*handleHelp=*/false, + /*myRank=*/0); + + Opm::FlowGenericVanguard::readDeck(deckPath); + auto sim = std::make_unique(); + + // The Simulator constructor only calls finishInit; the solution vector + // is not yet EQUIL-populated. Trigger initial-solution application so + // model().solution(0)[i] holds the real per-cell primary variables. + sim->model().applyInitialSolution(); + + auto& cpuProblem = sim->problem(); + auto& dynamicFluidSystem = FluidSystem::getNonStaticInstance(); + + BOOST_TEST_MESSAGE(std::format( + "FluidSystem phase activation: static[O={}, W={}, G={}] dynamic[O={}, W={}, G={}]", + FluidSystem::phaseIsActive(FluidSystem::oilPhaseIdx), + FluidSystem::phaseIsActive(FluidSystem::waterPhaseIdx), + FluidSystem::phaseIsActive(FluidSystem::gasPhaseIdx), + dynamicFluidSystem.phaseIsActive(FluidSystem::oilPhaseIdx), + dynamicFluidSystem.phaseIsActive(FluidSystem::waterPhaseIdx), + dynamicFluidSystem.phaseIsActive(FluidSystem::gasPhaseIdx))); + + auto dynamicGpuFluidSystemBuffer = ::Opm::gpuistl::copy_to_gpu(dynamicFluidSystem); + auto dynamicGpuFluidSystemView = ::Opm::gpuistl::make_view(dynamicGpuFluidSystemBuffer); + + using FluidSystemViewType = std::decay_t; + FluidSystemViewType* managedFluidSystemView = nullptr; + auto managedFluidSystemViewOwner = + Opm::gpuistl::make_gpu_managed_unique_ptr(dynamicGpuFluidSystemView); + managedFluidSystemView = managedFluidSystemViewOwner.get(); + + BOOST_TEST_MESSAGE(std::format( + "GPU fluid-system view phase activation: O={} W={} G={}", + managedFluidSystemView->phaseIsActive(FluidSystem::oilPhaseIdx), + managedFluidSystemView->phaseIsActive(FluidSystem::waterPhaseIdx), + managedFluidSystemView->phaseIsActive(FluidSystem::gasPhaseIdx))); + + using CpuMaterialLawManager = typename Opm::GetProp::EclMaterialLawManager; + using Traits = typename CpuMaterialLawManager::MaterialLaw::Traits; + using TwoPhaseTraits = Opm::TwoPhaseMaterialTraits; + using GpuPiecewiseLinearParams = Opm::PiecewiseLinearTwoPhaseMaterialParams< + TwoPhaseTraits, Opm::gpuistl::GpuView>; + using GpuPiecewiseLinearLaw = Opm::PiecewiseLinearTwoPhaseMaterial; + using GpuMaterialLawParams = Opm::EclTwoPhaseMaterialParams< + Traits, GpuPiecewiseLinearParams, GpuPiecewiseLinearParams, GpuPiecewiseLinearParams, + Opm::gpuistl::ValueAsPointer>; + using GpuMaterialLaw = Opm::EclTwoPhaseMaterial< + Traits, GpuPiecewiseLinearLaw, GpuPiecewiseLinearLaw, GpuPiecewiseLinearLaw, + GpuMaterialLawParams>; + using GpuManagerBuffer = Opm::EclMaterialLaw::GpuManager< + Traits, GpuPiecewiseLinearLaw, GpuPiecewiseLinearLaw, + Opm::gpuistl::GpuBuffer, GpuMaterialLaw>; + using GpuThermalManagerBuffer = Opm::EclThermalLaw::GpuManager< + ScalarToUse, FluidSystemViewType, + Opm::gpuistl::GpuBuffer, Opm::gpuistl::GpuView>; + using GpuProblemBuffer = Opm::GpuFlowProblem; + + GpuProblemBuffer gpuProblemBuffer(cpuProblem); + auto gpuProblemView = Opm::gpuistl::make_view(gpuProblemBuffer); + + const std::size_t numCells = cpuProblem.model().numGridDof(); + BOOST_REQUIRE_GT(numCells, 0u); + BOOST_TEST_MESSAGE("Real-deck test: numCells=" << numCells); + + using PrimaryVariablesCpu = Opm::GetPropType; + using PrimaryVariablesGpu = Opm::BlackOilPrimaryVariables; + using IntensiveQuantitiesCpu = Opm::BlackOilIntensiveQuantities; + using IntensiveQuantitiesGpu = Opm::BlackOilIntensiveQuantities; + + // Pull the EQUIL-initialized per-cell primary variables straight from + // the simulator's solution vector and convert them to the GPU + // PrimaryVariables type cell-by-cell. + const auto& cpuSolution = cpuProblem.model().solution(/*timeIdx=*/0); + BOOST_REQUIRE_EQUAL(cpuSolution.size(), numCells); + std::vector cpuPrimaryVariables(numCells); + std::vector hostPrimaryVariablesGpu; + hostPrimaryVariablesGpu.reserve(numCells); + for (std::size_t i = 0; i < numCells; ++i) { + cpuPrimaryVariables[i] = cpuSolution[i]; + hostPrimaryVariablesGpu.emplace_back(cpuPrimaryVariables[i]); + } + Opm::gpuistl::GpuBuffer primaryVariablesBuffer(hostPrimaryVariablesGpu); + + // Diagnostic: distribution of pressure-meaning values across cells. + { + std::array meaningCounts{0, 0, 0}; + for (std::size_t i = 0; i < numCells; ++i) { + const auto m = cpuPrimaryVariables[i].primaryVarsMeaningPressure(); + switch (m) { + case Opm::BlackOil::PressureMeaning::Pg: ++meaningCounts[0]; break; + case Opm::BlackOil::PressureMeaning::Po: ++meaningCounts[1]; break; + case Opm::BlackOil::PressureMeaning::Pw: ++meaningCounts[2]; break; + default: break; + } + } + BOOST_TEST_MESSAGE(std::format( + "PressureMeaning distribution (cells={}): Pg={} Po={} Pw={}", + numCells, meaningCounts[0], meaningCounts[1], meaningCounts[2])); + } + if (numCells > 0) { + const auto& pv0 = cpuPrimaryVariables[0]; + BOOST_TEST_MESSAGE(std::format( + "Cell 0 PV: meaning(P={},W={},G={}) pvtRegion={}", + static_cast(pv0.primaryVarsMeaningPressure()), + static_cast(pv0.primaryVarsMeaningWater()), + static_cast(pv0.primaryVarsMeaningGas()), + pv0.pvtRegionIndex())); + for (unsigned k = 0; k < pv0.size(); ++k) { + BOOST_TEST_MESSAGE(std::format(" pv[{}] = {}", k, double(pv0[k]))); + } + } + + + IntensiveQuantitiesCpu cpuIntensiveQuantitiesPrototype; + IntensiveQuantitiesGpu gpuIntensiveQuantitiesPrototype = + cpuIntensiveQuantitiesPrototype.template withOtherFluidSystem(*managedFluidSystemView); + std::vector hostIntensiveQuantities(numCells, gpuIntensiveQuantitiesPrototype); + Opm::gpuistl::GpuBuffer intensiveQuantitiesBuffer(hostIntensiveQuantities); + + const unsigned blockSize = 64u; + const unsigned gridSize = static_cast((numCells + blockSize - 1u) / blockSize); + + updateAllCellsKernel<<>>( + gpuProblemView, + Opm::gpuistl::GpuView( + primaryVariablesBuffer.data(), primaryVariablesBuffer.size()), + Opm::gpuistl::GpuView( + intensiveQuantitiesBuffer.data(), intensiveQuantitiesBuffer.size()), + numCells); + OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); + OPM_GPU_SAFE_CALL(cudaGetLastError()); + + // CPU reference: use the IntensiveQuantities the simulator itself + // computed during invalidateAndUpdateIntensiveQuantities(0). This is the + // exact CPU value that the production dispatcher path would overlay + // GPU-computed BlackOil fields onto, so it is a meaningful reference for + // the GPU comparison without requiring the (diffusion-incompatible) + // simpler IQ::update overload to compile here. + sim->model().invalidateAndUpdateIntensiveQuantities(/*timeIdx=*/0); + std::vector cpuIntensiveQuantities(numCells); + for (std::size_t i = 0; i < numCells; ++i) { + const auto* cached = sim->model().cachedIntensiveQuantities(static_cast(i), 0); + BOOST_REQUIRE(cached != nullptr); + cpuIntensiveQuantities[i] = *cached; + } + + std::vector gpuIntensiveQuantitiesHost(numCells, gpuIntensiveQuantitiesPrototype); + OPM_GPU_SAFE_CALL(cudaMemcpy(gpuIntensiveQuantitiesHost.data(), + intensiveQuantitiesBuffer.data(), + numCells * sizeof(IntensiveQuantitiesGpu), + cudaMemcpyDeviceToHost)); + + constexpr double tol = 1e-6; + constexpr unsigned waterPhaseIdx = FluidSystem::waterPhaseIdx; + constexpr unsigned gasPhaseIdx = FluidSystem::gasPhaseIdx; + const unsigned activePhases[] = {waterPhaseIdx, gasPhaseIdx}; + + std::size_t checked = 0; + std::size_t derivativeComparisons = 0; + std::size_t firstBadCell = static_cast(-1); + for (std::size_t i = 0; i < numCells; i += sampleStride) { + const auto& cpuIQ = cpuIntensiveQuantities[i]; + const auto& gpuIQ = gpuIntensiveQuantitiesHost[i]; + const auto& cpuFs = cpuIQ.fluidState(); + const auto& gpuFs = gpuIQ.fluidState(); + + // Probe non-finite values explicitly; report the first offender so + // we can debug the production simulator failure mode. + for (unsigned p : activePhases) { + const double cpuRho = asDouble(cpuFs.density(p)); + const double gpuRho = asDouble(gpuFs.density(p)); + const double cpuP = asDouble(cpuFs.pressure(p)); + const double gpuP = asDouble(gpuFs.pressure(p)); + const double cpuS = asDouble(cpuFs.saturation(p)); + const double gpuS = asDouble(gpuFs.saturation(p)); + if (!std::isfinite(gpuRho) || !std::isfinite(gpuP) || !std::isfinite(gpuS)) { + if (firstBadCell == static_cast(-1)) { + firstBadCell = i; + BOOST_TEST_MESSAGE(std::format( + "First non-finite GPU IQ at cell {} (phase {}): " + "S(cpu={}, gpu={}) P(cpu={}, gpu={}) rho(cpu={}, gpu={})", + i, p, cpuS, gpuS, cpuP, gpuP, cpuRho, gpuRho)); + } + } + } + + for (unsigned p : activePhases) { + checkValueAndDerivatives(cpuFs.saturation(p), gpuFs.saturation(p), + tol, /*checkDerivatives=*/false, "saturation", + derivativeComparisons); + checkValueAndDerivatives(cpuFs.pressure(p), gpuFs.pressure(p), + tol, /*checkDerivatives=*/false, "pressure", + derivativeComparisons); + checkValueAndDerivatives(cpuFs.density(p), gpuFs.density(p), + tol, /*checkDerivatives=*/false, "density", + derivativeComparisons); + checkValueAndDerivatives(cpuFs.invB(p), gpuFs.invB(p), + tol, /*checkDerivatives=*/false, "invB", + derivativeComparisons); + // mobility is intentionally NOT compared: the GPU dispatcher + // does not overlay it (left to the CPU-computed value in + // production). + } + checkValueAndDerivatives(cpuFs.Rsw(), gpuFs.Rsw(), + tol, /*checkDerivatives=*/false, "Rsw", + derivativeComparisons); + checkValueAndDerivatives(cpuFs.Rvw(), gpuFs.Rvw(), + tol, /*checkDerivatives=*/false, "Rvw", + derivativeComparisons); + BOOST_CHECK_EQUAL(static_cast(cpuFs.pvtRegionIndex()), + static_cast(gpuFs.pvtRegionIndex())); + checkValueAndDerivatives(cpuIQ.porosity(), gpuIQ.porosity(), + tol, /*checkDerivatives=*/false, "porosity", + derivativeComparisons); + BOOST_CHECK_CLOSE(static_cast(cpuIQ.referencePorosity()), + static_cast(gpuIQ.referencePorosity()), tol); + + // Thermal IQ fields: temperature in the fluid state, and the three + // BlackOilEnergyIntensiveQuantities scalars (rockInternalEnergy, + // totalThermalConductivity, rockFraction). For FullyImplicitThermal + // the GPU dispatcher writes all of these via overlayBlackOilFieldsFrom, + // so they should match the CPU value field-by-field. + checkValueAndDerivatives(cpuFs.temperature(/*phaseIdx=*/0), + gpuFs.temperature(/*phaseIdx=*/0), + tol, /*checkDerivatives=*/false, "temperature", + derivativeComparisons); + checkValueAndDerivatives(cpuIQ.rockInternalEnergy(), + gpuIQ.rockInternalEnergy(), + tol, /*checkDerivatives=*/false, "rockInternalEnergy", + derivativeComparisons); + checkValueAndDerivatives(cpuIQ.totalThermalConductivity(), + gpuIQ.totalThermalConductivity(), + tol, /*checkDerivatives=*/false, "totalThermalConductivity", + derivativeComparisons); + BOOST_CHECK_CLOSE(static_cast(cpuIQ.rockFraction()), + static_cast(gpuIQ.rockFraction()), tol); + for (unsigned p : activePhases) { + checkValueAndDerivatives(cpuFs.enthalpy(p), gpuFs.enthalpy(p), + tol, /*checkDerivatives=*/false, "enthalpy", + derivativeComparisons); + } + ++checked; + } + BOOST_TEST_MESSAGE("Real-deck per-cell GPU vs CPU IQ comparison: checked " + << checked << " / " << numCells << " cells"); +} + +BOOST_AUTO_TEST_CASE(TestRealDeckGpuVsCpuFromSimulatorSolution) +{ + // Drive the per-cell IQ update with the EQUIL-initialized primary + // variables of the production CO2STORE deck the user reports diverges + // when the dispatcher is enabled. This reproduces the exact code path + // taken by the dispatcher in production (per-cell PrimaryVariables + // upload + GPU update + readback) and compares every IQ field against + // a CPU reference, so we can identify which field becomes non-finite. + // + // NOTE: this test must run BEFORE the synthetic-deck tests because the + // BlackOilFluidSystem singleton is global state that gets re-set by + // every readDeck call; running this after the synthetic tests in the + // same process leaves the static FluidSystem in a state that is + // inconsistent with the per-cell GPU FluidSystem copy taken here. + const std::string deckPath = "/workspaces/opm/thecaseiwant/deck/THECASEIWANT.DATA"; + if (!std::filesystem::exists(deckPath)) { + BOOST_TEST_MESSAGE("Skipping: deck not found at " << deckPath); + return; + } + runIntensiveQuantitiesTestFromSimulatorSolution(deckPath, /*sampleStride=*/1u); +} + +BOOST_AUTO_TEST_CASE(TestInstantiateGpuFlowProblem) +{ + // Persist the in-source two-phase WATER+GAS+CO2STORE deck to a file so + // the standard Vanguard read path can ingest it. + const TemporaryFile tempFile("test_blackoilintensivequantities_gpu.DATA"); + { + std::ofstream f(tempFile.path); + f << deckString1; + } + runIntensiveQuantitiesTestForDeck(tempFile.path.string(), + /*expectedNumCells=*/27u, + /*sampleStride=*/1u, + /*measureTiming=*/false); +} + +BOOST_AUTO_TEST_CASE(TestPerformanceGpuVsCpuLargeGrid) +{ + // 100^3 = 1'000'000 cells. Build a CO2STORE WATER+GAS deck with a uniform + // unit-cube grid and EQUIL initialization, then time the per-cell IQ + // update on both GPU and CPU. Correctness is checked at every 1000th + // cell (1000 sample points) so the test stays reasonably fast. + // 64^3 = 262'144 cells. Build a CO2STORE WATER+GAS deck with a uniform + // unit-cube grid and EQUIL initialization, then time the per-cell IQ + // update on both GPU and CPU. Correctness is checked at every 256th cell + // (~1024 sample points) so the test stays reasonably fast. + // + // NOTE: the upper grid size here is bounded by the per-cell device + // allocation pattern in GpuEclMaterialLawManager (one cudaMalloc per + // piecewise-linear sample array per cell, i.e. ~12 cudaMalloc per cell). + // With ROCm's typical 4 KiB allocation granularity, ~16 GiB of VRAM + // limits us to a few hundred thousand cells; a 1M-cell deck would + // require consolidating per-cell sample buffers (left as a future + // optimisation, since for a homogeneous deck all cells share the same + // SGWFN table). + constexpr int dim = 64; + constexpr std::size_t expected = + static_cast(dim) * dim * dim; + const TemporaryFile tempFile("test_blackoilintensivequantities_gpu_1M.DATA"); + { + std::ofstream f(tempFile.path); + f << makeDeckString(dim, dim, dim); + } + runIntensiveQuantitiesTestForDeck(tempFile.path.string(), + /*expectedNumCells=*/expected, + /*sampleStride=*/256u, + /*measureTiming=*/true); +} + +BOOST_AUTO_TEST_CASE(TestGpuVsCpuIntensiveQuantitiesEvaluationDerivatives) +{ + // Re-runs the small (3x3x3) WATER+GAS+CO2STORE deck used by + // TestInstantiateGpuFlowProblem, but in addition to the value of every + // intensive-quantity field this test also compares every partial + // derivative of the underlying DenseAd::Evaluation between CPU and GPU. + // This guarantees that the GPU material-law / fluid-system evaluation + // chain is differentiated identically to the CPU one. + const TemporaryFile tempFile("test_blackoilintensivequantities_gpu_deriv.DATA"); + { + std::ofstream f(tempFile.path); + f << deckString1; + } + runIntensiveQuantitiesTestForDeck(tempFile.path.string(), + /*expectedNumCells=*/27u, + /*sampleStride=*/1u, + /*measureTiming=*/false, + /*checkDerivatives=*/true); +} + diff --git a/tests/gpuistl/test_gpu_ecl_thermal_law_manager.cu b/tests/gpuistl/test_gpu_ecl_thermal_law_manager.cu new file mode 100644 index 00000000000..534e9d1abc2 --- /dev/null +++ b/tests/gpuistl/test_gpu_ecl_thermal_law_manager.cu @@ -0,0 +1,210 @@ +/* + Copyright 2026 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#include + +#define BOOST_TEST_MODULE TestGpuEclThermalLawManager + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +namespace { + +using Scalar = double; + +// A minimal stand-in fluid system. The thermal-conduction law only ever +// asks the fluid system whether the gas phase is active and what its +// phase index is. Both are compile-time constants. +struct DummyFluidSystem { + static constexpr int numPhases = 3; + static constexpr int gasPhaseIdx = 2; + OPM_HOST_DEVICE static constexpr bool phaseIsActive(int phaseIdx) + { + return phaseIdx == gasPhaseIdx; + } +}; + +// A minimal stand-in fluid state with just enough surface area to feed +// EclSpecrockLaw / EclThconrLaw / their GPU counterparts. +template +struct DummyFluidState { + using ValueType = ScalarT; + ScalarT temperature_ = 0; + ScalarT gasSaturation_ = 0; + + OPM_HOST_DEVICE ScalarT temperature(int /*phaseIdx*/) const { return temperature_; } + OPM_HOST_DEVICE ScalarT saturation(int /*phaseIdx*/) const { return gasSaturation_; } + OPM_HOST_DEVICE DummyFluidSystem fluidSystem() const { return DummyFluidSystem{}; } +}; + +using GpuSpecrockParamsBuf = Opm::EclSpecrockLawParams; +using GpuSpecrockParamsView = Opm::EclSpecrockLawParams; +using GpuSpecrockLawView = Opm::EclSpecrockLaw; +using GpuThconrLaw = Opm::EclThconrLaw; + +using ManagerCpu = Opm::EclThermalLaw::GpuManager; +using ManagerBuf + = Opm::EclThermalLaw::GpuManager; +using ManagerView + = Opm::EclThermalLaw::GpuManager; + +// Kernel: per-cell, evaluate solid internal energy from the law manager +// at the supplied temperature, and total thermal conductivity at the +// supplied gas saturation. +__global__ void evaluateThermalKernel(ManagerView view, + const Scalar* temperatures, + const Scalar* gasSaturations, + Scalar* outSolidEnergy, + Scalar* outConductivity, + std::size_t numCells) +{ + const std::size_t i = + static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (i >= numCells) { + return; + } + DummyFluidState fs; + fs.temperature_ = temperatures[i]; + fs.gasSaturation_ = gasSaturations[i]; + + const auto solidEnergyParams = view.solidEnergyLawParams(static_cast(i)); + outSolidEnergy[i] = + GpuSpecrockLawView::solidInternalEnergy(solidEnergyParams, fs); + + const auto thermalConductionParams = + view.thermalConductionLawParams(static_cast(i)); + outConductivity[i] = + GpuThconrLaw::thermalConductivity(thermalConductionParams, fs); +} + +} // namespace + +BOOST_AUTO_TEST_CASE(SpecrockAndThconrEvaluateMatchesCpu) +{ + // -- Build a synthetic two-region SPECROCK plus per-cell THCONR setup + // on the host. Cells 0..2 belong to region 0, cells 3..5 to region 1. + constexpr std::size_t numCells = 6; + + const std::vector regionTemperatures0 = { 280.0, 320.0, 360.0, 400.0 }; + const std::vector regionEnergies0 = { 0.0, 4.0e7, 9.0e7, 1.5e8 }; + const std::vector regionTemperatures1 = { 280.0, 350.0, 420.0 }; + const std::vector regionEnergies1 = { 0.0, 3.5e7, 8.4e7 }; + + // Build a host-side CPU manager directly from the basic constructor. + Opm::EclSpecrockLawParams region0Params; + region0Params.setSamples(regionTemperatures0, regionEnergies0); + Opm::EclSpecrockLawParams region1Params; + region1Params.setSamples(regionTemperatures1, regionEnergies1); + + std::vector> cpuSolidEnergyParams; + cpuSolidEnergyParams.push_back(region0Params); + cpuSolidEnergyParams.push_back(region1Params); + + std::vector cpuElementToRegion(numCells); + for (std::size_t i = 0; i < numCells; ++i) { + cpuElementToRegion[i] = (i < 3) ? 0 : 1; + } + + std::vector> cpuThermalConductionParams(numCells); + for (std::size_t i = 0; i < numCells; ++i) { + cpuThermalConductionParams[i].setReferenceTotalThermalConductivity( + 100.0 + 5.0 * static_cast(i)); + cpuThermalConductionParams[i].setDTotalThermalConductivity_dSg( + 0.20 + 0.01 * static_cast(i)); + cpuThermalConductionParams[i].finalize(); + } + + ManagerCpu cpuManager(std::move(cpuSolidEnergyParams), + std::move(cpuElementToRegion), + std::move(cpuThermalConductionParams)); + + // -- Upload to the GPU and create a non-owning view. + auto bufManager = Opm::gpuistl::copy_to_gpu(cpuManager); + auto viewManager = Opm::gpuistl::make_view(bufManager); + + // -- Pick a per-cell test temperature and gas saturation each. + std::array hostTemperatures = { + 290.0, 310.0, 355.0, 285.0, 360.0, 410.0 + }; + std::array hostGasSaturations = { + 0.0, 0.1, 0.25, 0.5, 0.75, 0.99 + }; + + // -- Run the kernel. + auto dTOwner = Opm::gpuistl::make_gpu_unique_ptr_array(numCells); + auto dSgOwner = Opm::gpuistl::make_gpu_unique_ptr_array(numCells); + auto dEnergyOwner = Opm::gpuistl::make_gpu_unique_ptr_array(numCells); + auto dConductivityOwner = Opm::gpuistl::make_gpu_unique_ptr_array(numCells); + Scalar* dT = dTOwner.get(); + Scalar* dSg = dSgOwner.get(); + Scalar* dEnergy = dEnergyOwner.get(); + Scalar* dConductivity = dConductivityOwner.get(); + OPM_GPU_SAFE_CALL(cudaMemcpy(dT, hostTemperatures.data(), + numCells * sizeof(Scalar), cudaMemcpyHostToDevice)); + OPM_GPU_SAFE_CALL(cudaMemcpy(dSg, hostGasSaturations.data(), + numCells * sizeof(Scalar), cudaMemcpyHostToDevice)); + + evaluateThermalKernel<<<1, 32>>>(viewManager, dT, dSg, dEnergy, dConductivity, numCells); + OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); + OPM_GPU_SAFE_CALL(cudaGetLastError()); + + std::array hostEnergyResult {}; + std::array hostConductivityResult {}; + OPM_GPU_SAFE_CALL(cudaMemcpy(hostEnergyResult.data(), dEnergy, + numCells * sizeof(Scalar), cudaMemcpyDeviceToHost)); + OPM_GPU_SAFE_CALL(cudaMemcpy(hostConductivityResult.data(), dConductivity, + numCells * sizeof(Scalar), cudaMemcpyDeviceToHost)); + + // -- Compare against the CPU manager. + for (std::size_t i = 0; i < numCells; ++i) { + DummyFluidState fs; + fs.temperature_ = hostTemperatures[i]; + fs.gasSaturation_ = hostGasSaturations[i]; + + const auto& cpuSolidParams = + cpuManager.solidEnergyLawParams(static_cast(i)); + const Scalar cpuEnergy = + Opm::EclSpecrockLaw::solidInternalEnergy(cpuSolidParams, fs); + BOOST_CHECK_CLOSE(cpuEnergy, hostEnergyResult[i], 1e-9); + + const auto& cpuConductionParams = + cpuManager.thermalConductionLawParams(static_cast(i)); + const Scalar cpuConductivity = + GpuThconrLaw::thermalConductivity(cpuConductionParams, fs); + BOOST_CHECK_CLOSE(cpuConductivity, hostConductivityResult[i], 1e-9); + } +} diff --git a/tests/gpuistl/test_gpu_linear_two_phase_material.cu b/tests/gpuistl/test_gpu_linear_two_phase_material.cu index 68512d9ab8a..604de549a2c 100644 --- a/tests/gpuistl/test_gpu_linear_two_phase_material.cu +++ b/tests/gpuistl/test_gpu_linear_two_phase_material.cu @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -80,8 +81,10 @@ BOOST_AUTO_TEST_CASE(TestSimpleInterpolation) NorneEvaluation* gpuAdInput; NorneEvaluation* gpuAdResOnGPU; - OPM_GPU_SAFE_CALL(cudaMalloc(&gpuAdInput, sizeof(NorneEvaluation))); - OPM_GPU_SAFE_CALL(cudaMalloc(&gpuAdResOnGPU, sizeof(NorneEvaluation))); + auto gpuAdInputOwner = Opm::gpuistl::make_gpu_unique_ptr(); + auto gpuAdResOnGPUOwner = Opm::gpuistl::make_gpu_unique_ptr(); + gpuAdInput = gpuAdInputOwner.get(); + gpuAdResOnGPU = gpuAdResOnGPUOwner.get(); for (Scalar x_i : testXs){ auto cpuMadeAd = NorneEvaluation(x_i, 0); @@ -96,7 +99,4 @@ BOOST_AUTO_TEST_CASE(TestSimpleInterpolation) BOOST_CHECK(gpuAdResOnCPU == cpuInterpolatedEval); } - - OPM_GPU_SAFE_CALL(cudaFree(gpuAdInput)); - OPM_GPU_SAFE_CALL(cudaFree(gpuAdResOnGPU)); } diff --git a/tests/gpuistl/test_gpu_smart_pointers.cu b/tests/gpuistl/test_gpu_smart_pointers.cu index 5ba3b7a93bb..881e19a349b 100644 --- a/tests/gpuistl/test_gpu_smart_pointers.cu +++ b/tests/gpuistl/test_gpu_smart_pointers.cu @@ -145,3 +145,44 @@ BOOST_AUTO_TEST_CASE(TestCopyToGPU) { BOOST_CHECK_EQUAL(valueFromDeviceArray[2], 3.0); BOOST_CHECK_EQUAL(valueFromDeviceArray[3], 4.0); } + +BOOST_AUTO_TEST_CASE(TestUniquePointerArray) +{ + constexpr std::size_t numElements = 5; + auto arrayPtr = Opm::gpuistl::make_gpu_unique_ptr_array(numElements); + + const std::array hostInput {1.5, 2.5, 3.5, 4.5, 5.5}; + OPM_GPU_SAFE_CALL(cudaMemcpy(arrayPtr.get(), + hostInput.data(), + numElements * sizeof(double), + cudaMemcpyHostToDevice)); + + std::array hostOutput {}; + OPM_GPU_SAFE_CALL(cudaMemcpy(hostOutput.data(), + arrayPtr.get(), + numElements * sizeof(double), + cudaMemcpyDeviceToHost)); + + for (std::size_t i = 0; i < numElements; ++i) { + BOOST_CHECK_EQUAL(hostOutput[i], hostInput[i]); + } +} + +BOOST_AUTO_TEST_CASE(TestManagedUniquePointer) +{ + // Default-constructed payload. + auto managedPtr = Opm::gpuistl::make_gpu_managed_unique_ptr(); + BOOST_CHECK_EQUAL(managedPtr->isCalled, false); + + // Mutate from the device through a regular kernel; the unified-memory + // pointer is dereferenceable on the device, just like cudaMallocManaged. + callFunction<<<1, 1>>>(Opm::gpuistl::make_view(managedPtr)); + OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); + + // Read back directly from host (the whole point of unified memory). + BOOST_CHECK_EQUAL(managedPtr->isCalled, true); + + // Forwarded constructor arguments. + auto managedDouble = Opm::gpuistl::make_gpu_managed_unique_ptr(7.25); + BOOST_CHECK_EQUAL(*managedDouble, 7.25); +} diff --git a/tests/very_simple_deck.DATA b/tests/very_simple_deck.DATA new file mode 100644 index 00000000000..8c0623914ab --- /dev/null +++ b/tests/very_simple_deck.DATA @@ -0,0 +1,53 @@ +-- =============== RUNSPEC +RUNSPEC +DIMENS +3 3 3 / +EQLDIMS +/ +TABDIMS +/ +WATER +GAS +CO2STORE +METRIC + +-- =============== GRID +GRID +GRIDFILE +0 0 / +DX +27*1 / +DY +27*1 / +DZ +27*1 / +TOPS +9*0 / +PERMX +27*1013.25 / +PORO +27*0.25 / +COPY +PERMX PERMY / +PERMX PERMZ / +/ + +-- =============== PROPS +PROPS +SGWFN +0.000000E+00 0.000000E+00 1.000000E+00 3.060000E-02 +1.000000E+00 1.000000E+00 0.000000E+00 3.060000E-01 / + +-- =============== SOLUTION +SOLUTION +RPTRST +'BASIC=0' / +EQUIL +0 300 100 0 0 0 1 1 0 / + +-- =============== SCHEDULE +SCHEDULE +RPTRST +'BASIC=0' / +TSTEP +1 /