Skip to content
Merged

Gpu #244

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions applications/solvers/dfLowMachFoam/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Build description for the dfLowMachFoam solver executable (C++ only).
cmake_minimum_required(VERSION 3.5)
project(dfLowMachFoam LANGUAGES CXX)

# Mandatory external packages: configuration aborts if any of them is not found.
find_package(MPI REQUIRED)
find_package(OpenMP REQUIRED)
find_package(CUDA REQUIRED)

# Check valid thirdParty
if(DEFINED ENV{WM_PROJECT_DIR})
Expand All @@ -26,6 +28,8 @@ SET(SRC_ORIG $ENV{SRC_ORIG})

# set compilation options
# Executable link line: select the BFD linker and pass the two -Xlinker
# options through unchanged.
set(CMAKE_EXE_LINKER_FLAGS "-fuse-ld=bfd -Xlinker --add-needed -Xlinker --no-as-needed")
# CMAKE_<LANG>_FLAGS is one space-separated string. The expansion must be
# quoted: an unquoted set(VAR a b) stores the list "a;b", and the ';' would
# end up inside the compiler command line whenever the existing flags are
# non-empty.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")

SET(CMAKE_C_COMPILER g++)
SET(PATH_LIB_OPENMPI "openmpi-system") # Foundation version
Expand Down Expand Up @@ -83,6 +87,9 @@ include_directories(
${CANTERA_ROOT}/include
${MPI_INCLUDE_PATH}
${PROJECT_SOURCE_DIR}
${CUDA_INCLUDE_DIRS}
/home/runze/AmgX/AMGX/include
/home/runze/deepflame-dev/src_gpu
)

# add execution
Expand All @@ -98,6 +105,9 @@ target_link_libraries(${PROJECT_NAME}
${DF_ROOT}/lib/libdfCombustionModels.so
$ENV{FOAM_LIBBIN}/openmpi-system/libPstream.so
${MPI_LIBRARIES}
${CUDA_LIBRARIES}
/home/runze/AmgX/AMGX/build/libamgxsh.so
/home/runze/deepflame-dev/src_gpu/build/libdfMatrix.so
)

if(DEFINED ENV{PYTHON_INC_DIR})
Expand Down
7 changes: 5 additions & 2 deletions applications/solvers/dfLowMachFoam/Make/options
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
-include $(GENERAL_RULES)/mplibType

EXE_INC = -std=c++14 \
-fopenmp \
-Wno-unused-variable \
-Wno-unused-but-set-variable \
-Wno-old-style-cast \
Expand All @@ -25,7 +26,8 @@ EXE_INC = -std=c++14 \
$(if $(LIBTORCH_ROOT),-I$(LIBTORCH_ROOT)/include/torch/csrc/api/include,) \
$(PYTHON_INC_DIR) \
-I/home/runze/deepflame-dev/src_gpu \
-I/usr/local/cuda-11.6/include
-I/usr/local/cuda-11.6/include \
-I/home/runze/AmgX/AMGX/include

EXE_LIBS = \
-lcompressibleTransportModels \
Expand All @@ -48,5 +50,6 @@ EXE_LIBS = \
$(if $(PYTHON_LIB_DIR),-L$(PYTHON_LIB_DIR),) \
$(if $(PYTHON_LIB_DIR),-lpython3.8,) \
/home/runze/deepflame-dev/src_gpu/build/libdfMatrix.so \
/usr/local/cuda-11.6/lib64/libcudart.so
/usr/local/cuda-11.6/lib64/libcudart.so \
/home/runze/AmgX/AMGX/build/libamgxsh.so

75 changes: 61 additions & 14 deletions applications/solvers/dfLowMachFoam/UEqn.H
Original file line number Diff line number Diff line change
@@ -1,22 +1,45 @@
// Solve the Momentum equation

// start1 = std::clock();
// Info << "U_of = " << U << endl;
tmp<fvVectorMatrix> tUEqn
(
fvm::ddt(rho, U) + fvm::div(phi, U)
+ turbulence->divDevRhoReff(U)
+ turbulence->divDevRhoReff(U) == -fvc::grad(p)
);
fvVectorMatrix& UEqn = tUEqn.ref();
// end1 = std::clock();
// time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);

UEqn.relax();
// start1 = std::clock();
// fvVectorMatrix turb_source
// (
// turbulence->divDevRhoReff(U)
// );
// end1 = std::clock();
// time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);

if (pimple.momentumPredictor())
{
solve(UEqn == -fvc::grad(p));

K = 0.5*magSqr(U);
}
// UEqn.relax();
// start1 = std::clock();
// if (pimple.momentumPredictor())
// {
// solve(UEqn);

// K = 0.5*magSqr(U);
// }
// Info << "U_of = " << U << endl;
// end1 = std::clock();
// time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_UEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);


start1 = std::clock();

UEqn_GPU.fvm_ddt(&rho.oldTime()[0], &rho[0], &U.oldTime()[0][0]);

start2 = std::clock();
int offset = 0;
forAll(U.boundaryField(), patchi)
{
Expand All @@ -39,18 +62,42 @@ forAll(U.boundaryField(), patchi)

// boundary pressure
std::copy(&patchP[0], &patchP[0]+patchSize, boundary_pressure_init.begin()+offset);
// boundary face vector
std::copy(&pSf[0][0], &pSf[0][0]+3*patchSize, boundary_face_vector_init.begin()+3*offset);

offset += patchSize;
}
end2 = std::clock();
time_monitor_CPU += double(end2 - start2) / double(CLOCKS_PER_SEC);

UEqn_GPU.fvm_div(&phi[0], ueqn_internalCoeffs_init, ueqn_boundaryCoeffs_init, boundary_pressure_init);
UEqn_GPU.fvc_grad(&p[0]);

start2 = std::clock();
fvVectorMatrix turb_source
(
turbulence->divDevRhoReff(U)
);
end2 = std::clock();
time_monitor_CPU += double(end2 - start2) / double(CLOCKS_PER_SEC);

UEqn_GPU.fvm_ddt(&rho.oldTime()[0], &rho[0], &mesh.V()[0], &U.oldTime()[0][0]);
UEqn_GPU.fvm_div(&mesh.surfaceInterpolation::weights()[0], &phi[0], ueqn_internalCoeffs_init, ueqn_boundaryCoeffs_init);
UEqn_GPU.fvc_grad(&mesh.Sf()[0][0], &p[0], boundary_face_vector_init, boundary_pressure_init);
UEqn_GPU.add_fvMatrix(&turb_source.lower()[0], &turb_source.diag()[0], &turb_source.upper()[0], &turb_source.source()[0][0]);
UEqn_GPU.print();
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);

// UEqn_GPU.checkValue(false);

start1 = std::clock();
UEqn_GPU.solve();
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);

start1 = std::clock();
UEqn_GPU.updatePsi(&U[0][0]);
K = 0.5*magSqr(U);
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
// Info << "U_amgx = " << U << endl;

11 changes: 9 additions & 2 deletions applications/solvers/dfLowMachFoam/createdfSolver.H
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,25 @@ int num_cells = mesh.nCells();
int num_surfaces = neighbour.size();

std::vector<int> boundaryCellIndex;
std::vector<double> boundary_face_vector_init;
int num_boundary_faces = 0;
int patchSize;
int offset = 0;
// Walk every boundary patch and append, in patch order,
//   - the face-cell labels of the patch to boundaryCellIndex, and
//   - the face-area vectors of the patch (3 scalars per face) to
//     boundary_face_vector_init,
// while counting the total number of boundary faces.
forAll(mesh.boundary(), patchi)
{
    labelUList sub_boundary = mesh.boundary()[patchi].faceCells();
    patchSize = sub_boundary.size();

    // Nothing to copy for an empty patch; skipping also avoids taking the
    // address of element 0 of an empty list.
    if (patchSize == 0)
    {
        continue;
    }

    const vectorField& pSf = mesh.Sf().boundaryField()[patchi];

    boundaryCellIndex.insert(boundaryCellIndex.end(), &sub_boundary[0], &sub_boundary[0]+patchSize);
    // Always append at end(): an insert position past end() is invalid, and
    // the previous "end()+offset" was only correct because offset stayed 0.
    // NOTE(review): relies on each vector being 3 contiguous scalars, as the
    // original pointer arithmetic did - confirm.
    boundary_face_vector_init.insert(boundary_face_vector_init.end(), &pSf[0][0], &pSf[0][0]+3*patchSize);
    num_boundary_faces += patchSize;
}
// Initialised so that no indeterminate value is handed to the dfMatrix
// constructor. NOTE(review): presumably the constructor fills this in or
// ignores it - confirm against dfMatrix.
int num_boundary_cells = 0;

dfMatrix UEqn_GPU(num_surfaces, num_cells, num_boundary_faces, num_boundary_cells, &neighbour[0], &owner[0], boundaryCellIndex);
dfMatrix UEqn_GPU(num_surfaces, num_cells, num_boundary_faces, num_boundary_cells, &neighbour[0], &owner[0], &mesh.V()[0], &mesh.surfaceInterpolation::weights()[0],
&mesh.Sf()[0][0], boundary_face_vector_init, boundaryCellIndex, "dDDI", "/home/runze/deepflame-dev/examples/dfLowMachFoam/threeD_reactingTGV/H2/cvodeIntegrator/system/amgxUOptions");

std::vector<double> ueqn_internalCoeffs_init(3*num_boundary_faces), ueqn_boundaryCoeffs_init(3*num_boundary_faces);
std::vector<double> boundary_pressure_init(num_boundary_faces), boundary_face_vector_init(3*num_boundary_faces);
std::vector<double> boundary_pressure_init(num_boundary_faces);
// std::vector<double> boundary_face_vector_init(3*num_boundary_faces);
32 changes: 30 additions & 2 deletions applications/solvers/dfLowMachFoam/dfLowMachFoam.C
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,18 @@ int main(int argc, char *argv[])
#include "createRhoUfIfPresent.H"

double time_monitor_flow=0;
double time_monitor_UEqn=0;
double time_monitor_UEqn_mtxAssembly=0;
double time_monitor_UEqn_Solve=0;
double time_monitor_UEqn_sum=0;
double time_monitor_chem=0;
double time_monitor_Y=0;
double time_monitor_E=0;
double time_monitor_corrThermo=0;
double time_monitor_corrDiff=0;
double time_monitor_CPU=0;
label timeIndex = 0;
clock_t start, end;
clock_t start, end, start1, end1, start2, end2;

turbulence->validate();

Expand All @@ -99,7 +104,12 @@ int main(int argc, char *argv[])
#include "setInitialDeltaT.H"
}

start1 = std::clock();
#include "createdfSolver.H"
end1 = std::clock();
// time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
// double time_UEqn_initial = time_monitor_UEqn;

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //

Expand Down Expand Up @@ -146,7 +156,7 @@ int main(int argc, char *argv[])
{
#include "rhoEqn.H"
}

start = std::clock();
#include "UEqn.H"
end = std::clock();
Expand Down Expand Up @@ -199,6 +209,7 @@ int main(int argc, char *argv[])
rho = thermo.rho();

runTime.write();
time_monitor_UEqn_sum += time_monitor_UEqn;

Info << "output time index " << runTime.timeIndex() << endl;

Expand All @@ -209,11 +220,28 @@ int main(int argc, char *argv[])
Info<< "Energy Equations = " << time_monitor_E << " s" << endl;
Info<< "thermo & Trans Properties = " << time_monitor_corrThermo << " s" << endl;
Info<< "Diffusion Correction Time = " << time_monitor_corrDiff << " s" << endl;
Info<< "UEqn Time = " << time_monitor_UEqn << " s" << endl;
Info<< "UEqn Time assamble Mtx = " << time_monitor_UEqn_mtxAssembly << " s" << endl;
Info<< "UEqn Time solve = " << time_monitor_UEqn_Solve << " s" << endl;
// Info<< "UEqn sum Time = " << time_monitor_UEqn_sum << " s" << endl;
// Info<< "UEqn sum Time - overhead = " << time_monitor_UEqn_sum - time_UEqn_initial << " s" << endl;
Info<< "sum Time = " << (time_monitor_chem + time_monitor_Y + time_monitor_flow + time_monitor_E + time_monitor_corrThermo + time_monitor_corrDiff) << " s" << endl;
Info<< "CPU Time (get turb souce) = " << time_monitor_CPU << " s" << endl;
Info<< "============================================"<<nl<< endl;

Info<< "ExecutionTime = " << runTime.elapsedCpuTime() << " s"
<< " ClockTime = " << runTime.elapsedClockTime() << " s" << endl;
time_monitor_UEqn = 0;
time_monitor_UEqn_mtxAssembly = 0;
time_monitor_UEqn_Solve = 0;
time_monitor_chem = 0;
time_monitor_Y = 0;
time_monitor_E = 0;
time_monitor_flow = 0;
time_monitor_corrThermo = 0;
time_monitor_corrDiff = 0;
time_monitor_CPU = 0;

#ifdef USE_PYTORCH
if (log_ && torch_)
{
Expand Down
4 changes: 4 additions & 0 deletions bashrc.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ export DF_APPBIN=pwd/bin
export DF_LIBBIN=pwd/lib
export PATH=$DF_APPBIN:$PATH
export LD_LIBRARY_PATH=$DF_LIBBIN:$LD_LIBRARY_PATH
# AmgX installation root. A value already set in the environment wins;
# otherwise fall back to the previous hard-coded default location.
export AMGX_DIR="${AMGX_DIR:-/home/runze/AmgX/AMGX}"
# Run-time search path for the GPU libraries: the src_gpu build directory
# first, then the AmgX build directory (prepended last, so searched first).
export LD_LIBRARY_PATH=$DF_ROOT/src_gpu/build:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$AMGX_DIR/build:$LD_LIBRARY_PATH

Loading