forked from Exawind/nalu-wind
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAssembleFaceElemSolverAlgorithm.h
More file actions
174 lines (146 loc) · 6.51 KB
/
AssembleFaceElemSolverAlgorithm.h
File metadata and controls
174 lines (146 loc) · 6.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
// Copyright 2017 National Technology & Engineering Solutions of Sandia, LLC
// (NTESS), National Renewable Energy Laboratory, University of Texas Austin,
// Northwest Research Associates. Under the terms of Contract DE-NA0003525
// with NTESS, the U.S. Government retains certain rights in this software.
//
// This software is released under the BSD 3-clause license. See LICENSE file
// for more details.
//
#ifndef AssembleFaceElemSolverAlgorithm_h
#define AssembleFaceElemSolverAlgorithm_h
#include <SolverAlgorithm.h>
#include <ElemDataRequests.h>
#include <Realm.h>
#include <ScratchViews.h>
#include <SimdInterface.h>
#include <SharedMemData.h>
#include <CopyAndInterleave.h>
#include <stk_mesh/base/NgpMesh.hpp>
#include <ngp_utils/NgpFieldManager.h>
#include <ngp_utils/NgpMEUtils.h>
// Forward declaration only: this header names stk::mesh::Part as a pointer
// parameter of the algorithm's constructor, so the full definition is not
// required at this point.
namespace stk { namespace mesh { class Part; } }
namespace sierra{
namespace nalu{
/** Solver algorithm that gathers data for side-rank entities (faces) together
 *  with the first element connected to each face, and hands the gathered
 *  data to a user-supplied callable.
 *
 *  The heavy lifting is done by run_face_elem_algorithm(), which is defined
 *  in this header because it is templated on the callable type. The
 *  constructor, initialize_connectivity() and execute() are defined
 *  elsewhere.
 */
class AssembleFaceElemSolverAlgorithm : public SolverAlgorithm
{
public:
  AssembleFaceElemSolverAlgorithm(
    Realm &realm,
    stk::mesh::Part *part,
    EquationSystem *eqSystem,
    unsigned nodesPerFace,
    unsigned nodesPerElem);
  virtual ~AssembleFaceElemSolverAlgorithm() {}
  virtual void initialize_connectivity();
  virtual void execute();

  /** Loop over the locally-owned side-rank buckets selected by partVec_ and
   *  invoke lamdbaFunc once per batch of faces.
   *
   *  One league member of the team policy is launched per bucket. Within a
   *  bucket the faces are split into SIMD groups; inside each group,
   *  consecutive faces are batched together only while they share the same
   *  element-face ordinal. A face with a different ordinal closes the current
   *  batch (the callable is invoked) and opens the next one, so a single SIMD
   *  group may trigger several invocations of the callable.
   *
   *  Only the first element / ordinal attached to each face (index 0) is
   *  used.
   *
   *  @param bulk        Bulk data used to query meta data (spatial dimension,
   *                     field count, side rank) and to select bucket ids.
   *  @param lamdbaFunc  Callable invoked as lamdbaFunc(smdata) with the
   *                     SharedMemData_FaceElem scratch object filled for the
   *                     current batch.
   */
  template<typename LambdaFunction>
  void run_face_elem_algorithm(stk::mesh::BulkData& bulk, LambdaFunction lamdbaFunc)
  {
    int nDim = bulk.mesh_meta_data().spatial_dimension();
    int totalNumFields = bulk.mesh_meta_data().get_fields().size();

    // Register face ME instance in elemdata also to obtain face integration points
    if (elemDataNeeded_.get_cvfem_face_me() == nullptr)
      elemDataNeeded_.add_cvfem_face_me(faceDataNeeded_.get_cvfem_face_me());

    // Sizes used to dimension the per-thread scratch memory.
    const int rhsSize = nodesPerElem_ * numDof_;
    const int lhsSize = rhsSize * rhsSize;
    const int scratchIdsSize = rhsSize;

    const stk::mesh::NgpMesh& ngpMesh = realm_.ngp_mesh();
    const nalu_ngp::FieldManager& fieldMgr = realm_.ngp_field_manager();
    ElemDataRequestsGPU faceDataNGP(fieldMgr, faceDataNeeded_, totalNumFields);
    ElemDataRequestsGPU elemDataNGP(fieldMgr, elemDataNeeded_, totalNumFields);

    const int bytes_per_team = 0;
    const int bytes_per_thread = calculate_shared_mem_bytes_per_thread(
      lhsSize, rhsSize, scratchIdsSize, nDim, faceDataNGP, elemDataNGP);

    // Local copies of the members so that the parallel lambda below can
    // capture them by value instead of reaching through `this`.
    const auto nodesPerFace = nodesPerFace_;
    const auto nodesPerElem = nodesPerElem_;

    stk::mesh::Selector s_locally_owned_union =
      bulk.mesh_meta_data().locally_owned_part() &
      stk::mesh::selectUnion(partVec_);
    stk::mesh::EntityRank sideRank = bulk.mesh_meta_data().side_rank();
    const auto& buckets = stk::mesh::get_bucket_ids(bulk, sideRank, s_locally_owned_union);

    auto team_exec = sierra::nalu::get_device_team_policy(
      buckets.size(), bytes_per_team, bytes_per_thread);

    Kokkos::parallel_for(
      team_exec, KOKKOS_LAMBDA(const sierra::nalu::DeviceTeamHandleType& team) {
        auto bktId = buckets.device_get(team.league_rank());
        auto& b = ngpMesh.get_bucket(sideRank, bktId);

#ifndef KOKKOS_ENABLE_CUDA
        // Uses the by-value local nodesPerFace (not the nodesPerFace_ member)
        // so that nothing inside this lambda refers to `this`.
        ThrowAssertMsg(
          b.topology().num_nodes() == nodesPerFace,
          "AssembleFaceElemSolverAlgorithm expected nodesPerFace_ = "
            << nodesPerFace << ", but b.topology().num_nodes() = "
            << b.topology().num_nodes());
#endif

        SharedMemData_FaceElem<DeviceTeamHandleType, DeviceShmem> smdata(
          team, nDim, faceDataNGP, elemDataNGP, nodesPerFace, nodesPerElem,
          rhsSize);

        const size_t bucketLen = b.size();
        const size_t simdBucketLen =
          sierra::nalu::get_num_simd_groups(bucketLen);

        Kokkos::parallel_for(
          Kokkos::TeamThreadRange(team, simdBucketLen),
          [&](const size_t& bktIndex) {
            size_t simdGroupLen = sierra::nalu::get_length_of_next_simd_group(
              bktIndex, bucketLen);
            size_t numFacesProcessed = 0;

            // Each pass of this do/while handles one batch of consecutive
            // faces that share a single element-face ordinal.
            do {
              int elemFaceOrdinal = -1; // -1: no face taken into this batch yet
              int simdFaceIndex = 0;

              while ((numFacesProcessed + simdFaceIndex) < simdGroupLen) {
                stk::mesh::Entity face =
                  b[bktIndex * simdLen + numFacesProcessed + simdFaceIndex];
                const auto ngpFaceIndex = ngpMesh.fast_mesh_index(face);
                // ThrowAssertMsg(
                //   bulk.num_elements(face) == 1,
                //   "Expecting just 1 element attached to face!");
                int thisElemFaceOrdinal =
                  ngpMesh.get_element_ordinals(sideRank, ngpFaceIndex)[0];

                // A different ordinal ends the batch; this face is picked up
                // again as the first face of the next batch.
                if (elemFaceOrdinal >= 0 && thisElemFaceOrdinal != elemFaceOrdinal) {
                  break;
                }

                const auto& elems = ngpMesh.get_elements(sideRank, ngpFaceIndex);
                const auto elemIndex = ngpMesh.fast_mesh_index(elems[0]);
                smdata.ngpConnectedNodes[simdFaceIndex] =
                  ngpMesh.get_nodes(stk::topology::ELEMENT_RANK, elemIndex);
                smdata.elemFaceOrdinal = thisElemFaceOrdinal;
                elemFaceOrdinal = thisElemFaceOrdinal;

                sierra::nalu::fill_pre_req_data(
                  faceDataNGP, ngpMesh, sideRank, face,
                  *smdata.faceViews[simdFaceIndex]);
                sierra::nalu::fill_pre_req_data(
                  elemDataNGP, ngpMesh, stk::topology::ELEMENT_RANK, elems[0],
                  *smdata.elemViews[simdFaceIndex]);

                ++simdFaceIndex;
              }

              smdata.numSimdFaces = simdFaceIndex;
              numFacesProcessed += simdFaceIndex;

#ifndef KOKKOS_ENABLE_CUDA
              // No need to interleave on GPUs
              copy_and_interleave(
                smdata.faceViews, smdata.numSimdFaces, smdata.simdFaceViews);
              copy_and_interleave(
                smdata.elemViews, smdata.numSimdFaces, smdata.simdElemViews);
#endif

              fill_master_element_views(
                faceDataNGP, smdata.simdFaceViews, smdata.elemFaceOrdinal);
              fill_master_element_views(
                elemDataNGP, smdata.simdElemViews, smdata.elemFaceOrdinal);

              lamdbaFunc(smdata);
            } while (numFacesProcessed < simdGroupLen);
          });
      });
  }

  // Data requests gathered for each face and for its connected element;
  // both are read by run_face_elem_algorithm().
  ElemDataRequests faceDataNeeded_;
  ElemDataRequests elemDataNeeded_;
  // Defaults to 1.0; not referenced anywhere in this header.
  double diagRelaxFactor_{1.0};
  // Multiplied by nodesPerElem_ to size the rhs in run_face_elem_algorithm().
  unsigned numDof_;
  unsigned nodesPerFace_;
  unsigned nodesPerElem_;
  // Not referenced in this header (run_face_elem_algorithm() computes its own
  // local rhsSize).
  int rhsSize_;
};
} // namespace nalu
} // namespace sierra
#endif