Skip to content

Commit f081805

Browse files
m-brettellMarkstephenswat
authored
GBTS hit bidding and dropout (#1296)
* seed hit dropout and bidding * seed ambiwq * move configs to struct * formating * adding seed sorting * better dropout logic * better naming * update names * add dropout for low-eta if all seeds are incompatable * formatting * removing seed sorting test adding use_dropout flag * formatting * reverting testing change in seed fitting * fixing fp63->fp32 warnings --------- Co-authored-by: Mark <markbrettel@cern.ch> Co-authored-by: Stephen Nicholas Swatman <stephen.nicholas.swatman@cern.ch>
1 parent 26f8099 commit f081805

3 files changed

Lines changed: 199 additions & 15 deletions

File tree

core/include/traccc/gbts_seeding/gbts_seeding_config.hpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,16 @@ struct gbts_seed_extraction_params {
125125
float max_z0 = 160.0f;
126126
};
127127

128+
struct gbts_seed_ambi_params {
129+
// sample multiple triplets when forming seeds to hedge against outliers
130+
bool use_dropout = true;
131+
// these curvatures are in 1/m
132+
float dropout_dcurv_m = 0.007f;
133+
float force_dropout_max_curv_m = 0.03f;
134+
float best_hit_frac = 0.49f;
135+
float tight_bid_cot_threshold = 1.0f;
136+
};
137+
128138
struct gbts_seedfinder_config {
129139
bool setLinkingScheme(
130140
const std::vector<std::pair<int, std::vector<int>>>& binTables,
@@ -142,8 +152,8 @@ struct gbts_seedfinder_config {
142152

143153
// tuned for 900 MeV pT cut and scaled by input minPt
144154
gbts_graph_building_params graph_building_params{};
145-
146155
gbts_seed_extraction_params seed_extraction_params{};
156+
gbts_seed_ambi_params seed_ambi_params{};
147157

148158
// node making bin counts
149159
unsigned int n_eta_bins = 0; // calculated from input layerInfo

device/cuda/src/gbts_seeding/gbts_seeding_algorithm.cu

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ struct gbts_ctx {
105105
int2* d_seed_proposals{}; // int quality and final mini_state_idx
106106
// first 32 bits are seed quality second 32 bits are seed_proposals index
107107
unsigned long long int* d_edge_bids{};
108+
unsigned long long int* d_hit_bids{};
108109
// 0 as default/is real seed, 1 as maybe seed,
109110
//-1 as maybe fake seed, -2 as fake
110111
char* d_seed_ambiguity{};
@@ -823,9 +824,15 @@ gbts_seeding_algorithm::output_type gbts_seeding_algorithm::operator()(
823824

824825
cudaStreamSynchronize(stream);
825826

827+
error = cudaGetLastError();
828+
if (error != cudaSuccess) {
829+
TRACCC_ERROR(
830+
"seed extraction: CUDA error: " << cudaGetErrorString(error));
831+
return {0, m_mr.main};
832+
}
833+
826834
cudaFree(ctx.d_levels);
827835
cudaFree(ctx.d_outgoing_paths);
828-
cudaFree(ctx.d_reducedSP);
829836

830837
if (nProps == 0) {
831838
return {0, m_mr.main};
@@ -865,26 +872,47 @@ gbts_seeding_algorithm::output_type gbts_seeding_algorithm::operator()(
865872
cudaFree(ctx.d_graph_building_params);
866873

867874
// 8. convert to 3sp seeds and make output buffer
868-
875+
// allocate extra seed space for hit permutation
869876
edm::seed_collection::buffer output_seeds(
870-
ctx.nSeeds, m_mr.main, vecmem::data::buffer_type::resizable);
871-
m_copy.get().setup(output_seeds)->ignore();
877+
2 * ctx.nSeeds, m_mr.main, vecmem::data::buffer_type::resizable);
878+
m_copy.get().setup(output_seeds)->wait();
879+
880+
cudaMalloc(&ctx.d_hit_bids, ctx.nSp * sizeof(unsigned long long int));
881+
cudaMemsetAsync(ctx.d_hit_bids, 0, ctx.nSp * sizeof(unsigned long long int),
882+
stream);
883+
884+
nThreads = 128;
885+
nBlocks = 1 + (ctx.nSeeds - 1) / nThreads;
886+
887+
kernels::seeds_bid_for_hits<<<nBlocks, nThreads, 0, stream>>>(
888+
ctx.d_output_graph, ctx.d_seed_proposals, ctx.d_path_store,
889+
ctx.d_seed_ambiguity, ctx.d_hit_bids, nProps,
890+
1 + 2 + m_config.max_num_neighbours);
872891

873892
kernels::gbts_seed_conversion_kernel<<<nBlocks, nThreads, 0, stream>>>(
874893
ctx.d_seed_proposals, ctx.d_seed_ambiguity, ctx.d_path_store,
875-
ctx.d_output_graph, output_seeds, nProps, m_config.max_num_neighbours);
894+
ctx.d_output_graph, ctx.d_reducedSP, output_seeds, ctx.d_hit_bids,
895+
nProps, m_config.max_num_neighbours,
896+
m_config.seed_ambi_params.dropout_dcurv_m,
897+
m_config.seed_ambi_params.force_dropout_max_curv_m,
898+
m_config.seed_ambi_params.best_hit_frac,
899+
m_config.seed_ambi_params.tight_bid_cot_threshold,
900+
m_config.seed_ambi_params.use_dropout);
876901

877902
cudaStreamSynchronize(stream);
878903

904+
ctx.nSeeds = m_copy.get().get_size(output_seeds);
905+
906+
cudaFree(ctx.d_reducedSP);
879907
cudaFree(ctx.d_output_graph);
880908
cudaFree(ctx.d_path_store);
881909
cudaFree(ctx.d_seed_proposals);
882910
cudaFree(ctx.d_seed_ambiguity);
911+
cudaFree(ctx.d_hit_bids);
883912

884913
error = cudaGetLastError();
885-
886914
if (error != cudaSuccess) {
887-
TRACCC_ERROR("seed-extracting kalman filter: CUDA error: "
915+
TRACCC_ERROR("Seed ambiguity solving: CUDA error: "
888916
<< cudaGetErrorString(error));
889917
return {0, m_mr.main};
890918
}

device/cuda/src/gbts_seeding/kernels/GbtsGraphProcessingKernels.cuh

Lines changed: 153 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -693,10 +693,91 @@ void __global__ seeds_rebid_for_edges(int2* d_path_store,
693693
}
694694
}
695695

696+
void __global__ seeds_bid_for_hits(int* d_output_graph, int2* d_seed_proposals,
697+
int2* d_path_store, char* d_seed_ambiguity,
698+
unsigned long long int* d_hit_bids,
699+
const unsigned int nProps, int edge_size) {
700+
701+
for (unsigned int prop_idx = threadIdx.x + blockDim.x * blockIdx.x;
702+
prop_idx < nProps; prop_idx += gridDim.x * blockDim.x) {
703+
if (d_seed_ambiguity[prop_idx] == -2) {
704+
continue;
705+
}
706+
int2 prop = d_seed_proposals[prop_idx];
707+
unsigned long long int seed_bid =
708+
(static_cast<unsigned long long int>(prop.x) << 32) |
709+
(static_cast<unsigned long long int>(prop_idx));
710+
711+
int2 path = make_int2(0, prop.y);
712+
while (path.y >= 0) {
713+
path = d_path_store[path.y];
714+
int sp_idx = d_output_graph[traccc::device::gbts_consts::node1 +
715+
edge_size * path.x];
716+
atomicMax(&d_hit_bids[sp_idx], seed_bid);
717+
}
718+
int sp_idx = d_output_graph[traccc::device::gbts_consts::node2 +
719+
edge_size * path.x];
720+
atomicMax(&d_hit_bids[sp_idx], seed_bid);
721+
}
722+
}
723+
724+
inline __device__ float2 estimate_params(float4 sps[3]) {
725+
726+
// conformal mapping with the center at the middle spacepoint
727+
728+
float u[2], v[2];
729+
730+
const float x0 = sps[1].x;
731+
const float y0 = sps[1].y;
732+
733+
const float r0 = sqrtf(x0 * x0 + y0 * y0);
734+
735+
const float cosA = x0 / r0;
736+
737+
const float sinA = y0 / r0;
738+
739+
for (unsigned int k = 0; k < 2; k++) {
740+
741+
int sp_idx = (k == 1) ? 2 : k;
742+
743+
const float dx = sps[sp_idx].x - x0;
744+
745+
const float dy = sps[sp_idx].y - y0;
746+
747+
const float r2_inv = 1.0f / (dx * dx + dy * dy);
748+
749+
const float xn = dx * cosA + dy * sinA;
750+
751+
const float yn = -dx * sinA + dy * cosA;
752+
753+
u[k] = xn * r2_inv;
754+
v[k] = yn * r2_inv;
755+
}
756+
757+
const float du = u[0] - u[1];
758+
if (du == 0.0f) {
759+
return make_float2(0.0f, 0.0f);
760+
}
761+
762+
const float A = (v[0] - v[1]) / du;
763+
764+
const float B = v[1] - A * u[1];
765+
766+
// signed curvature in 1/m
767+
const float curv = 1000.0f * B / sqrtf(1 + A * A);
768+
const float cot_t = (sps[2].z - sps[1].z) /
769+
(sqrtf(sps[2].x * sps[2].x + sps[2].y * sps[2].y) - r0);
770+
return make_float2(curv, cot_t);
771+
}
772+
696773
void __global__ gbts_seed_conversion_kernel(
697774
int2* d_seed_proposals, char* d_seed_ambiguity, int2* d_path_store,
698-
int* d_output_graph, edm::seed_collection::view output_seeds,
699-
const unsigned int nProps, const unsigned int max_num_neighbours) {
775+
int* d_output_graph, float4* d_sp_params,
776+
edm::seed_collection::view output_seeds, unsigned long long int* d_hit_bids,
777+
const unsigned int nProps, const unsigned int max_num_neighbours,
778+
const float dcurv_cut_m, const float force_dropout_max_curv_m,
779+
const float best_hit_frac, const float tight_bid_cot_threshold,
780+
const bool use_dropout) {
700781

701782
int edge_size = 2 + 1 + max_num_neighbours;
702783
edm::seed_collection::device seeds_device(output_seeds);
@@ -707,23 +788,88 @@ void __global__ gbts_seed_conversion_kernel(
707788
// drop seeds that lost the bidding
708789
continue;
709790
}
791+
// collect seed hits and reject those that lost the hit bidding
792+
char best_for_hit = 0;
710793
Tracklet seed;
711794
seed.size = 0;
712795
// dummy path to start the loop
713-
int2 path = make_int2(0, d_seed_proposals[prop_idx].y);
796+
int2 prop = d_seed_proposals[prop_idx];
797+
int2 path = make_int2(0, prop.y);
714798
while (path.y >= 0) {
715799
path = d_path_store[path.y];
716800
seed.nodes[seed.size++] =
717801
d_output_graph[traccc::device::gbts_consts::node1 +
718802
edge_size * path.x];
803+
best_for_hit +=
804+
(prop_idx ==
805+
(d_hit_bids[seed.nodes[seed.size - 1]] & 0xFFFFFFFFLL));
719806
}
720807
seed.nodes[seed.size++] =
721808
d_output_graph[traccc::device::gbts_consts::node2 +
722809
edge_size * path.x];
723-
// sample begining, middle, end sp from tracklet for now
724-
seeds_device.push_back({seed.nodes[seed.size - 1],
725-
seed.nodes[(1 + seed.size) / 2 - 1],
726-
seed.nodes[0]});
810+
best_for_hit += (prop_idx == (d_hit_bids[seed.nodes[seed.size - 1]] &
811+
0xFFFFFFFFLL));
812+
813+
if ((best_for_hit < best_hit_frac * seed.size)) {
814+
continue;
815+
}
816+
char diff_code = 0;
817+
bool force_dropout = false;
818+
if (use_dropout) {
819+
float4 sps[3];
820+
// seed 1
821+
sps[0] = d_sp_params[seed.nodes[seed.size - 1]];
822+
sps[1] = d_sp_params[seed.nodes[(seed.size - 1) / 2 + 1]];
823+
sps[2] = d_sp_params[seed.nodes[0]];
824+
float2 curv_cot_1 = estimate_params(sps);
825+
// seed 2
826+
sps[1] = d_sp_params[seed.nodes[(seed.size - 1) / 2]];
827+
float2 curv_cot_2 = estimate_params(sps);
828+
sps[0] = d_sp_params[seed.nodes[seed.size - 2]];
829+
// seed 3
830+
float2 curv_cot_3 = estimate_params(sps);
831+
// for low eta (higher fake rate) seeds perform a stronger cut
832+
if ((best_for_hit < seed.size - 1) &
833+
(abs(curv_cot_1.y + curv_cot_2.y + curv_cot_3.y) <
834+
3.0f * tight_bid_cot_threshold) &
835+
(seed.size < 5)) {
836+
continue;
837+
}
838+
float diff[3] = {abs(curv_cot_1.x - curv_cot_2.x),
839+
abs(curv_cot_2.x - curv_cot_3.x),
840+
abs(curv_cot_1.x - curv_cot_3.x)};
841+
diff_code = 4 * (diff[0] < dcurv_cut_m) +
842+
2 * (diff[1] < dcurv_cut_m) + (diff[2] < dcurv_cut_m);
843+
// for high pt the diff may pass dispite bad estimates
844+
force_dropout = abs(curv_cot_1.x + curv_cot_2.x + curv_cot_3.x) <
845+
3.0f * force_dropout_max_curv_m;
846+
force_dropout |= (abs(curv_cot_1.y + curv_cot_2.y + curv_cot_3.y) <
847+
3.0f * tight_bid_cot_threshold) &
848+
diff_code == 0;
849+
}
850+
// use one seed from a consistant pair/set + the inconsistant one
851+
// sample spacepoints from tracklet to create seeds
852+
// include 1st order unless either 2 or 3 are consitant with the other
853+
// and 1
854+
if (diff_code != 3 & diff_code != 6 | force_dropout) {
855+
seeds_device.push_back({seed.nodes[seed.size - 1],
856+
seed.nodes[(seed.size - 1) / 2 + 1],
857+
seed.nodes[0]});
858+
}
859+
// include 2nd order if it consistant with 1 and 3 or only 1 and 3 are
860+
// consistant
861+
if (diff_code == 1 | diff_code == 6) {
862+
seeds_device.push_back({seed.nodes[seed.size - 1],
863+
seed.nodes[(seed.size - 1) / 2],
864+
seed.nodes[0]});
865+
}
866+
// include 3rd order if it is consistant with 1 and 2 or only 1 and 2
867+
// are consistant or if only 2 and 3 are consistant
868+
if (diff_code == 2 | diff_code == 3 | diff_code == 4 | force_dropout) {
869+
seeds_device.push_back({seed.nodes[seed.size - 2],
870+
seed.nodes[(seed.size - 1) / 2],
871+
seed.nodes[0]});
872+
}
727873
}
728874
}
729875

0 commit comments

Comments
 (0)