// gpuFacade.cpp
#include <vector>
#include <iostream>
#include <stdio.h>
#include <time.h>
#include "cuda.h"
#include "cuda_runtime.h"
#include "opencv2/opencv.hpp"
using namespace std;
using namespace cv;
#include "latch.h"
#include "bitMatcher.h"
#include "gpuFacade.hpp"
// images
// keypoints
// descriptors
// matches
using namespace std;
// cudaCalloc(A, B): allocate B bytes of device memory, store the pointer in *A,
// and zero-fill the new buffer (device-side analogue of calloc).
//   A - address of the device pointer to fill in (any pointer-to-pointer type).
//   B - size of the allocation in bytes.
// Each argument is evaluated exactly once. A failing cudaMalloc or cudaMemset is
// reported on stderr with the call site; execution then continues, as before,
// without aborting.
#define cudaCalloc(A, B) \
    do { \
        void **cudaCalloc_dst_ = (void **) (A); \
        const size_t cudaCalloc_bytes_ = (B); \
        cudaError_t cudaCalloc_err_ = cudaMalloc(cudaCalloc_dst_, cudaCalloc_bytes_); \
        if (cudaCalloc_err_ == cudaSuccess) \
            cudaCalloc_err_ = cudaMemset(*cudaCalloc_dst_, 0, cudaCalloc_bytes_); \
        if (cudaCalloc_err_ != cudaSuccess) \
            fprintf(stderr, "cudaCalloc failed: %s %s %d\n", \
                    cudaGetErrorString(cudaCalloc_err_), __FILE__, __LINE__); \
    } while (0)
// checkError(ans): evaluate a CUDA runtime call and pass its status to gpuAssert
// together with the file and line of the call site.
// Wrapped in do { } while (0) so the macro behaves as one statement and stays
// correct inside an unbraced if/else; invoke it with a trailing semicolon.
#define checkError(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)
// Reports a failed CUDA runtime call and optionally terminates the process.
//   code  - status returned by the CUDA call; cudaSuccess makes this a no-op.
//   file  - source file of the call site (printed in the message).
//   line  - source line of the call site (printed in the message).
//   abort - when true (the default) the process exits with `code` as its status.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
    // Nothing to report for a successful call.
    if (code == cudaSuccess) {
        return;
    }

    const char *description = cudaGetErrorString(code);
    fprintf(stderr,"GPUassert: %s %s %d\n", description, file, line);

    if (abort) {
        exit(code);
    }
}
// checkLaunchError(): call right after a kernel launch.
// First reads (and clears) the last error to catch launch-time failures, then
// blocks until the device is idle to surface errors raised while the kernel ran.
// Either failure prints the location and error string and exits with EXIT_FAILURE.
// Uses cudaDeviceSynchronize; cudaThreadSynchronize is its deprecated predecessor.
#define checkLaunchError() \
    do { \
        /* Check synchronous errors, i.e. pre-launch */ \
        cudaError_t checkLaunchError_err_ = cudaGetLastError(); \
        if (cudaSuccess != checkLaunchError_err_) { \
            fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                     __FILE__, __LINE__, cudaGetErrorString(checkLaunchError_err_) ); \
            exit(EXIT_FAILURE); \
        } \
        /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
        checkLaunchError_err_ = cudaDeviceSynchronize(); \
        if (cudaSuccess != checkLaunchError_err_) { \
            fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                     __FILE__, __LINE__, cudaGetErrorString(checkLaunchError_err_) ); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
// Releases the resources acquired by the constructor, then resets the device.
// Streams and events are destroyed first, then device buffers, pinned host
// buffers and the malloc'd host match buffers (previously leaked).
// NOTE(review): cudaDeviceReset() tears down the whole device context for this
// process, so it also affects any other CUDA user in the same process.
gpuFacade::~gpuFacade() {
    // cudaFreeArray(patchTriplets); // This crashes..?

    // Streams and events created in the constructor.
    cudaStreamDestroy(streamKP1);
    cudaStreamDestroy(streamKP2);
    cudaEventDestroy(latchFinished);
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    // Device buffers.
    cudaFree(d_K);
    cudaFree(d_D1);
    cudaFree(d_D2);
    cudaFree(d_M1);
    cudaFree(d_M2);
    cudaFree(d_mask);
    // d_I is set up by initImage(), which is not defined in this file, so it is
    // not freed explicitly here; the device reset below reclaims it.

    // Pinned host buffers (cudaMallocHost).
    cudaFreeHost(h_K1);
    cudaFreeHost(h_K2);

    // Pageable host buffers (malloc).
    free(h_M1);
    free(h_M2);

    cudaDeviceReset();
}
// Builds the facade: records the limits, computes every buffer size, allocates
// host and device storage, and creates the events and streams used later.
//   maxKeypoints - stored in maxKP; scales the keypoint, descriptor and match buffers.
//   input_WIDTH  - stored in WIDTH; with input_HEIGHT it sizes sizeI and is passed to initImage.
//   input_HEIGHT - stored in HEIGHT.
//   imageSlots   - not used by this constructor.
// CUDA calls that return a status directly go through checkError, which prints
// the failure and exits the process. A failed host malloc also exits.
gpuFacade::gpuFacade(int maxKeypoints, int input_WIDTH, int input_HEIGHT, int imageSlots) {
    maxKP = maxKeypoints;
    WIDTH = input_WIDTH;
    HEIGHT = input_HEIGHT;

    checkError(cudaEventCreate(&start));
    checkError(cudaEventCreate(&stop));

    // Sizes for device and host pointers
    sizeK = maxKP * sizeof(float) * 4; // K for keypoints
    sizeI = WIDTH * HEIGHT * sizeof(unsigned char); // I for Image
    sizeD = maxKP * (2048 / 32) * sizeof(unsigned int); // D for Descriptor
    sizeM = maxKP * sizeof(int); // M for Matches
    sizeMask = 64 * sizeof(float);

    // Host pointers
    checkError(cudaMallocHost((void **) &h_K1, sizeK));
    checkError(cudaMallocHost((void **) &h_K2, sizeK));
    h_M1 = (int*) malloc(sizeM);
    h_M2 = (int*) malloc(sizeM);
    // malloc may legitimately return NULL for a zero-byte request, so only a
    // failed non-empty allocation is treated as fatal.
    if (sizeM != 0 && (h_M1 == NULL || h_M2 == NULL)) {
        fprintf(stderr, "gpuFacade: host allocation for match buffers failed\n");
        exit(EXIT_FAILURE);
    }
    for (int i=0; i<64; i++) { h_mask[i] = 1.0f; }

    // Device pointers
    cudaCalloc((void **) &d_K, sizeK);
    cudaCalloc((void **) &d_D1, sizeD);
    cudaCalloc((void **) &d_D2, sizeD);
    cudaCalloc((void **) &d_M1, sizeM);
    cudaCalloc((void **) &d_M2, sizeM);
    // The mask holds 64 floats, so it is sized with sizeMask. It was previously
    // sized with sizeM, which is too small whenever maxKP < 64.
    // NOTE(review): initMask() below receives the address of d_mask; if it
    // allocates the buffer itself, this allocation is redundant - confirm.
    cudaCalloc((void **) &d_mask, sizeMask);

    // The patch triplet locations for LATCH fits in texture memory cache.
    initPatchTriplets(patchTriplets);
    initImage(&d_I, WIDTH, HEIGHT, &pitch);
    initMask(&d_mask, h_mask);

    // Events allow asynchronous, nonblocking launch of subsequent kernels after a given event has happened,
    // such as completion of a different kernel on a different stream.
    checkError(cudaEventCreate(&latchFinished));

    // You should create a new stream for each bitMatcher kernel you want to launch at once.
    checkError(cudaStreamCreate(&streamKP1));
    checkError(cudaStreamCreate(&streamKP2));
}
// Forwards one image to latch(), supplying the facade's own working buffers.
//   img          - input image, passed through unchanged.
//   d_descriptor - descriptor buffer handed to latch() (device pointer by naming; e.g. d_D1 or d_D2).
//   keypoints    - passed through to latch(); presumably receives the keypoint count - confirm against latch().
//   vectorKP     - keypoint vector passed through to latch().
// The facade contributes d_I, pitch, h_K1, maxKP, d_K, d_mask and the
// latchFinished event. Note that h_K1 is always the host keypoint buffer used
// here; h_K2 is not referenced by this method.
void gpuFacade::LATCH(Mat img,
                      unsigned int* d_descriptor,
                      int* keypoints,
                      vector<KeyPoint>* vectorKP)
{
    latch(img,
          d_I,
          pitch,
          h_K1,
          d_descriptor,
          keypoints,
          maxKP,
          d_K,
          vectorKP,
          d_mask,
          latchFinished);
}
// Forwards a descriptor-matching request to bitMatcher().
//   d_descriptorQ - first descriptor set (query, by naming).
//   d_descriptorT - second descriptor set (target/train, by naming).
//   numKP_Q       - keypoint count for the Q set.
//   numKP_T       - keypoint count for the T set.
//   d_matches     - buffer handed to bitMatcher() for the match results.
//   threshold     - passed through to bitMatcher() unchanged.
//   stream        - CUDA stream passed through to bitMatcher().
// The facade adds maxKP and the latchFinished event to the call.
void gpuFacade::match(unsigned int* d_descriptorQ,
                      unsigned int* d_descriptorT,
                      int numKP_Q,
                      int numKP_T,
                      int* d_matches,
                      int threshold,
                      cudaStream_t stream)
{
    bitMatcher(d_descriptorQ,
               d_descriptorT,
               numKP_Q,
               numKP_T,
               maxKP,
               d_matches,
               threshold,
               stream,
               latchFinished);
}
// Forwards to getMatches() with the facade's maxKP.
//   h_matches - host-side match buffer (by naming), passed through.
//   d_matches - device-side match buffer (by naming), passed through.
// Presumably getMatches() copies the device results into h_matches - confirm
// against its definition.
void gpuFacade::getResults(int* h_matches, int* d_matches)
{
    getMatches(maxKP,
               h_matches,
               d_matches);
}