Skip to content
This repository was archived by the owner on May 3, 2024. It is now read-only.

Commit a5cc379

Browse files
committed
Merge remote-tracking branch 'origin/hiprng-mlopen' into hip
Conflicts: include/caffe/util/device_alternate.hpp
2 parents 6747f2f + 0f35727 commit a5cc379

File tree

11 files changed

+103
-128
lines changed

11 files changed

+103
-128
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ HIP_LIB_DIR += /usr/local/cuda/lib64
176176

177177
ifneq (, $(findstring hcc, $(HIP_PLATFORM)))
178178
#HIP_LIBS := hip_hcc hcblas
179-
HIP_LIBS := hcblas
179+
HIP_LIBS := hcblas hcrng
180180
else ifneq (, $(findstring nvcc, $(HIP_PLATFORM)))
181181
HIP_LIBS := cudart cublas curand
182182
endif

Makefile.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ PYTHON_LIB := /usr/lib
104104
# WITH_PYTHON_LAYER := 1
105105

106106
# Whatever else you find you need goes here.
107-
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /opt/rocm/hip/include /opt/rocm/hcblas/include
108-
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /opt/rocm/hcblas/lib
107+
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /opt/rocm/hip/include /opt/rocm/profiler/CXLActivityLogger/include /opt/rocm/hcblas/include /opt/rocm/hcrng/include
108+
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /opt/rocm/hcblas/lib /opt/rocm/hcrng/lib
109109

110110
# If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies
111111
# INCLUDE_DIRS += $(shell brew --prefix)/include

README.amd-tips

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# Build caffe:
2+
$ source setup.sh
3+
$ make DEBUG=1 -j20
4+
$ build/tools/caffe train --solver=examples/cifar10/cifar10_quick_solver.prototxt --gpu 0,1
5+
16

27
### CAFFE directed tests:
38
# Run all tests:

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ D. Unit Testing
115115

116116
a. Support libraries
117117

118-
sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler libatlas-base-dev libblas-dev libgflags-dev libgoogle-glog-dev liblmdb-dev libboost-all-dev
118+
sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler libatlas-base-dev libblas-dev libgflags-dev libgoogle-glog-dev liblmdb-dev libboost-all-dev python-h5py
119119

120120
b. HcBLAS library
121121

@@ -138,4 +138,4 @@ To improve build time, one could as well invoke make -j <number of threads>
138138

139139
After done with A, B and C, Now its time to test. Run the following commands to perform unit testing of different components of Caffe.
140140

141-
./build/test/test_all.testbin
141+
./build/test/test_all.testbin

include/caffe/common.hpp

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ using std::vector;
9898
void GlobalInit(int* pargc, char*** pargv);
9999

100100
// A singleton class to hold common caffe stuff, such as the handler that
101-
// caffe is going to use for hipblas, curand, etc.
101+
// caffe is going to use for hipblas, hiprng, etc.
102102
class Caffe {
103103
public:
104104
~Caffe();
@@ -124,7 +124,7 @@ class Caffe {
124124
shared_ptr<Generator> generator_;
125125
};
126126

127-
// Getters for boost rng, curand, and hipblas handles
127+
// Getters for boost rng, hiprng, and hipblas handles
128128
inline static RNG& rng_stream() {
129129
if (!Get().random_generator_) {
130130
Get().random_generator_.reset(new RNG());
@@ -133,10 +133,9 @@ class Caffe {
133133
}
134134
#ifndef CPU_ONLY
135135
inline static hipblasHandle_t hipblas_handle() { return Get().hipblas_handle_; }
136-
// TODO HIP Equivalent
137-
/*inline static curandGenerator_t curand_generator() {
138-
return Get().curand_generator_;
139-
}*/
136+
inline static hiprngGenerator_t hiprng_generator() {
137+
return Get().hiprng_generator_;
138+
}
140139
#endif
141140

142141
// Returns the mode: running on CPU or GPU.
@@ -147,9 +146,9 @@ class Caffe {
147146
// freed in a non-pinned way, which may cause problems - I haven't verified
148147
// it personally but better to note it here in the header file.
149148
inline static void set_mode(Brew mode) { Get().mode_ = mode; }
150-
// Sets the random seed of both boost and curand
149+
// Sets the random seed of both boost and hiprng
151150
static void set_random_seed(const unsigned int seed);
152-
// Sets the device. Since we have hipblas and curand stuff, set device also
151+
// Sets the device. Since we have hipblas and hiprng stuff, set device also
153152
// requires us to reset those values.
154153
static void SetDevice(const int device_id);
155154
// Prints the current GPU status.
@@ -168,8 +167,7 @@ class Caffe {
168167
protected:
169168
#ifndef CPU_ONLY
170169
hipblasHandle_t hipblas_handle_;
171-
//TODO: HIP Equivalent
172-
//curandGenerator_t curand_generator_;
170+
hiprngGenerator_t hiprng_generator_;
173171
#endif
174172
shared_ptr<RNG> random_generator_;
175173

include/caffe/util/device_alternate.hpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \
4343
#endif
4444

4545
#include <hipblas.h>
46+
#include <hiprng.h>
4647
#ifdef USE_ACCMI
4748
#include "caffe/util/cudnn.hpp"
4849
#endif
@@ -69,14 +70,12 @@ void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \
6970
<< caffe::hipblasGetErrorString(status); \
7071
} while (0)
7172

72-
// TODO: Get HIP equivalent
73-
/*#define CURAND_CHECK(condition) \
73+
#define HIPRNG_CHECK(condition) \
7474
do { \
75-
curandStatus_t status = condition; \
76-
CHECK_EQ(status, CURAND_STATUS_SUCCESS) << " " \
77-
<< caffe::curandGetErrorString(status); \
75+
hiprngStatus_t status = condition; \
76+
CHECK_EQ(status,HIPRNG_STATUS_SUCCESS) << " " \
77+
<< caffe::hiprngGetErrorString(status); \
7878
} while (0)
79-
*/
8079

8180
// HIP: grid stride looping
8281
#define HIP_KERNEL_LOOP(i, n) \
@@ -85,13 +84,13 @@ void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \
8584
i += hipBlockDim_x * hipGridDim_x)
8685

8786
// HIP: check for error after kernel execution and exit loudly if there is one.
88-
//TODO: Get HIP equivalent
89-
//#define HIP_POST_KERNEL_CHECK HIP_CHECK(cudaPeekAtLastError())
87+
#define HIP_POST_KERNEL_CHECK HIP_CHECK(hipPeekAtLastError())
9088

9189
namespace caffe {
9290

9391
// HIP: library error reporting.
9492
const char* hipblasGetErrorString(hipblasStatus_t error);
93+
const char* hiprngGetErrorString(hiprngStatus_t error);
9594
// HIP: use 512 threads per block
9695

9796
#ifdef __HIP_PLATFORM_NVCC__

include/caffe/util/math_functions.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,9 +217,9 @@ void caffe_gpu_rng_uniform(const int n, unsigned int* r);
217217

218218
// caffe_gpu_rng_uniform with four arguments generates floats in the range
219219
// (a, b] (strictly greater than a, less than or equal to b) due to the
220-
// specification of curandGenerateUniform. With a = 0, b = 1, just calls
221-
// curandGenerateUniform; with other limits will shift and scale the outputs
222-
// appropriately after calling curandGenerateUniform.
220+
// specification of hiprngGenerateUniform. With a = 0, b = 1, just calls
221+
// hiprngGenerateUniform; with other limits will shift and scale the outputs
222+
// appropriately after calling hiprngGenerateUniform.
223223
template <typename Dtype>
224224
void caffe_gpu_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r);
225225

src/caffe/common.cpp

Lines changed: 62 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -106,47 +106,46 @@ void* Caffe::RNG::generator() {
106106

107107
Caffe::Caffe()
108108
// TODO: HIP Equivalent
109-
: hipblas_handle_(NULL), random_generator_(),
109+
: hipblas_handle_(NULL), hiprng_generator_(NULL), random_generator_(),
110110
mode_(Caffe::CPU), solver_count_(1), root_solver_(true) {
111111
// Try to create a hipblas handler, and report an error if failed (but we will
112112
// keep the program running as one might just want to run CPU code).
113113
if (hipblasCreate(&hipblas_handle_) != HIPBLAS_STATUS_SUCCESS) {
114114
LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
115115
}
116-
// Try to create a curand handler.
117-
/*if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
118-
!= CURAND_STATUS_SUCCESS ||
119-
curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
120-
!= CURAND_STATUS_SUCCESS) {
116+
// Try to create a hiprng handler.
117+
if (hiprngCreateGenerator(&hiprng_generator_, HIPRNG_RNG_PSEUDO_MRG32K3A)
118+
!= HIPRNG_STATUS_SUCCESS ||
119+
hiprngSetPseudoRandomGeneratorSeed(hiprng_generator_, cluster_seedgen())
120+
!= HIPRNG_STATUS_SUCCESS) {
121121
LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
122-
}*/
122+
}
123123
}
124124

125125
Caffe::~Caffe() {
126-
// TODO: HIP Equivalent
127126
if (hipblas_handle_) HIPBLAS_CHECK(hipblasDestroy(hipblas_handle_));
128-
/*if (curand_generator_) {
129-
CURAND_CHECK(curandDestroyGenerator(curand_generator_));
130-
}*/
127+
if (hiprng_generator_) {
128+
HIPRNG_CHECK(hiprngDestroyGenerator(hiprng_generator_));
129+
}
131130
}
132131

133132
void Caffe::set_random_seed(const unsigned int seed) {
134133
// Curand seed
135-
// TODO HIP Equivalent
136-
/*static bool g_curand_availability_logged = false;
137-
if (Get().curand_generator_) {
138-
CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(),
139-
seed));
140-
CURAND_CHECK(curandSetGeneratorOffset(curand_generator(), 0));
134+
static bool g_hiprng_availability_logged = false;
135+
if (Get().hiprng_generator_) {
136+
HIPRNG_CHECK(hiprngSetPseudoRandomGeneratorSeed(hiprng_generator(),
137+
seed));
138+
// TODO: support in HIP equivalent
139+
//HIPRNG_CHECK(hiprngSetGeneratorOffset(hiprng_generator(), 0));
141140
} else {
142-
if (!g_curand_availability_logged) {
141+
if (!g_hiprng_availability_logged) {
143142
LOG(ERROR) <<
144-
"Curand not available. Skipping setting the curand seed.";
145-
g_curand_availability_logged = true;
143+
"Curand not available. Skipping setting the hiprng seed.";
144+
g_hiprng_availability_logged = true;
146145
}
147146
}
148147
// RNG seed
149-
Get().random_generator_.reset(new RNG(seed));*/
148+
Get().random_generator_.reset(new RNG(seed));
150149
}
151150

152151
void Caffe::SetDevice(const int device_id) {
@@ -158,16 +157,15 @@ void Caffe::SetDevice(const int device_id) {
158157
// The call to hipSetDevice must come before any calls to Get, which
159158
// may perform initialization using the GPU.
160159
HIP_CHECK(hipSetDevice(device_id));
161-
//TODO HIP equivalent
162160
if (Get().hipblas_handle_) HIPBLAS_CHECK(hipblasDestroy(Get().hipblas_handle_));
163-
/*if (Get().curand_generator_) {
164-
CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
165-
}*/
161+
if (Get().hiprng_generator_) {
162+
HIPRNG_CHECK(hiprngDestroyGenerator(Get().hiprng_generator_));
163+
}
166164
HIPBLAS_CHECK(hipblasCreate(&Get().hipblas_handle_));
167-
/*CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
168-
CURAND_RNG_PSEUDO_DEFAULT));
169-
CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
170-
cluster_seedgen()));*/
165+
HIPRNG_CHECK(hiprngCreateGenerator(&Get().hiprng_generator_,
166+
HIPRNG_RNG_PSEUDO_MRG32K3A));
167+
HIPRNG_CHECK(hiprngSetPseudoRandomGeneratorSeed(Get().hiprng_generator_,
168+
cluster_seedgen()));
171169
}
172170

173171
void Caffe::DeviceQuery() {
@@ -263,7 +261,7 @@ void* Caffe::RNG::generator() {
263261

264262

265263
const char* hipblasGetErrorString(hipblasStatus_t error) {
266-
/*switch (error) {
264+
switch (error) {
267265
case HIPBLAS_STATUS_SUCCESS:
268266
return "HIPBLAS_STATUS_SUCCESS";
269267
case HIPBLAS_STATUS_NOT_INITIALIZED:
@@ -278,46 +276,49 @@ const char* hipblasGetErrorString(hipblasStatus_t error) {
278276
return "HIPBLAS_STATUS_EXECUTION_FAILED";
279277
case HIPBLAS_STATUS_INTERNAL_ERROR:
280278
return "HIPBLAS_STATUS_INTERNAL_ERROR";
279+
case HIPBLAS_STATUS_NOT_SUPPORTED:
280+
return "HIPBLAS_STATUS_NOT_SUPPORTED";
281281
#if HIP_VERSION >= 6000
282282
case HIPBLAS_STATUS_INTERNAL_ERROR:
283283
return "HIPBLAS_STATUS_INTERNAL_ERROR";
284284
#endif
285-
}*/
285+
}
286286
return "Unknown hipblas status";
287287
}
288288

289-
// TODO HIP Equivalent
290-
/*const char* curandGetErrorString(curandStatus_t error) {
289+
const char* hiprngGetErrorString(hiprngStatus_t error) {
291290
switch (error) {
292-
case CURAND_STATUS_SUCCESS:
293-
return "CURAND_STATUS_SUCCESS";
294-
case CURAND_STATUS_VERSION_MISMATCH:
295-
return "CURAND_STATUS_VERSION_MISMATCH";
296-
case CURAND_STATUS_NOT_INITIALIZED:
297-
return "CURAND_STATUS_NOT_INITIALIZED";
298-
case CURAND_STATUS_ALLOCATION_FAILED:
299-
return "CURAND_STATUS_ALLOCATION_FAILED";
300-
case CURAND_STATUS_TYPE_ERROR:
301-
return "CURAND_STATUS_TYPE_ERROR";
302-
case CURAND_STATUS_OUT_OF_RANGE:
303-
return "CURAND_STATUS_OUT_OF_RANGE";
304-
case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
305-
return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
306-
case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
307-
return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
308-
case CURAND_STATUS_LAUNCH_FAILURE:
309-
return "CURAND_STATUS_LAUNCH_FAILURE";
310-
case CURAND_STATUS_PREEXISTING_FAILURE:
311-
return "CURAND_STATUS_PREEXISTING_FAILURE";
312-
case CURAND_STATUS_INITIALIZATION_FAILED:
313-
return "CURAND_STATUS_INITIALIZATION_FAILED";
314-
case CURAND_STATUS_ARCH_MISMATCH:
315-
return "CURAND_STATUS_ARCH_MISMATCH";
316-
case CURAND_STATUS_INTERNAL_ERROR:
317-
return "CURAND_STATUS_INTERNAL_ERROR";
291+
case HIPRNG_STATUS_INVALID_STREAM_CREATOR:
292+
return "HIPRNG_STATUS_INVALID_STREAM_CREATOR";
293+
case HIPRNG_STATUS_SUCCESS:
294+
return "HIPRNG_STATUS_SUCCESS";
295+
case HIPRNG_STATUS_VERSION_MISMATCH:
296+
return "HIPRNG_STATUS_VERSION_MISMATCH";
297+
//case HIPRNG_STATUS_NOT_INITIALIZED:
298+
//return "HIPRNG_STATUS_NOT_INITIALIZED";
299+
case HIPRNG_STATUS_ALLOCATION_FAILED:
300+
return "HIPRNG_STATUS_ALLOCATION_FAILED";
301+
case HIPRNG_STATUS_TYPE_ERROR:
302+
return "HIPRNG_STATUS_TYPE_ERROR";
303+
//case HIPRNG_STATUS_OUT_OF_RANGE:
304+
//return "HIPRNG_STATUS_OUT_OF_RANGE";
305+
//case HIPRNG_STATUS_LENGTH_NOT_MULTIPLE:
306+
//return "HIPRNG_STATUS_LENGTH_NOT_MULTIPLE";
307+
//case HIPRNG_STATUS_DOUBLE_PRECISION_REQUIRED:
308+
//return "HIPRNG_STATUS_DOUBLE_PRECISION_REQUIRED";
309+
//case HIPRNG_STATUS_LAUNCH_FAILURE:
310+
//return "HIPRNG_STATUS_LAUNCH_FAILURE";
311+
//case HIPRNG_STATUS_PREEXISTING_FAILURE:
312+
//return "HIPRNG_STATUS_PREEXISTING_FAILURE";
313+
case HIPRNG_STATUS_INITIALIZATION_FAILED:
314+
return "HIPRNG_STATUS_INITIALIZATION_FAILED";
315+
//case HIPRNG_STATUS_ARCH_MISMATCH:
316+
//return "HIPRNG_STATUS_ARCH_MISMATCH";
317+
//case HIPRNG_STATUS_INTERNAL_ERROR:
318+
//return "HIPRNG_STATUS_INTERNAL_ERROR";
318319
}
319-
return "Unknown curand status";
320-
}*/
320+
return "Unknown hiprng status";
321+
}
321322

322323
#endif // CPU_ONLY
323324

src/caffe/test/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ caffe_leave_only_selected_tests(test_cuda ${BUILD_only_tests})
1010
# That's why the lines below are commented. TODO: remove them
1111

1212
# definition needed to include CMake generated files
13-
#add_definitions(-DCMAKE_BUILD)
13+
add_definitions(-DCMAKE_BUILD)
1414

1515
# generates test_data/sample_data_list.txt.gen.cmake
16-
#caffe_configure_testdatafile(test_data/sample_data_list.txt)
16+
caffe_configure_testdatafile(test_data/sample_data_list.txt)
1717

1818
set(the_target test.testbin)
1919
set(test_args --gtest_shuffle)

src/caffe/test/test_common.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ class CommonTest : public ::testing::Test {};
1515
TEST_F(CommonTest, TestCublasHandlerGPU) {
1616
int hip_device_id;
1717
HIP_CHECK(hipGetDevice(&hip_device_id));
18-
//TODO: HIP equivalent
19-
//EXPECT_TRUE(Caffe::cublas_handle());
18+
EXPECT_TRUE(Caffe::hipblas_handle());
2019
}
2120

2221
#endif
@@ -49,16 +48,15 @@ TEST_F(CommonTest, TestRandSeedGPU) {
4948
SyncedMemory data_a(10 * sizeof(unsigned int));
5049
SyncedMemory data_b(10 * sizeof(unsigned int));
5150
Caffe::set_random_seed(1701);
52-
// TODO: HIP Equivalent
53-
/*CURAND_CHECK(curandGenerate(Caffe::curand_generator(),
51+
HIPRNG_CHECK(hiprngGenerate(Caffe::hiprng_generator(),
5452
static_cast<unsigned int*>(data_a.mutable_gpu_data()), 10));
5553
Caffe::set_random_seed(1701);
56-
CURAND_CHECK(curandGenerate(Caffe::curand_generator(),
54+
HIPRNG_CHECK(hiprngGenerate(Caffe::hiprng_generator(),
5755
static_cast<unsigned int*>(data_b.mutable_gpu_data()), 10));
5856
for (int i = 0; i < 10; ++i) {
5957
EXPECT_EQ(((const unsigned int*)(data_a.cpu_data()))[i],
6058
((const unsigned int*)(data_b.cpu_data()))[i]);
61-
}*/
59+
}
6260
}
6361

6462
#endif

0 commit comments

Comments
 (0)