Skip to content

Commit 16bd49b

Browse files
author
alielabridi
committed
last commit before moving to msgs
1 parent ec930b7 commit 16bd49b

File tree

5 files changed

+369
-11
lines changed

5 files changed

+369
-11
lines changed

OpenMP-Strassen.cpp

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
/*
2+
Compile : g++ -o OpenMP OpenMP-Strassen.cpp -fopenmp
3+
Run : ./OpenMP SIZE THRESHOLD #PROC
4+
*/
5+
6+
#include <omp.h>
7+
#include <stdio.h>
8+
#include <stdlib.h>
9+
#include <time.h>
10+
#include <fstream>
11+
#include <iostream>
12+
#include <ctime>
13+
/*
  Allocate an nRows x nCols matrix of T.

  Storage is a single contiguous block of nRows * nCols elements plus a table
  of row pointers into that block, so element (i, j) is reachable as
  result[i][j] and row i starts at result[0] + i * nCols.

  The row table always has at least one entry, and entry 0 always points at
  the element block. That keeps the `delete [] *p; delete [] p;` release
  sequence used by Free2DArray valid even when nRows is 0.

  Negative dimensions are not supported. Elements are created with a plain
  `new T[count]`, so built-in types are left uninitialised.
*/
template < typename T >
T **Allocate2DArray( int nRows, int nCols)
{
    // Widen before multiplying: nRows * nCols evaluated in int overflows
    // once the matrix has more than INT_MAX elements.
    const size_t rows = static_cast<size_t>(nRows);
    const size_t cols = static_cast<size_t>(nCols);

    // (step 1) table of row pointers, with a spare slot when there are no rows
    T **ppi = new T*[rows == 0 ? 1 : rows];

    // (step 2) one contiguous block holding every element
    T *block;
    try
    {
        block = new T[rows * cols];
    }
    catch (...)
    {
        // Do not leak the row table when the element block cannot be obtained.
        delete [] ppi;
        throw;
    }

    // (step 3) point each row at its slice of the block
    ppi[0] = block;
    for (size_t i = 1; i < rows; ++i)
        ppi[i] = ppi[i - 1] + cols;

    return ppi;
}
30+
31+
/*
  Release a matrix laid out as a row-pointer table whose first entry points at
  one contiguous element block (the layout Allocate2DArray builds).

  Array[0] is freed as the element block, then the table itself is freed.
  A null Array is accepted and ignored.
*/
template < typename T >
void Free2DArray(T** Array)
{
    // Nothing to release; dereferencing a null table would crash.
    if (!Array)
        return;

    delete [] *Array;   // the contiguous element block
    delete [] Array;    // the row-pointer table
}
37+
38+
/*#define THRESHOLD 32768*/ /* product size below which matmultleaf is used */
39+
// Work-size cutoff for strassenMMult: when ml*nl*pl is at or below this value
// the product is computed directly by matmultleaf instead of recursing.
// Assigned in main from the second command-line argument.
int THRESHOLD;
40+
41+
/*
  Plain triple-loop matrix product: C = A * B.

  m, n : number of rows and columns of C
  p    : number of columns of A / rows of B (the summed index)
  A    : m x p input, addressed as A[i][k]
  B    : p x n input, addressed as B[k][j]
  C    : m x n output; every element is overwritten
*/
void seqMatMult(int m, int n, int p, int** A, int** B, int** C)
{
    for (int i = 0; i < m; i++)
    {
        const int *rowA = A[i];
        int *rowC = C[i];
        for (int j = 0; j < n; j++)
        {
            // Accumulate in a local int: avoids the double literal 0.0 being
            // converted to int and the repeated load/store of C[i][j].
            int sum = 0;
            for (int k = 0; k < p; k++)
                sum += rowA[k] * B[k][j];
            rowC[j] = sum;
        }
    }
}
51+
52+
/*
  Multiply a submatrix of A by a submatrix of B with the simple triple loop
  and store the result in a submatrix of C.

  mf, ml : first and last+1 i index (rows of A and C)
  nf, nl : first and last+1 j index (columns of B and C)
  pf, pl : first and last+1 k index (columns of A / rows of B)

  For every (i, j) in [mf, ml) x [nf, nl), C[i][j] is overwritten with the sum
  over k in [pf, pl) of A[i][k] * B[k][j]. Elements of C outside that range
  are not touched.
*/
void matmultleaf(int mf, int ml, int nf, int nl, int pf, int pl, int **A, int **B, int **C)
{
    for (int i = mf; i < ml; i++)
    {
        const int *rowA = A[i];
        int *rowC = C[i];
        for (int j = nf; j < nl; j++)
        {
            // Accumulate in a local int: avoids the double literal 0.0 being
            // converted to int and the repeated load/store of C[i][j].
            int sum = 0;
            for (int k = pf; k < pl; k++)
                sum += rowA[k] * B[k][j];
            rowC[j] = sum;
        }
    }
}
69+
70+
71+
/*
  Build a row-pointer view of a sub-block of Y without copying any elements.

  After the call, X[r] points at Y[mf + r][nf] for r in [0, m), so X[r][c]
  aliases Y[mf + r][nf + c]. X must have room for m row pointers.
*/
void copyQtrMatrix(int **X, int m, int **Y, int mf, int nf)
{
    int row = 0;
    while (row < m)
    {
        int *sourceRow = Y[mf + row];
        X[row] = sourceRow + nf;
        ++row;
    }
}
76+
77+
/*
  Element-wise sum of two m x n blocks: T[i][j] = X[i][j] + Y[i][j].
*/
void AddMatBlocks(int **T, int m, int n, int **X, int **Y)
{
    // With no columns nothing is read or written.
    if (n <= 0)
        return;

    for (int row = 0; row < m; ++row)
    {
        int *out = T[row];
        const int *lhs = X[row];
        const int *rhs = Y[row];
        for (int col = 0; col < n; ++col)
            out[col] = lhs[col] + rhs[col];
    }
}
83+
84+
/*
  Element-wise difference of two m x n blocks: T[i][j] = X[i][j] - Y[i][j].
*/
void SubMatBlocks(int **T, int m, int n, int **X, int **Y)
{
    // With no columns nothing is read or written.
    if (n <= 0)
        return;

    for (int row = 0; row < m; ++row)
    {
        int *out = T[row];
        const int *lhs = X[row];
        const int *rhs = Y[row];
        for (int col = 0; col < n; ++col)
            out[col] = lhs[col] - rhs[col];
    }
}
90+
91+
92+
void strassenMMult(int ml, int nl, int pl, int **A, int **B, int **C)
93+
{
94+
if (((float)ml)*((float)nl)*((float)pl) <= THRESHOLD)
95+
matmultleaf(0, ml, 0, nl, 0, pl, A, B, C);
96+
97+
else {
98+
int m2 = ml/2;
99+
int n2 = nl/2;
100+
int p2 = pl/2;
101+
102+
int **M1 = Allocate2DArray< int >(m2, n2);
103+
int **M2 = Allocate2DArray< int >(m2, n2);
104+
int **M3 = Allocate2DArray< int >(m2, n2);
105+
int **M4 = Allocate2DArray< int >(m2, n2);
106+
int **M5 = Allocate2DArray< int >(m2, n2);
107+
int **M6 = Allocate2DArray< int >(m2, n2);
108+
int **M7 = Allocate2DArray< int >(m2, n2);
109+
110+
int **wAM1 = Allocate2DArray< int >(m2, p2);
111+
int **wBM1 = Allocate2DArray< int >(p2, n2);
112+
int **wAM2 = Allocate2DArray< int >(m2, p2);
113+
int **wBM3 = Allocate2DArray< int >(p2, n2);
114+
int **wBM4 = Allocate2DArray< int >(p2, n2);
115+
int **wAM5 = Allocate2DArray< int >(m2, p2);
116+
int **wAM6 = Allocate2DArray< int >(m2, p2);
117+
int **wBM6 = Allocate2DArray< int >(p2, n2);
118+
int **wAM7 = Allocate2DArray< int >(m2, p2);
119+
int **wBM7 = Allocate2DArray< int >(p2, n2);
120+
121+
int **A11 = new int*[m2];
122+
int **A12 = new int*[m2];
123+
int **A21 = new int*[m2];
124+
int **A22 = new int*[m2];
125+
126+
int **B11 = new int*[p2];
127+
int **B12 = new int*[p2];
128+
int **B21 = new int*[p2];
129+
int **B22 = new int*[p2];
130+
131+
int **C11 = new int*[m2];
132+
int **C12 = new int*[m2];
133+
int **C21 = new int*[m2];
134+
int **C22 = new int*[m2];
135+
136+
copyQtrMatrix(A11, m2, A, 0, 0);
137+
copyQtrMatrix(A12, m2, A, 0, p2);
138+
copyQtrMatrix(A21, m2, A, m2, 0);
139+
copyQtrMatrix(A22, m2, A, m2, p2);
140+
141+
copyQtrMatrix(B11, p2, B, 0, 0);
142+
copyQtrMatrix(B12, p2, B, 0, n2);
143+
copyQtrMatrix(B21, p2, B, p2, 0);
144+
copyQtrMatrix(B22, p2, B, p2, n2);
145+
146+
copyQtrMatrix(C11, m2, C, 0, 0);
147+
copyQtrMatrix(C12, m2, C, 0, n2);
148+
copyQtrMatrix(C21, m2, C, m2, 0);
149+
copyQtrMatrix(C22, m2, C, m2, n2);
150+
151+
#pragma omp task
152+
{
153+
// M1 = (A11 + A22)*(B11 + B22)
154+
AddMatBlocks(wAM1, m2, p2, A11, A22);
155+
AddMatBlocks(wBM1, p2, n2, B11, B22);
156+
strassenMMult(m2, n2, p2, wAM1, wBM1, M1);
157+
}
158+
159+
#pragma omp task
160+
{
161+
//M2 = (A21 + A22)*B11
162+
AddMatBlocks(wAM2, m2, p2, A21, A22);
163+
strassenMMult(m2, n2, p2, wAM2, B11, M2);
164+
}
165+
166+
#pragma omp task
167+
{
168+
//M3 = A11*(B12 - B22)
169+
SubMatBlocks(wBM3, p2, n2, B12, B22);
170+
strassenMMult(m2, n2, p2, A11, wBM3, M3);
171+
}
172+
173+
#pragma omp task
174+
{
175+
//M4 = A22*(B21 - B11)
176+
SubMatBlocks(wBM4, p2, n2, B21, B11);
177+
strassenMMult(m2, n2, p2, A22, wBM4, M4);
178+
}
179+
180+
#pragma omp task
181+
{
182+
//M5 = (A11 + A12)*B22
183+
AddMatBlocks(wAM5, m2, p2, A11, A12);
184+
strassenMMult(m2, n2, p2, wAM5, B22, M5);
185+
}
186+
187+
#pragma omp task
188+
{
189+
//M6 = (A21 - A11)*(B11 + B12)
190+
SubMatBlocks(wAM6, m2, p2, A21, A11);
191+
AddMatBlocks(wBM6, p2, n2, B11, B12);
192+
strassenMMult(m2, n2, p2, wAM6, wBM6, M6);
193+
}
194+
195+
#pragma omp task
196+
{
197+
//M7 = (A12 - A22)*(B21 + B22)
198+
SubMatBlocks(wAM7, m2, p2, A12, A22);
199+
AddMatBlocks(wBM7, p2, n2, B21, B22);
200+
strassenMMult(m2, n2, p2, wAM7, wBM7, M7);
201+
}
202+
#pragma omp taskwait
203+
204+
for (int i = 0; i < m2; i++)
205+
for (int j = 0; j < n2; j++) {
206+
C11[i][j] = M1[i][j] + M4[i][j] - M5[i][j] + M7[i][j];
207+
C12[i][j] = M3[i][j] + M5[i][j];
208+
C21[i][j] = M2[i][j] + M4[i][j];
209+
C22[i][j] = M1[i][j] - M2[i][j] + M3[i][j] + M6[i][j];
210+
}
211+
212+
Free2DArray< int >(M1);
213+
Free2DArray< int >(M2);
214+
Free2DArray< int >(M3);
215+
Free2DArray< int >(M4);
216+
Free2DArray< int >(M5);
217+
Free2DArray< int >(M6);
218+
Free2DArray< int >(M7);
219+
220+
Free2DArray< int >(wAM1);
221+
Free2DArray< int >(wBM1);
222+
Free2DArray< int >(wAM2);
223+
Free2DArray< int >(wBM3);
224+
Free2DArray< int >(wBM4);
225+
Free2DArray< int >(wAM5);
226+
Free2DArray< int >(wAM6);
227+
Free2DArray< int >(wBM6);
228+
Free2DArray< int >(wAM7);
229+
Free2DArray< int >(wBM7);
230+
231+
delete[] A11; delete[] A12; delete[] A21; delete[] A22;
232+
delete[] B11; delete[] B12; delete[] B21; delete[] B22;
233+
delete[] C11; delete[] C12; delete[] C21; delete[] C22;
234+
}
235+
}
236+
237+
void matmultS(int m, int n, int p, int **A, int **B, int **C)
238+
{
239+
#pragma omp parallel
240+
{
241+
#pragma omp single
242+
{
243+
strassenMMult(m, n, p, A, B, C);
244+
}
245+
}
246+
}
247+
248+
249+
int main(int argc, char* argv[])
250+
{
251+
252+
int size = atoi(argv[1]);
253+
THRESHOLD = atoi(argv[2]);
254+
int proc = atoi(argv[3]);
255+
double start, end;
256+
257+
int **A = Allocate2DArray< int >(size, size);
258+
int **B = Allocate2DArray< int >(size, size);
259+
int **C = Allocate2DArray< int >(size, size);
260+
261+
int i, j;
262+
263+
264+
for (int i = 0; i < size; ++i)
265+
for (int j = 0; j < size; ++j){
266+
B[i][j] = 0;
267+
A[i][j] = 1;
268+
269+
}
270+
271+
for (int i = 0; i < size; ++i)
272+
B[i][i] = 1;
273+
274+
omp_set_dynamic(0); // Explicitly disable dynamic teams
275+
omp_set_num_threads(proc);
276+
277+
start = omp_get_wtime();
278+
matmultS(size, size, size, A, B, C);
279+
end = omp_get_wtime();
280+
281+
double elapsed_secs = end - start;
282+
283+
/*checking result if correct*/
284+
285+
std::ofstream myfile;
286+
myfile.open ("results.txt",std::ofstream::app);
287+
288+
bool correctness = true;
289+
for (int i = 0; i < size; ++i)
290+
for (int j = 0; j < size; ++j)
291+
if(C[i][j] != A[i][j]) correctness = false;
292+
if(correctness){
293+
printf("OpenMP - Correct: matrix size = %d, Threshold = %d,# of proc = %d, Exec time = %lf sec \n", size, THRESHOLD,omp_get_max_threads(),elapsed_secs );
294+
myfile<<"OpenMP - Correct: matrix size = " <<size<<", Threshold = "<< THRESHOLD<<",# of proc = "<<omp_get_max_threads()<<" , Exec time = " << elapsed_secs << "\n";
295+
}
296+
else{
297+
printf("OpenMP - Incorrect: matrix size = %d, Threshold = %d,# of proc = %d, Exec time = %lf sec \n", size, THRESHOLD,omp_get_max_threads(),elapsed_secs );
298+
myfile<<"OpenMP - Incorrect: matrix size = " <<size<<", Threshold = "<< THRESHOLD<<",# of proc = "<<omp_get_max_threads()<<" , Exec time = " << elapsed_secs << "\n";
299+
}
300+
myfile.close();
301+
302+
303+
304+
Free2DArray< int >(A);
305+
Free2DArray< int >(B);
306+
Free2DArray< int >(C);
307+
308+
return 0;
309+
}

sftp-config.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
"password": "otmane",
2020
"port": "22",
2121

22-
"remote_path": "/home/aelabridi/charm2/charm/examples/charm++/Strassen-parallelization-charm--",
22+
"remote_path": "/home/aelabridi/charm/examples/charm++/strassen",
2323
"ignore_regexes": [
2424
"\\.sublime-(project|workspace)", "sftp-config(-alt\\d?)?\\.json",
2525
"sftp-settings\\.json", "/venv/", "\\.svn/", "\\.hg/", "\\.git/",

strassen.C

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,16 +67,26 @@ class Main : public CBase_Main {
6767
for (int i = 0; i < size; ++i)
6868
for (int j = 0; j < size; ++j)
6969
if(m->v[i][j] != A[i][j]) correctness = false;
70-
if(correctness){
71-
CkPrintf("Correct: matrix size = %d, Threshold = %d,# of proc = %d, Exec time = %lf sec \n", size, THRESHOLD,CkNumPes(),endtimer-starttimer);
72-
myfile<<"Correct: matrix size = " <<size<<", Threshold = "<< THRESHOLD<<",# of proc = "<<CkNumPes()<<" , Exec time = " << endtimer-starttimer << "\n";
70+
if(correctness && THRESHOLD == size){
71+
CkPrintf("Naive Serial - Correct: matrix size = %d, Exec time = %lf sec \n", size,endtimer-starttimer);
72+
myfile<<"Naive Serial - Correct: matrix size = " <<size<<" , Exec time = " << endtimer-starttimer << "\n";
73+
74+
}
75+
else if(!correctness && THRESHOLD == size){
76+
CkPrintf("Naive Serial - Incorrect: matrix size = %d, Exec time = %lf sec \n", size,endtimer-starttimer);
77+
myfile<<"Naive Serial - Incorrect: matrix size = " <<size<<" , Exec time = " << endtimer-starttimer << "\n";
78+
79+
}
80+
else if(correctness){
81+
CkPrintf("Strassen - Correct: matrix size = %d, Threshold = %d,# of proc = %d, Exec time = %lf sec \n", size, THRESHOLD,CkNumPes(),endtimer-starttimer);
82+
myfile<<"Strassen - Correct: matrix size = " <<size<<", Threshold = "<< THRESHOLD<<",# of proc = "<<CkNumPes()<<" , Exec time = " << endtimer-starttimer << "\n";
7383
}
7484
else{
75-
CkPrintf("Incorrect: matrix size = %d, Threshold = %d,# of proc = %d, Exec time = %lf sec \n", size, THRESHOLD,CkNumPes(),endtimer-starttimer);
76-
myfile<<"Incorrect: matrix size = " <<size<<", Threshold = "<< THRESHOLD<<",# of proc = "<<CkNumPes()<<" , Exec time = " << endtimer-starttimer << "\n";
85+
CkPrintf("Strassen - Incorrect: matrix size = %d, Threshold = %d,# of proc = %d, Exec time = %lf sec \n", size, THRESHOLD,CkNumPes(),endtimer-starttimer);
86+
myfile<<"Strassen - Incorrect: matrix size = " <<size<<", Threshold = "<< THRESHOLD<<",# of proc = "<<CkNumPes()<<" , Exec time = " << endtimer-starttimer << "\n";
7787
}
78-
delete m;
7988
myfile.close();
89+
delete m;
8090

8191
CkExit();
8292
}

test.txt

Whitespace-only changes.

0 commit comments

Comments
 (0)