/
mm.c
450 lines (382 loc) · 16.5 KB
/
mm.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
/*!
*
* \file mm.c
* \brief Implementation of a parallel matrix multiplication algorithm
*
* \author BJ Peter DeLaCruz
*
* \date June 7, 2010
*
* \version 1.0
*
* \details \par How this program works:
* Given an M x N matrix and an N x P matrix, the program will perform matrix
* multiplication. Each process will be given M / Q rows from the first matrix and a copy
* of the entire N x P matrix. Each process will then perform matrix multiplication
* for each element in its rows. Finally, the results will be sent to the master, which
* will combine all of them together.
*
* \par Example:
* 2 processes and two 2x2 matrices.\n
* Process 0's rowA is first row in matrixA.\n
* Process 1's rowA is second row in matrixA.
* \arg Process 0: results[0] = (rowA[0] * matrixB[0][0]) + (rowA[1] * matrixB[1][0])
* \arg Process 0: results[1] = (rowA[0] * matrixB[0][1]) + (rowA[1] * matrixB[1][1])
* \arg Process 1: results[0] = (rowA[0] * matrixB[0][0]) + (rowA[1] * matrixB[1][0])
* \arg Process 1: results[1] = (rowA[0] * matrixB[0][1]) + (rowA[1] * matrixB[1][1])
*
* \note
* \arg M mod Q must equal 0, where Q is the number of processes.
* \arg This version of matrix multiplication does not use a ring topology.
*
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
/*! Rank of the master process. main() compares PROCESS_ID against this value and workers use it as the peer for MPI_Send/MPI_Recv. */
#define MASTER 0
/*!
 *
 * \par Description:
 * Assigns random values to height * width consecutive elements starting at \b matrix.
 * The parameter order (height first, then width) matches the definition and the
 * call sites in main().
 *
 * \param matrix Pointer to the first element of the matrix to fill
 * \param height Number of rows in \b matrix
 * \param width Number of columns in \b matrix
 *
 */
void initialize(double* matrix, int height, int width);
/*!
 *
 * \par Description:
 * Prints matrix to the screen, one matrix row per output line.
 * The parameter order (height first, then width) matches the definition and the
 * call sites in main().
 *
 * \param matrix Pointer to the first element of the matrix to print
 * \param height Number of rows in \b matrix
 * \param width Number of columns in \b matrix
 *
 */
void print_matrix(double* matrix, int height, int width);
/*!
 *
 * \par Description:
 * Allocates storage space for a matrix on the heap. The matrix is represented as an
 * array of row pointers, each row being a separately allocated, zero-filled array of doubles.
 *
 * \note
 * Not called in program as matrices are currently being allocated on the stack.
 *
 * \param height Number of rows in matrix
 * \param width Number of columns in matrix
 *
 * \return Pointer to the array of row pointers, or NULL if an allocation fails.
 *
 */
double** create_matrix(int height, int width);
/*!
 *
 * \par Description:
 * Frees up storage space occupied by a matrix on the heap: first each of the
 * \b height rows, then the array of row pointers itself.
 *
 * \note
 * Not called in program as matrices are currently being allocated on the stack.
 *
 * \param matrix 2-dimensional array (array of row pointers)
 * \param height Number of rows in matrix
 *
 */
void destroy_matrix(double** matrix, int height);
/*!
* \param argv[1] Number of rows in matrix A
* \param argv[2] Number of columns in matrix A
* \param argv[3] Number of rows in matrix B
* \param argv[4] Number of columns in matrix B
*/
int main(int argc, char** argv) {
/* Holds a row that will be used in matrix C */
double* results = NULL;
/* Holds a row from matrix A */
double* rowA = NULL;
/* Number of rows in matrix A */
int A_HEIGHT;
/* Number of columns in matrix A */
int A_WIDTH;
/* Number of rows in matrix B */
int B_HEIGHT;
/* Number of columns in matrix B */
int B_WIDTH;
/* Used for error handling */
int error_code;
/* Total number of processes used in this program */
int NUMBER_OF_PROCESSES;
/* Current process */
int PROCESS_ID;
#ifndef SERIAL
/* Message identifier for sending/receiving columns to/from processes */
int COLUMN_TAG = 0;
/* Main loop counter. Counts from 0 to N, where N = SIZE. */
int program_counter;
/* Message identifier for sending/receiving rows to/from processes */
int ROW_TAG = 1;
/* Number of rows per process */
int SIZE;
#endif
/* Used to start timing matrix multiplication algorithm */
time_t start;
/* Used to end timing matrix multiplication algorithm */
time_t end;
/* Derived datatype for sending a column in a matrix to a process */
MPI_Datatype column_type;
/* Used in MPI_Recv */
MPI_Status status;
/***************************************************************************************************/
if (argc != 5) {
printf("Usage: ./mm ");
printf("[number of rows in matrix A] [number of columns in matrix A] ");
printf("[number of rows in matrix B] [number of columns in matrix B]\nPlease try again.\n");
exit(1);
}
if ((A_HEIGHT = atoi(argv[1])) <= 0) {
printf("Error: Invalid argument for number of rows in matrix A. Please try again.\n");
exit(1);
}
if ((A_WIDTH = atoi(argv[2])) <= 0) {
printf("Error: Invalid argument for number of columns in matrix A. Please try again.\n");
exit(1);
}
if ((B_HEIGHT = atoi(argv[3])) <= 0) {
printf("Error: Invalid argument for number of rows in matrix B. Please try again.\n");
exit(1);
}
if ((B_WIDTH = atoi(argv[4])) <= 0) {
printf("Error: Invalid argument for number of columns in matrix B. Please try again.\n");
exit(1);
}
if (A_WIDTH != B_HEIGHT) {
printf("Error: Column length of Matrix A does not equal row length of Matrix B.\n");
exit(1);
}
/***************************************************************************************************/
double matrixA[A_HEIGHT][A_WIDTH];
double matrixB[B_HEIGHT][B_WIDTH];
double matrixC[A_HEIGHT][B_WIDTH];
error_code = MPI_Init(&argc, &argv);
error_code = MPI_Comm_size(MPI_COMM_WORLD, &NUMBER_OF_PROCESSES);
error_code = MPI_Comm_rank(MPI_COMM_WORLD, &PROCESS_ID);
if (error_code != 0) {
printf("Error encountered while initializing MPI and obtaining task information.\n");
MPI_Finalize();
exit(1);
}
if (A_HEIGHT % NUMBER_OF_PROCESSES != 0) {
printf("Number of rows in matrix A = %d\tNumber of processes = %d\n", A_HEIGHT, NUMBER_OF_PROCESSES);
printf("Number of processes does NOT divide number of rows in matrix A. Please try again.\n");
printf("[For example: Number of rows in matrix A = 24. Number of processes = 8.]\n");
MPI_Finalize();
exit(1);
}
srand(time(NULL));
/***************************************************************************************************/
SIZE = A_HEIGHT / NUMBER_OF_PROCESSES;
/****************************************************************************************************
** MASTER **
****************************************************************************************************/
if (PROCESS_ID == MASTER) {
int j, k;
#ifndef SERIAL
int current_row,
destination, /* process that receives data from master */
source, /* process that sent data to master */
previous_row;
#else
int i;
#endif
initialize(&matrixA[0][0], A_HEIGHT, A_WIDTH);
initialize(&matrixB[0][0], B_HEIGHT, B_WIDTH);
#ifdef DEBUG
/*************************************************************************************
** Note: Be sure that the heights and widths are not too large so that the matrices **
** will be small enough to be viewable. --BPD **
*************************************************************************************/
printf("\n");
printf("======================================================================\n");
printf("== Matrix A ==\n");
printf("======================================================================\n\n");
print_matrix(&matrixA[0][0], A_HEIGHT, A_WIDTH);
printf("\n");
printf("======================================================================\n");
printf("== Matrix B ==\n");
printf("======================================================================\n\n");
print_matrix(&matrixB[0][0], B_HEIGHT, B_WIDTH);
printf("\n");
#endif
start = time(NULL);
#ifndef SERIAL
/****************************************************************************************************
** Send rows in matrix A to workers and then get results from them **
****************************************************************************************************/
for (program_counter = 0, current_row = 0; program_counter < SIZE; program_counter++) {
/***** Master calculates its row *****/
for (j = 0; j < B_WIDTH; j++) {
matrixC[current_row][j] = 0.0;
for (k = 0; k < A_WIDTH; k++) {
matrixC[current_row][j] += (matrixA[current_row][k] * matrixB[k][j]);
}
}
current_row++;
previous_row = current_row;
for (destination = 1; destination < NUMBER_OF_PROCESSES; destination++) {
MPI_Send(&matrixA[current_row++][0], A_WIDTH, MPI_DOUBLE, destination, ROW_TAG, MPI_COMM_WORLD);
}
/****************************************************************************************************
** Send matrix B to workers only once **
****************************************************************************************************/
if (program_counter == 0) {
for (destination = 1; destination < NUMBER_OF_PROCESSES; destination++) {
MPI_Send(&matrixB[0][0], B_HEIGHT * B_WIDTH, MPI_DOUBLE, destination, COLUMN_TAG, MPI_COMM_WORLD);
}
}
for (source = 1; source < NUMBER_OF_PROCESSES; source++) {
MPI_Recv(&matrixC[previous_row++][0], A_WIDTH, MPI_DOUBLE, source, ROW_TAG, MPI_COMM_WORLD, &status);
}
}
#else
printf("======================================================================\n");
printf("== Serial version ==\n");
printf("======================================================================\n\n");
for (i = 0; i < A_HEIGHT; i++) {
for (j = 0; j < B_WIDTH; j++) {
matrixC[i][j] = 0.0;
for (k = 0; k < A_WIDTH; k++) {
matrixC[i][j] += (matrixA[i][k] * matrixB[k][j]);
}
}
}
#ifdef DEBUG
print_matrix(&matrixC[0][0], A_HEIGHT, B_WIDTH);
printf("\n");
#endif
#endif
end = time(NULL);
}
/****************************************************************************************************
** WORKERS **
****************************************************************************************************/
else {
#ifndef SERIAL
int i, j;
results = (double*) calloc(B_WIDTH, sizeof(double));
if (results == NULL) {
printf("Memory allocation failed for results array! ");
printf("Unable to allocate memory on process %d.\nAborting program...\n", PROCESS_ID);
MPI_Finalize();
exit(1);
}
rowA = (double*) calloc(A_WIDTH, sizeof(double));
if (rowA == NULL) {
printf("Memory allocation failed for rowA array! ");
printf("Unable to allocate memory on process %d.\nAborting program...\n", PROCESS_ID);
MPI_Finalize();
exit(1);
}
/****************************************************************************************************
** Get rows in matrix A and everything in matrix B from Master **
****************************************************************************************************/
for (program_counter = 0; program_counter < SIZE; program_counter++) {
MPI_Recv(&rowA[0], A_WIDTH, MPI_DOUBLE, MASTER, ROW_TAG, MPI_COMM_WORLD, &status);
if (program_counter == 0) {
MPI_Recv(&matrixB[0][0], B_HEIGHT * B_WIDTH, MPI_DOUBLE, MASTER, COLUMN_TAG, MPI_COMM_WORLD, &status);
}
/***** Perform matrix multiplication, store in results, and then send results to Master *****/
for (i = 0; i < B_WIDTH; i++) {
results[i] = 0.0;
for (j = 0; j < A_WIDTH; j++) {
results[i] += (rowA[j] * matrixB[j][i]);
}
}
MPI_Send(&results[0], A_WIDTH, MPI_DOUBLE, MASTER, ROW_TAG, MPI_COMM_WORLD);
}
#endif
}
MPI_Barrier(MPI_COMM_WORLD);
/****************************************************************************************************
** Print results **
****************************************************************************************************/
if (PROCESS_ID == MASTER) {
#ifdef DEBUG
printf("======================================================================\n");
printf("== Results ==\n");
printf("======================================================================\n\n");
print_matrix(&matrixC[0][0], A_HEIGHT, B_WIDTH);
printf("\n");
#endif
printf("======================================================================\n");
printf("== Summary ==\n");
printf("======================================================================\n\n");
printf("Total number of processes: %10d\n\n", NUMBER_OF_PROCESSES);
printf("Matrix A\n");
printf(" Number of rows: %10d\n", A_HEIGHT);
printf(" Number of columns: %10d\n", A_WIDTH);
printf(" Number of elements in matrix A\n");
printf(" (number of rows * number of columns): %10d\n\n", A_HEIGHT * A_WIDTH);
printf("Matrix B\n");
printf(" Number of rows: %10d\n", B_HEIGHT);
printf(" Number of columns: %10d\n", B_WIDTH);
printf(" Number of elements in matrix B: %10d\n\n", B_HEIGHT * B_WIDTH);
printf("Matrix C (results)\n");
printf(" Number of rows: %10d\n", A_HEIGHT);
printf(" Number of columns: %10d\n", B_WIDTH);
printf(" Number of elements in matrix C: %10d\n\n", A_HEIGHT * B_WIDTH);
printf("Total runtime: %13.2f seconds\n\n", difftime(end, start));
}
/***************************************************************************************************/
if (PROCESS_ID != MASTER) {
free(rowA);
free(results);
}
MPI_Finalize();
return 0;
}
/*!
 * Fills height * width consecutive doubles, starting at matrix, with values from rand().
 * When DEBUG is defined the values are limited to the range 1..10 so that printed
 * matrices stay readable. Nothing is written if height or width is not positive.
 */
void initialize(double* matrix, int height, int width) {
    double* cell = matrix;
    int row = 0;

    while (row < height) {
        int col = 0;

        while (col < width) {
#ifdef DEBUG
            *cell = rand() % 10 + 1;
#else
            *cell = rand();
#endif
            cell++;
            col++;
        }
        row++;
    }
}
/*!
 * Writes the height * width doubles starting at matrix to standard output. Each value is
 * printed with no decimals in a field of width 10 followed by a tab, and a newline is
 * emitted after every group of width values. Nothing is printed if height is not positive.
 */
void print_matrix(double* matrix, int height, int width) {
    const double* cursor = matrix;
    int rows_left;

    for (rows_left = height; rows_left > 0; rows_left--) {
        int cols_left;

        for (cols_left = width; cols_left > 0; cols_left--) {
            printf("%10.0f\t", *cursor);
            cursor++;
        }
        printf("\n");
    }
}
/*!
 * Allocates a zero-filled height x width matrix on the heap as an array of height row
 * pointers, each pointing to a separately allocated array of width doubles.
 *
 * \param height Number of rows in matrix (must be positive)
 * \param width Number of columns in matrix (must be positive)
 *
 * \return The array of row pointers, or NULL if a dimension is not positive or an
 * allocation fails. On failure nothing is leaked. The caller owns the result and
 * releases it with destroy_matrix(matrix, height).
 */
double** create_matrix(int height, int width) {
    double** matrix;
    int i;

    if (height <= 0 || width <= 0) {
        return NULL;
    }

    /* One pointer per ROW: the array is indexed by row below, so it needs height entries. */
    matrix = calloc((size_t) height, sizeof *matrix);
    if (matrix == NULL) {
        return NULL;
    }

    for (i = 0; i < height; i++) {
        matrix[i] = calloc((size_t) width, sizeof *matrix[i]);
        if (matrix[i] == NULL) {
            /* Roll back the rows allocated so far so a partial failure does not leak. */
            while (i > 0) {
                i--;
                free(matrix[i]);
            }
            free(matrix);
            return NULL;
        }
    }
    return matrix;
}
/*!
 * Releases a matrix laid out as an array of row pointers: frees each of the first height
 * rows and then the array of row pointers itself.
 *
 * \param matrix Array of row pointers; may be NULL, in which case nothing is done
 *               (create_matrix returns NULL on failure, so this keeps cleanup paths simple)
 * \param height Number of rows in matrix
 */
void destroy_matrix(double** matrix, int height) {
    int i;

    if (matrix == NULL) {
        return;
    }
    for (i = 0; i < height; i++) {
        free(matrix[i]);
    }
    free(matrix);
}