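// Parallel matrix multiplication with Cannon's algorithm over a q x q MPI
// process grid (q = sqrt(p)). Rank 0 scatters block_size x block_size tiles
// of A and B, every process runs q multiply-and-rotate rounds, and the C
// tiles are gathered back on rank 0.
//
// Assumed build/run (executable name is illustrative):
//   mpicxx -O2 cannon.cpp -o cannon
//   mpirun -np 4 ./cannon 512 -v    # 512 x 512 matrices, verify result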
#include <mpi.h>
#include <iostream>
#include <vector>
#include <cstdlib>   // atoi, drand48
#include <cstring>   // strcmp
#include <cmath>     // sqrt
using namespace std;
#define max_value 10 // matrix entries are drawn from [0, max_value)
int N;               // global matrix dimension (N x N)
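// Multiply two n x n row-major blocks and accumulate into C_sub:
// C_sub += A_sub * B_sub. Called once per Cannon round on each process
// (and once on rank 0 with n = N to build the serial reference).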
void multiply_matrix(vector<int> &A_sub, vector<int> &B_sub, vector<int> &C_sub, int n){
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    cout << "Process " << rank << " performing local matrix multiplication\n";
    for (int i = 0; i < n; ++i){
        for (int j = 0; j < n; ++j){
            int sum = 0;
            for (int k = 0; k < n; ++k){
                sum += A_sub[i * n + k] * B_sub[k * n + j];
            }
            C_sub[i * n + j] += sum; // accumulate across Cannon rounds
        }
    }
    cout << "Process " << rank << " local matrix multiplication ends\n";
}
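// Fill a matrix with pseudo-random ints in [0, max_value). drand48() is
// never seeded here, so every run produces the same matrices.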
void matrix_init(vector<int> &matrix){
    for (size_t i = 0; i < matrix.size(); i++){
        matrix[i] = static_cast<int>(drand48() * max_value);
    }
}
bool verify(vector<int> &expected, vector<int> &result){
    for (size_t i = 0; i < expected.size(); i++){
        if (expected[i] != result[i])
            return false;
    }
    return true;
}
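// Driver: parse arguments, build the process grid, scatter blocks of A and B,
// run Cannon's algorithm (initial skew + q rounds of multiply/shift), gather
// the C blocks on rank 0, and optionally verify against a serial product.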
int main(int argc, char **argv){
    if (argc < 2){
        cerr << "usage: " << argv[0] << " <matrix size N> [-v]\n";
        return -1;
    }
    bool vf = false;
    N = atoi(argv[1]);
    if (argc > 2 && strcmp(argv[2], "-v") == 0)
        vf = true;
    MPI_Init(&argc, &argv);
    int p, rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    int c = 1; // replication depth; fixed at 1, so this is plain 2D Cannon
    /* Leftover from a 2.5D variant: pick the largest perfect square c that divides p.
    for (int i = 1; i <= p; ++i){
        if (p % (i * i) == 0)
            c = i * i; // 'c' is updated if 'p' is divisible by 'i * i'
    }*/
    int q = static_cast<int>(sqrt(p / c));
    if (q * q != p / c){
        if (rank == 0)
            cout << "Number of processes must be a perfect square!\n";
        MPI_Finalize();
        return -1;
    }
    int dims[3] = {q, q, c};
    int periods[3] = {1, 1, 1}; // periodic in every dimension so shifts wrap around
    MPI_Comm grid_comm;
    // reorder = 0 keeps grid ranks equal to world ranks, so the Scatterv
    // block layout below matches the grid coordinates.
    MPI_Cart_create(MPI_COMM_WORLD, 3, dims, periods, 0, &grid_comm);
    if (N % q != 0){
        if (rank == 0)
            cout << "Matrix size must be divisible by sqrt(p)!\n";
        MPI_Finalize();
        return -1;
    }
    int block_size = N / q;
    vector<int> A(N * N), B(N * N);
    vector<int> A_sub(block_size * block_size), B_sub(block_size * block_size);
    vector<int> C_serial(N * N, 0), C(N * N, 0), C_sub(block_size * block_size, 0);
    if (rank == 0){
        matrix_init(A);
        matrix_init(B);
        if (vf)
            multiply_matrix(A, B, C_serial, N); // full-size serial reference
    }
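    // A block_size x block_size sub-block of the row-major N x N matrix is
    // block_size rows of block_size ints, each row N ints apart: an MPI
    // vector type. Resizing its extent to block_size ints lets Scatterv and
    // Gatherv address block (r, c) with the integer displacement r * N + c.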
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Datatype block_type, block_type_resized;
    MPI_Type_vector(block_size, block_size, N, MPI_INT, &block_type);
    MPI_Type_create_resized(block_type, 0, block_size * sizeof(int), &block_type_resized);
    MPI_Type_commit(&block_type_resized);
    cout << rank << " Type commit\n";
    // One block per process; displacements are in units of the resized
    // extent (block_size ints): block (r, c) starts at r * N + c such units.
    vector<int> counts(p, 1), displs(p);
    for (int i = 0; i < p; ++i)
        displs[i] = (i / q) * N + (i % q);
    // Plain MPI_Scatter cannot express these displacements, so use Scatterv.
    MPI_Scatterv(A.data(), counts.data(), displs.data(), block_type_resized,
                 A_sub.data(), block_size * block_size, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Scatterv(B.data(), counts.data(), displs.data(), block_type_resized,
                 B_sub.data(), block_size * block_size, MPI_INT, 0, MPI_COMM_WORLD);
    cout << rank << " Scatter\n";
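    // Cannon's algorithm: first skew the blocks (row i of A rotates left by i,
    // column j of B rotates up by j) so that process (i, j) starts with
    // A(i, i+j) and B(i+j, j). Each of the q rounds multiplies the resident
    // blocks and rotates A left / B up by one, so every process sees all q
    // block pairings its C block needs.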
    int coords[3];
    MPI_Cart_coords(grid_comm, rank, 3, coords);
    MPI_Status status;
    int src, dst;
    // Initial skew of A: shift row i left by i positions (dim 1 = columns).
    MPI_Cart_shift(grid_comm, 1, -coords[0], &src, &dst);
    MPI_Sendrecv_replace(A_sub.data(), block_size * block_size, MPI_INT, dst, 0, src, 0, grid_comm, &status);
    // Initial skew of B: shift column j up by j positions (dim 0 = rows).
    MPI_Cart_shift(grid_comm, 0, -coords[1], &src, &dst);
    MPI_Sendrecv_replace(B_sub.data(), block_size * block_size, MPI_INT, dst, 1, src, 1, grid_comm, &status);
    cout << rank << " Initial Shift\n";
    multiply_matrix(A_sub, B_sub, C_sub, block_size);
    int left_src, left_dst, up_src, up_dst;
    MPI_Cart_shift(grid_comm, 1, -1, &left_src, &left_dst); // left neighbor
    MPI_Cart_shift(grid_comm, 0, -1, &up_src, &up_dst);     // upper neighbor
    for (int step = 0; step < q - 1; ++step){
        MPI_Sendrecv_replace(A_sub.data(), block_size * block_size, MPI_INT, left_dst, 0, left_src, 0, grid_comm, &status);
        MPI_Sendrecv_replace(B_sub.data(), block_size * block_size, MPI_INT, up_dst, 1, up_src, 1, grid_comm, &status);
        multiply_matrix(A_sub, B_sub, C_sub, block_size);
    }
    cout << rank << " Algorithm\n";
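    // The inverse of the scatter: each process contributes its C block as raw
    // ints, and the root receives one resized block type per process at the
    // same extent-unit displacements used above.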
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Gatherv(
        C_sub.data(),            // send buffer: this process's C block
        block_size * block_size, // number of ints sent
        MPI_INT,                 // send type
        C.data(),                // receive buffer (significant only on root)
        counts.data(),           // one block from each process (only on root)
        displs.data(),           // block displacements in extent units (only on root)
        block_type_resized,      // receive type: one sub-block of the N x N matrix
        0,                       // root process
        MPI_COMM_WORLD           // communicator
    );
    cout << rank << " Gather\n";
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0 && vf){
        if (verify(C_serial, C))
            cout << "Correct!\n";
        else
            cout << "Incorrect!\n";
    }
    MPI_Type_free(&block_type_resized);
    MPI_Type_free(&block_type);
    MPI_Comm_free(&grid_comm);
    MPI_Finalize();
    return 0;
}