// CUDA example: element-wise addition of two integer vectors/matrices on the GPU.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <ctime>
#define MAX_CUDA_BLOCKS 1024
cudaError_t addWithCuda(int *c, const int *a, const int *b, size_t size);
// Element-wise addition kernel: c[i] = a[i] + b[i].
//
// Expected launch: a 1-D grid of square 2-D blocks (blockDim.x == blockDim.y).
// NOTE(review): there is no bounds guard, so EVERY launched thread writes one
// element — the caller must ensure the device buffers cover all
// gridDim.x * blockDim.x * blockDim.y threads, not just the logical size.
__global__ void addKernel(int *c, const int *a, const int *b) { // Get current thread "position" x and y
int innerX = threadIdx.x;
int innerY = threadIdx.y;
// 1-D grid: blockIdx.x selects which block-sized slice of the vectors we handle
int blockYOffset = blockIdx.x;
// Get block size (to simulate matrix behaviour)
int blockWidth = blockDim.x * blockDim.y;
// Flatten (block, x, y) into a linear element index.
// NOTE(review): x and y look transposed vs. the conventional
// y * blockDim.x + x; the mapping is only a bijection over the block when
// blockDim.x == blockDim.y — confirm against the launch configuration.
int elementIndex = blockYOffset * blockWidth + innerX * blockDim.x + innerY;
// Return calculated result to the output pointer (unguarded — see note above)
c[elementIndex] = a[elementIndex] + b[elementIndex];
}
// Host entry point: builds two random integer vectors, adds them on the GPU
// via addWithCuda, and prints the result as a SIZE_X-wide matrix.
// Returns 0 on success, 1 on any CUDA failure.
int main() {
    // Seed the host RNG so each run gets different input data.
    srand((unsigned int) time(NULL));

    // Matrix dimensions (X x Y); (X x 1) degenerates to a plain vector.
    const int SIZE_X = 17;
    const int SIZE_Y = 1;
    // Cache the element count for later loops.
    const int VEC_LEN = SIZE_X * SIZE_Y;

    // Host-side input vectors and zero-initialized output vector.
    int a[VEC_LEN];
    int b[VEC_LEN];
    int c[VEC_LEN] = { 0 };

    // Fill the inputs with small pseudo-random values.
    // (BUGFIX: in the original, these assignments had fallen outside the loop.)
    for (int i = 0; i < VEC_LEN; i++) {
        a[i] = rand() % 100;
        b[i] = rand() % 100;
    }

    // Add the vectors on the GPU.
    cudaError_t cudaStatus = addWithCuda(c, a, b, VEC_LEN);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addWithCuda failed!");
        return 1;
    }

    // Print the result, one row of SIZE_X values per line.
    for (int i = 0; i < VEC_LEN; i++) {
        if (i && i % SIZE_X == 0) {
            printf("\n");
        }
        printf("%5d ", c[i]);
    }
    printf("\n");

    // Reset the device before exiting so profilers/debuggers see a clean state.
    // (cudaThreadExit is deprecated; cudaDeviceReset is its replacement.)
    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        return 1;
    }

    // Pause on finish (Windows-only convenience; harmless no-op elsewhere).
    system("pause");
    return 0;
}
// Helper function for using CUDA to add vectors in parallel.
// Helper function for using CUDA to add two integer vectors in parallel.
//
// c    - host output buffer, at least `size` elements.
// a, b - host input buffers, at least `size` elements each.
// size - number of elements to add.
//
// Returns cudaSuccess on success, otherwise the first CUDA error encountered.
// Device buffers are always freed before returning (cudaFree(0) is a no-op).
cudaError_t addWithCuda(int *c, const int *a, const int *b, size_t size) {
    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;
    cudaError_t cudaStatus;

    // Square NxN thread block; N*N == 1024, the hardware maximum per block.
    // (Declared before any goto: jumping over a const initialization is
    // ill-formed C++, which the original code did.)
    const int N = 32;
    const int threadsTotal = N * N;
    dim3 threadsPerBlock(N, N);

    // addKernel has no bounds guard, so every launched thread writes its
    // element. Round the allocations up to a whole number of blocks so the
    // excess threads in the last block still land inside the device buffers.
    size_t blockCount = (size + threadsTotal - 1) / threadsTotal;  // ceil-div
    size_t paddedSize = blockCount * threadsTotal;

    // Choose which GPU to run on; change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    // Allocate GPU buffers for three vectors (two input, one output),
    // padded to a whole number of blocks (see above).
    cudaStatus = cudaMalloc((void**)&dev_c, paddedSize * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }
    cudaStatus = cudaMalloc((void**)&dev_a, paddedSize * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }
    cudaStatus = cudaMalloc((void**)&dev_b, paddedSize * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    // Copy input vectors from host memory to GPU buffers. Only the first
    // `size` elements matter; the padding is never copied back.
    cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }
    cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    // Launch one 1024-thread block per 1024 elements (rounded up).
    addKernel<<<(unsigned int)blockCount, threadsPerBlock>>>(dev_c, dev_a, dev_b);

    // A kernel launch returns no error directly; check for launch-config
    // errors explicitly.
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // Wait for the kernel to finish and surface any execution errors.
    // (cudaThreadSynchronize is deprecated; cudaDeviceSynchronize replaces it.)
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
        goto Error;
    }

    // Copy output vector from GPU buffer to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

Error:
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);
    return cudaStatus;
}