Untitled
unknown
c_cpp
2 years ago
9.5 kB
5
Indexable
#include <cuda_runtime.h> #include <helper_cuda.h> #include <math.h> #include <float.h> #include <stdio.h> #include <omp.h> #include <stdarg.h> #include "myProto.h" #define NUM_THREADS 256 /* kernel function to calculate coordinates according to given formula */ __global__ void calculateCoordinates(Coordinate *coordinates, Point *points, float t, int numPoints) { Point point; /* calculate index */ int i = threadIdx.x + blockIdx.x * blockDim.x; /* calculate only for valid indices */ if (i < numPoints) { point = points[i]; coordinates[i].x = ((point.x2 - point.x1) / 2 ) * sin (t * M_PI / 2) + (point.x2 + point.x1) / 2; coordinates[i].y = point.a * coordinates[i].x + point.b; //printf("Point ID = %d, Point X1 = %f, Point X2 = %f, Point a = %f, Point b = %f, Coordinate: X = %f, Y = %f\n", point.id, point.x1, point.x2, point.a, point.b, coordinates[i].x, coordinates[i].y); } } /* kernel function to calculate distance between every coordinate */ __global__ void calculateDistances(Coordinate *coordinates, float *distances, int numPoints) { int index = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; for (int i = index; i < numPoints * numPoints; i += stride) { int coord1 = i / numPoints; int coord2 = i % numPoints; /* avoid calculating distance with the point itself */ if (coord1 != coord2) { float dx = coordinates[coord2].x - coordinates[coord1].x; float dy = coordinates[coord2].y - coordinates[coord1].y; distances[i] = sqrt(dx * dx + dy * dy); } else { distances[i] = FLT_MAX; } //printf("Coord1 = (%f, %f), Coord2 = (%f, %f), distance = %f\n", coordinates[coord1].x, coordinates[coord1].y, coordinates[coord2].x, coordinates[coord2].y, distances[i]); } } /* kernel function to check proximity criteria for each point */ __global__ void checkProximityCriteria(Point *points, float *distances, int *results, int *globalCounter, int numPoints, int K, float D) { int index = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; for (int i = index; i < numPoints && *globalCounter < NUM_SATISFY; i += stride) { int count = 0; for (int j = 0; j < numPoints && *globalCounter < NUM_SATISFY; j++) { if (i != j && distances[i * numPoints + j] < D) { count++; } //printf("pointID = %d, local counter = %d\n", points[i].id, count); } if (count >= K) { results[i] = 1; atomicAdd(globalCounter, 1); ////printf("GPU: PointID%d satisfy Proximity Criteria \n", points[i].id); } else { results[i] = 0; } //printf("globalCounter = %d\n", *globalCounter); } } /* find 3 points that satisfy the criteria and insert them to local indices array */ void fillIndicesArray(int *results, int *indices_array, int numPoints) { int i, count = 0; //tid; // TODO - try to parallelize //#pragma omp parallel shared(results, indices_array) private(tid) //{ //tid = omp_get_thread_num(); //#pragma omp for for (i = 0; i < numPoints; i++) { if (results[i] == 1) { indices_array[count] = i; count++; if (count >= NUM_SATISFY) break; } } //} } /* function to free all GPU memory */ void cudaFreeAll(int num, ...) { va_list valist; va_start(valist, num); for (int i = 0; i < num; i++) { void* d_ptr = va_arg(valist, void*); if(d_ptr != NULL) { cudaError_t err = cudaFree(d_ptr); if(err != cudaSuccess){ printf("Error freeing CUDA memory: %s\n", cudaGetErrorString(err)); } } else { printf("Warning: Attempt to free a NULL pointer.\n"); } } va_end(valist); } /* GPU "controller" function to evaluate Proximity Criteria for each point */ int computeOnGPU(int *local_indices, Point *h_points, int N, int K, float D, float t) { Point *d_points; Coordinate *d_coordinates; float *d_distances; int *d_results; int *d_global_counter; int *h_results; /* Allocate memory for the global counter on the device */ cudaError_t err = cudaMalloc((void**)&d_global_counter, sizeof(int)); if (err != cudaSuccess) { printf("GPU - Failed to allocate device memory for counter: %s\n", cudaGetErrorString(err)); return -1; } int h_counter = 0; /* Set the counter in the device memory to 0 */ err = cudaMemcpy(d_global_counter, &h_counter, sizeof(int), cudaMemcpyHostToDevice); if (err != cudaSuccess) { printf("GPU - Failed to set device memory for counter: %s\n", cudaGetErrorString(err)); cudaFreeAll(1, d_global_counter); return -1; } /* Allocate memory for points on the device */ err = cudaMalloc((void**)&d_points, N * sizeof(Point)); if (err != cudaSuccess) { printf("GPU - Failed to allocate device memory for points: %s\n", cudaGetErrorString(err)); cudaFreeAll(1, d_global_counter); return -1; } //printf("GPU - Allocate memory for coordinates on the device\n"); /* Allocate memory for coordinates on the device */ err = cudaMalloc((void**)&d_coordinates, N * sizeof(Coordinate)); if (err != cudaSuccess) { printf("GPU - Failed to allocate device memory for coordinates: %s\n", cudaGetErrorString(err)); cudaFreeAll(2, d_global_counter, d_points); return -1; } /* Allocate memory for distances on the device */ err = cudaMalloc((void**)&d_distances, N * N * sizeof(float)); if (err != cudaSuccess) { printf("GPU - Failed to allocate device memory for distances: %s\n", cudaGetErrorString(err)); cudaFreeAll(3, d_global_counter, d_points, d_coordinates); return -1; } /* Allocate memory for results on the device */ err = cudaMalloc((void**)&d_results, N * sizeof(int)); if (err != cudaSuccess) { printf("GPU - Failed to allocate device memory for results: %s\n", cudaGetErrorString(err)); cudaFreeAll(4, d_global_counter, d_points, d_coordinates, d_distances); return -1; } /* Copy points from host to device */ err = cudaMemcpy(d_points, h_points, N * sizeof(Point), cudaMemcpyHostToDevice); if (err != cudaSuccess) { printf("GPU - Failed to copy points from host to device: %s\n", cudaGetErrorString(err)); cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results); return -1; } /* Calculate required blocks */ int numBlocks = (N + NUM_THREADS - 1) / NUM_THREADS; /* Run the calculateCoordinates kernel, to calculate coordinates */ calculateCoordinates<<<numBlocks, NUM_THREADS>>>(d_coordinates, d_points, t, N); cudaDeviceSynchronize(); err = cudaGetLastError(); if (err != cudaSuccess) { printf("Error running calculateDistances: %s\n", cudaGetErrorString(err)); cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results); return -1; } /* Run the calculateDistances kernel, to calculate distanceses */ calculateDistances<<<numBlocks, NUM_THREADS>>>(d_coordinates, d_distances, N); cudaDeviceSynchronize(); err = cudaGetLastError(); if (err != cudaSuccess) { cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results); return -1; } /* Run the checkProximityCriteria kernel */ checkProximityCriteria<<<numBlocks, NUM_THREADS>>>(d_points, d_distances, d_results, d_global_counter, N, K, D); cudaDeviceSynchronize(); err = cudaGetLastError(); if (err != cudaSuccess) { printf("Error running checkProximityCriteria: %s\n", cudaGetErrorString(err)); cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results); return -1; } int counter; /* Copy counter from device to host */ err = cudaMemcpy(&counter, d_global_counter, sizeof(int), cudaMemcpyDeviceToHost); if (err != cudaSuccess) { printf("GPU - Failed to copy results from device to host: %s\n", cudaGetErrorString(err)); //cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results); return -1; } if (counter < NUM_SATISFY) { cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results); return 1; } /* Allocate memory for results on host */ h_results = (int *)malloc(N * sizeof(int)); if (h_results == NULL) { printf("GPU - Failed to copy results from device to host: %s\n", cudaGetErrorString(err)); cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results); return -1; } /* Copy results from device to host */ err = cudaMemcpy(h_results, d_results, N * sizeof(int), cudaMemcpyDeviceToHost); if (err != cudaSuccess) { printf("GPU - Failed to copy results from device to host: %s\n", cudaGetErrorString(err)); free(h_results); cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results); return -1; } /* find NUM_SATISFY points that satisfy the criteria and insert them to local indices array */ fillIndicesArray(h_results, local_indices, N); /* Free device memory */ free(h_results); cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results); return 0; }
Editor is loading...