Untitled
unknown
c_cpp
2 years ago
9.5 kB
10
Indexable
#include <cuda_runtime.h>
#include <helper_cuda.h>
#include <math.h>
#include <float.h>
#include <stdio.h>
#include <omp.h>
#include <stdarg.h>
#include "myProto.h"
#define NUM_THREADS 256
/* kernel function to calculate coordinates according to given formula */
__global__ void calculateCoordinates(Coordinate *coordinates, Point *points, float t, int numPoints) {
Point point;
/* calculate index */
int i = threadIdx.x + blockIdx.x * blockDim.x;
/* calculate only for valid indices */
if (i < numPoints) {
point = points[i];
coordinates[i].x = ((point.x2 - point.x1) / 2 ) * sin (t * M_PI / 2) + (point.x2 + point.x1) / 2;
coordinates[i].y = point.a * coordinates[i].x + point.b;
//printf("Point ID = %d, Point X1 = %f, Point X2 = %f, Point a = %f, Point b = %f, Coordinate: X = %f, Y = %f\n", point.id, point.x1, point.x2, point.a, point.b, coordinates[i].x, coordinates[i].y);
}
}
/* kernel function to calculate distance between every coordinate */
__global__ void calculateDistances(Coordinate *coordinates, float *distances, int numPoints) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
for (int i = index; i < numPoints * numPoints; i += stride) {
int coord1 = i / numPoints;
int coord2 = i % numPoints;
/* avoid calculating distance with the point itself */
if (coord1 != coord2) {
float dx = coordinates[coord2].x - coordinates[coord1].x;
float dy = coordinates[coord2].y - coordinates[coord1].y;
distances[i] = sqrt(dx * dx + dy * dy);
} else {
distances[i] = FLT_MAX;
}
//printf("Coord1 = (%f, %f), Coord2 = (%f, %f), distance = %f\n", coordinates[coord1].x, coordinates[coord1].y, coordinates[coord2].x, coordinates[coord2].y, distances[i]);
}
}
/* kernel function to check proximity criteria for each point */
__global__ void checkProximityCriteria(Point *points, float *distances, int *results, int *globalCounter, int numPoints, int K, float D) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
for (int i = index; i < numPoints && *globalCounter < NUM_SATISFY; i += stride) {
int count = 0;
for (int j = 0; j < numPoints && *globalCounter < NUM_SATISFY; j++) {
if (i != j && distances[i * numPoints + j] < D) {
count++;
}
//printf("pointID = %d, local counter = %d\n", points[i].id, count);
}
if (count >= K) {
results[i] = 1;
atomicAdd(globalCounter, 1);
////printf("GPU: PointID%d satisfy Proximity Criteria \n", points[i].id);
} else {
results[i] = 0;
}
//printf("globalCounter = %d\n", *globalCounter);
}
}
/* find 3 points that satisfy the criteria and insert them to local indices array */
void fillIndicesArray(int *results, int *indices_array, int numPoints)
{
int i, count = 0; //tid;
// TODO - try to parallelize
//#pragma omp parallel shared(results, indices_array) private(tid)
//{
//tid = omp_get_thread_num();
//#pragma omp for
for (i = 0; i < numPoints; i++) {
if (results[i] == 1) {
indices_array[count] = i;
count++;
if (count >= NUM_SATISFY)
break;
}
}
//}
}
/* function to free all GPU memory */
void cudaFreeAll(int num, ...) {
va_list valist;
va_start(valist, num);
for (int i = 0; i < num; i++) {
void* d_ptr = va_arg(valist, void*);
if(d_ptr != NULL) {
cudaError_t err = cudaFree(d_ptr);
if(err != cudaSuccess){
printf("Error freeing CUDA memory: %s\n", cudaGetErrorString(err));
}
} else {
printf("Warning: Attempt to free a NULL pointer.\n");
}
}
va_end(valist);
}
/* GPU "controller" function to evaluate Proximity Criteria for each point */
int computeOnGPU(int *local_indices, Point *h_points, int N, int K, float D, float t) {
Point *d_points;
Coordinate *d_coordinates;
float *d_distances;
int *d_results;
int *d_global_counter;
int *h_results;
/* Allocate memory for the global counter on the device */
cudaError_t err = cudaMalloc((void**)&d_global_counter, sizeof(int));
if (err != cudaSuccess) {
printf("GPU - Failed to allocate device memory for counter: %s\n", cudaGetErrorString(err));
return -1;
}
int h_counter = 0;
/* Set the counter in the device memory to 0 */
err = cudaMemcpy(d_global_counter, &h_counter, sizeof(int), cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
printf("GPU - Failed to set device memory for counter: %s\n", cudaGetErrorString(err));
cudaFreeAll(1, d_global_counter);
return -1;
}
/* Allocate memory for points on the device */
err = cudaMalloc((void**)&d_points, N * sizeof(Point));
if (err != cudaSuccess) {
printf("GPU - Failed to allocate device memory for points: %s\n", cudaGetErrorString(err));
cudaFreeAll(1, d_global_counter);
return -1;
}
//printf("GPU - Allocate memory for coordinates on the device\n");
/* Allocate memory for coordinates on the device */
err = cudaMalloc((void**)&d_coordinates, N * sizeof(Coordinate));
if (err != cudaSuccess) {
printf("GPU - Failed to allocate device memory for coordinates: %s\n", cudaGetErrorString(err));
cudaFreeAll(2, d_global_counter, d_points);
return -1;
}
/* Allocate memory for distances on the device */
err = cudaMalloc((void**)&d_distances, N * N * sizeof(float));
if (err != cudaSuccess) {
printf("GPU - Failed to allocate device memory for distances: %s\n", cudaGetErrorString(err));
cudaFreeAll(3, d_global_counter, d_points, d_coordinates);
return -1;
}
/* Allocate memory for results on the device */
err = cudaMalloc((void**)&d_results, N * sizeof(int));
if (err != cudaSuccess) {
printf("GPU - Failed to allocate device memory for results: %s\n", cudaGetErrorString(err));
cudaFreeAll(4, d_global_counter, d_points, d_coordinates, d_distances);
return -1;
}
/* Copy points from host to device */
err = cudaMemcpy(d_points, h_points, N * sizeof(Point), cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
printf("GPU - Failed to copy points from host to device: %s\n", cudaGetErrorString(err));
cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results);
return -1;
}
/* Calculate required blocks */
int numBlocks = (N + NUM_THREADS - 1) / NUM_THREADS;
/* Run the calculateCoordinates kernel, to calculate coordinates */
calculateCoordinates<<<numBlocks, NUM_THREADS>>>(d_coordinates, d_points, t, N);
cudaDeviceSynchronize();
err = cudaGetLastError();
if (err != cudaSuccess) {
printf("Error running calculateDistances: %s\n", cudaGetErrorString(err));
cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results);
return -1;
}
/* Run the calculateDistances kernel, to calculate distanceses */
calculateDistances<<<numBlocks, NUM_THREADS>>>(d_coordinates, d_distances, N);
cudaDeviceSynchronize();
err = cudaGetLastError();
if (err != cudaSuccess) {
cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results);
return -1;
}
/* Run the checkProximityCriteria kernel */
checkProximityCriteria<<<numBlocks, NUM_THREADS>>>(d_points, d_distances, d_results, d_global_counter, N, K, D);
cudaDeviceSynchronize();
err = cudaGetLastError();
if (err != cudaSuccess) {
printf("Error running checkProximityCriteria: %s\n", cudaGetErrorString(err));
cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results);
return -1;
}
int counter;
/* Copy counter from device to host */
err = cudaMemcpy(&counter, d_global_counter, sizeof(int), cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
printf("GPU - Failed to copy results from device to host: %s\n", cudaGetErrorString(err));
//cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results);
return -1;
}
if (counter < NUM_SATISFY) {
cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results);
return 1;
}
/* Allocate memory for results on host */
h_results = (int *)malloc(N * sizeof(int));
if (h_results == NULL) {
printf("GPU - Failed to copy results from device to host: %s\n", cudaGetErrorString(err));
cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results);
return -1;
}
/* Copy results from device to host */
err = cudaMemcpy(h_results, d_results, N * sizeof(int), cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
printf("GPU - Failed to copy results from device to host: %s\n", cudaGetErrorString(err));
free(h_results);
cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results);
return -1;
}
/* find NUM_SATISFY points that satisfy the criteria and insert them to local indices array */
fillIndicesArray(h_results, local_indices, N);
/* Free device memory */
free(h_results);
cudaFreeAll(5, d_global_counter, d_points, d_coordinates, d_distances, d_results);
return 0;
}
Editor is loading...