Untitled
unknown
plain_text
a year ago
1.8 kB
8
Indexable
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>
// Counts Mandelbrot iterations for the point c = (cRe, cIm).
//
// Starts from z = c and repeatedly applies z = z*z + c. Returns the index of
// the first iteration at which |z|^2 > 4, or `count` if that never happens
// within `count` iterations. Callable from both host and device so the same
// routine can serve as a CPU reference.
static __host__ __device__ inline int mandelIterations(float cRe, float cIm, int count)
{
    float zRe = cRe, zIm = cIm;
    int i;
    for (i = 0; i < count; ++i)
    {
        // Stop as soon as the point escapes: later iterations cannot change
        // the answer and would only burn cycles on values heading to inf/NaN.
        if (zRe * zRe + zIm * zIm > 4.f)
            break;
        float newRe = zRe * zRe - zIm * zIm;
        float newIm = 2.f * zRe * zIm;
        zRe = cRe + newRe;
        zIm = cIm + newIm;
    }
    return i;
}

// Mandelbrot kernel: one thread computes one pixel.
//
// Launch layout: 2D grid of 2D blocks; the thread at global index (tx, ty)
// handles pixel (tx, ty). The grid may be larger than the image; surplus
// threads exit immediately. No shared memory is used.
//
//   output         resX * resY ints, written row by row (index ty * resX + tx)
//   lowerX/lowerY  coordinate of pixel (0, 0)
//   stepX/stepY    coordinate distance between neighbouring pixels
//   resX/resY      image width and height in pixels
//   count          maximum number of iterations per pixel
__global__ void mandelKernel(int* output, float lowerX, float lowerY, float stepX, float stepY, int resX, int resY, int count) {
    int tx = blockIdx.x * blockDim.x + threadIdx.x;
    int ty = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads that fall outside the image have nothing to compute.
    if (tx >= resX || ty >= resY)
        return;

    // Derive the coordinate from the pixel index (instead of accumulating
    // steps) to avoid compounding floating-point error.
    float x = lowerX + tx * stepX;
    float y = lowerY + ty * stepY;

    // Widen before multiplying so large images cannot overflow the index.
    output[(size_t)ty * resX + tx] = mandelIterations(x, y, count);
}
// Reports a failed CUDA runtime call on stderr.
// Returns nonzero when `err` is cudaSuccess, zero otherwise.
static int hostFECheck(cudaError_t err, const char* what)
{
    if (err == cudaSuccess)
        return 1;
    fprintf(stderr, "hostFE: %s failed: %s\n", what, cudaGetErrorString(err));
    return 0;
}

// Host front-end function that allocates the memory and launches the GPU kernel.
//
//   upperX/upperY, lowerX/lowerY  bounds of the region to render
//   img                           host buffer receiving resX * resY ints
//   resX/resY                     image width and height in pixels
//   maxIterations                 iteration cap passed to the kernel
//
// On any CUDA failure a message is printed to stderr and `img` is left
// without a (complete) result; the device buffer is released on every path.
void hostFE (float upperX, float upperY, float lowerX, float lowerY, int* img, int resX, int resY, int maxIterations)
{
    // Nothing to render; also avoids dividing by zero and an empty grid launch.
    if (img == NULL || resX <= 0 || resY <= 0)
        return;

    float stepX = (upperX - lowerX) / resX;
    float stepY = (upperY - lowerY) / resY;

    // Widen before multiplying so large images cannot overflow int.
    size_t bytes = (size_t)resX * (size_t)resY * sizeof(int);

    int* gpu_mem = NULL;
    if (!hostFECheck(cudaMalloc(&gpu_mem, bytes), "cudaMalloc"))
        return;

    // 8x8 threads per block; round the grid up so it covers the whole image.
    const int blockEdge = 8;
    dim3 dimBlock(blockEdge, blockEdge);
    dim3 dimGrid((resX + blockEdge - 1) / blockEdge, (resY + blockEdge - 1) / blockEdge);
    mandelKernel<<<dimGrid, dimBlock>>>(gpu_mem, lowerX, lowerY, stepX, stepY, resX, resY, maxIterations);

    // A launch does not return a status; a bad configuration shows up here.
    if (hostFECheck(cudaGetLastError(), "mandelKernel launch"))
    {
        // Copy straight into the caller's buffer. The blocking copy waits for
        // the kernel and reports any error raised while it ran, so neither a
        // staging buffer nor a separate device synchronize is needed.
        hostFECheck(cudaMemcpy(img, gpu_mem, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy");
    }

    hostFECheck(cudaFree(gpu_mem), "cudaFree");
}
Leave a Comment