// Untitled

#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>

// Escape-time iteration for the point c = (x, y).
//
// Starting from z = c, repeatedly applies z <- z*z + c and returns the index
// of the first iteration at which |z|^2 > 4 holds, checked before each
// update. Returns `count` if the threshold is never exceeded within `count`
// checks (including when count <= 0, in which case no check is made).
static __host__ __device__ __forceinline__ int mandelEscapeCount(float x, float y, int count)
{
    float z_re = x, z_im = y;
    int i;
    for (i = 0; i < count; ++i)
    {
        // Stop at the first escape; further iterations cannot change the result.
        if (z_re * z_re + z_im * z_im > 4.f)
            break;

        float new_re = z_re * z_re - z_im * z_im;
        float new_im = 2.f * z_re * z_im;
        z_re = x + new_re;
        z_im = y + new_im;
    }
    return i;
}

// Computes one Mandelbrot escape count per pixel.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel; thread (tx, ty)
// handles column tx and row ty. The grid must cover at least resX x resY
// threads; surplus threads exit without touching memory. No shared memory
// is used.
//
// output  device buffer with room for resX * resY ints, written row-major
//         (row ty starts at element ty * resX)
// lowerX, lowerY  coordinates assigned to pixel (0, 0)
// stepX, stepY    coordinate increment per pixel along each axis
// resX, resY      image width and height in pixels
// count           maximum number of iterations per pixel
__global__ void mandelKernel(int* output, float lowerX, float lowerY, float stepX, float stepY, int resX, int resY, int count) {
    int tx = blockIdx.x * blockDim.x + threadIdx.x;
    int ty = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads in the grid's padding have no pixel: leave before doing any work.
    if (tx >= resX || ty >= resY)
        return;

    // Map the pixel to its coordinates as lower + index * step.
    float x = lowerX + tx * stepX;
    float y = lowerY + ty * stepY;

    // Widen before multiplying so the flat index cannot overflow int.
    output[(size_t)ty * resX + tx] = mandelEscapeCount(x, y, count);
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool hostFECudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "hostFE: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Host front-end function that allocates the device memory, launches the GPU
// kernel and copies the result back.
//
// upperX, upperY  upper bounds of the rendered coordinate range
// lowerX, lowerY  lower bounds; these are the coordinates of pixel (0, 0)
// img             host buffer receiving resX * resY ints, row-major
// resX, resY      image width and height in pixels
// maxIterations   iteration cap passed to the kernel
//
// Does nothing when img is NULL or either resolution is non-positive. If a
// CUDA call fails, a message is printed to stderr, the device buffer is
// released, and the function returns; img is not written unless the kernel
// ran and the copy back was attempted.
void hostFE (float upperX, float upperY, float lowerX, float lowerY, int* img, int resX, int resY, int maxIterations)
{
    // An empty image would produce a zero-sized grid, which is an invalid launch.
    if (img == NULL || resX <= 0 || resY <= 0)
        return;

    const float stepX = (upperX - lowerX) / resX;
    const float stepY = (upperY - lowerY) / resY;

    // Widen before multiplying so large images do not overflow int.
    const size_t bytes = (size_t)resX * (size_t)resY * sizeof(int);

    int* gpu_mem = NULL;
    if (!hostFECudaOk(cudaMalloc(&gpu_mem, bytes), "cudaMalloc"))
        return;

    // One thread per pixel; round the grid up so partial tiles are covered.
    const int tile = 8;
    dim3 dimBlock(tile, tile);
    dim3 dimGrid((resX + tile - 1) / tile, (resY + tile - 1) / tile);
    mandelKernel<<<dimGrid, dimBlock>>>(gpu_mem, lowerX, lowerY, stepX, stepY, resX, resY, maxIterations);

    // cudaGetLastError catches launch-configuration errors; the synchronize
    // surfaces faults raised while the kernel was running.
    if (hostFECudaOk(cudaGetLastError(), "mandelKernel launch") &&
        hostFECudaOk(cudaDeviceSynchronize(), "mandelKernel execution"))
    {
        // Copy straight into the caller's buffer; no host staging copy is needed.
        hostFECudaOk(cudaMemcpy(img, gpu_mem, bytes, cudaMemcpyDeviceToHost),
                     "cudaMemcpy (device to host)");
    }

    hostFECudaOk(cudaFree(gpu_mem), "cudaFree");
}
// Editor is loading...