Untitled
unknown
plain_text
5 months ago
1.8 kB
7
Indexable
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>

// Evaluates a CUDA runtime call; on failure prints the source location and the
// runtime's error string to stderr and terminates the process. Without this,
// a failed allocation/launch/copy would leave the caller's image silently
// unfilled.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t cudaCheckErr_ = (call);                                   \
        if (cudaCheckErr_ != cudaSuccess) {                                   \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(cudaCheckErr_));                       \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

// Edge length (in threads) of the square thread block used by hostFE.
static const int kBlockEdge = 8;

// Computes the Mandelbrot escape iteration for one pixel per thread.
//
// Launch layout: 2D grid of 2D blocks covering at least resX x resY threads;
// threads that fall outside the image exit immediately. Uses no shared memory.
//
// output          device buffer of resX * resY ints, row-major (row = ty)
// lowerX, lowerY  complex-plane coordinate of pixel (0, 0)
// stepX, stepY    coordinate increment per pixel in each direction
// resX, resY      image width and height in pixels
// count           maximum number of iterations
//
// Each pixel receives the index of the first iteration at which |z|^2 > 4,
// or `count` if that never happens within `count` iterations.
__global__ void mandelKernel(int* output, float lowerX, float lowerY, float stepX, float stepY, int resX, int resY, int count)
{
    const int tx = blockIdx.x * blockDim.x + threadIdx.x;
    const int ty = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so tail threads have no pixel;
    // bail out before doing any iteration work for them.
    if (tx >= resX || ty >= resY) {
        return;
    }

    // Derive the coordinate from the integer pixel index (rather than by
    // repeated addition) to avoid accumulating floating-point error.
    const float x = lowerX + tx * stepX;
    const float y = lowerY + ty * stepY;

    float z_re = x;
    float z_im = y;

    int i;
    for (i = 0; i < count; ++i) {
        // Stop at the first escape: later iterations cannot change the answer.
        if (z_re * z_re + z_im * z_im > 4.f) {
            break;
        }

        const float new_re = z_re * z_re - z_im * z_im;
        const float new_im = 2.f * z_re * z_im;
        z_re = x + new_re;
        z_im = y + new_im;
    }

    // i is the escape iteration, or count when the loop ran to completion.
    // Index in size_t so large images do not overflow int arithmetic.
    output[(size_t)ty * (size_t)resX + (size_t)tx] = i;
}

// Host front-end function that allocates the device memory, launches the GPU
// kernel and copies the result back.
//
// upperX, upperY  upper bounds of the rendered region of the complex plane
// lowerX, lowerY  lower bounds of the rendered region (coordinate of pixel 0,0)
// img             host buffer receiving resX * resY ints, row-major
// resX, resY      image width and height in pixels
// maxIterations   iteration cap passed to the kernel
//
// Does nothing when either resolution is non-positive. Any CUDA failure is
// reported on stderr and terminates the process (see CUDA_CHECK).
void hostFE(float upperX, float upperY, float lowerX, float lowerY, int* img, int resX, int resY, int maxIterations)
{
    // An empty image would otherwise produce a zero-sized grid, which is an
    // invalid launch configuration.
    if (resX <= 0 || resY <= 0) {
        return;
    }

    const float stepX = (upperX - lowerX) / resX;
    const float stepY = (upperY - lowerY) / resY;

    // Widen before multiplying so the byte count cannot overflow int.
    const size_t bytes = (size_t)resX * (size_t)resY * sizeof(int);

    int* gpu_mem = NULL;
    CUDA_CHECK(cudaMalloc(&gpu_mem, bytes));

    // Ceil-divide so partial tiles at the right/bottom edges are covered.
    const dim3 dimBlock(kBlockEdge, kBlockEdge);
    const dim3 dimGrid((resX + kBlockEdge - 1) / kBlockEdge,
                       (resY + kBlockEdge - 1) / kBlockEdge);

    mandelKernel<<<dimGrid, dimBlock>>>(gpu_mem, lowerX, lowerY, stepX, stepY, resX, resY, maxIterations);
    CUDA_CHECK(cudaGetLastError());       // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran

    // Copy straight into the caller's buffer; no intermediate host staging
    // buffer is needed.
    CUDA_CHECK(cudaMemcpy(img, gpu_mem, bytes, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaFree(gpu_mem));
}
Editor is loading...
Leave a Comment