Untitled
unknown
plain_text
25 days ago
23 kB
3
Indexable
// Histogram Equalization
//
// Pipeline (all stages run on the device):
//   float RGB -> unsigned char RGB -> grayscale -> histogram -> CDF
//   -> equalized unsigned char RGB -> float RGB

#include <wb.h>

#define HISTOGRAM_LENGTH 256
#define BLOCK_SIZE 16

// Evaluates a CUDA runtime call; on failure logs the statement and the CUDA
// error string, then returns -1 from the enclosing function.
#define wbCheck(stmt)                                                     \
  do {                                                                    \
    cudaError_t err = stmt;                                               \
    if (err != cudaSuccess) {                                             \
      wbLog(ERROR, "Failed to run stmt ", #stmt);                         \
      wbLog(ERROR, "Got CUDA error ... ", cudaGetErrorString(err));       \
      return -1;                                                          \
    }                                                                     \
  } while (0)

// Converts each of the `len` float samples in `input` to an unsigned char by
// scaling with 255 and truncating.
// Launch: 1D grid, one thread per element; surplus threads are masked off.
__global__ void castChar(float *input, unsigned char *output, int len) {
  int idx = threadIdx.x + blockIdx.x * blockDim.x;
  if (idx < len) {
    output[idx] = (unsigned char)(255 * input[idx]);
  }
}

// Converts an interleaved 3-channel image to a single-channel grayscale image
// using the weights 0.21 / 0.71 / 0.07 on the three channels.
// Launch: 2D grid, one thread per pixel; surplus threads are masked off.
__global__ void colorToGray(unsigned char *input, unsigned char *output,
                            int width, int height) {
  int x = blockDim.x * blockIdx.x + threadIdx.x;
  int y = blockDim.y * blockIdx.y + threadIdx.y;
  if (x < width && y < height) {
    int idx = y * width + x;
    unsigned char r = input[3 * idx];
    unsigned char g = input[3 * idx + 1];
    unsigned char b = input[3 * idx + 2];
    // Arithmetic is left in double precision so the truncated result is
    // unchanged from the original expression.
    output[idx] = (unsigned char)(0.21 * r + 0.71 * g + 0.07 * b);
  }
}

// Accumulates the histogram of a grayscale image into `hist` with one global
// atomicAdd per pixel. `hist` must be zeroed by the caller before the launch.
// Launch: 2D grid, one thread per pixel; surplus threads are masked off.
__global__ void hist(unsigned char *gray, int *hist, int width, int height) {
  int x = blockDim.x * blockIdx.x + threadIdx.x;
  int y = blockDim.y * blockIdx.y + threadIdx.y;
  if (x < width && y < height) {
    int idx = y * width + x;
    atomicAdd(&(hist[gray[idx]]), 1);
  }
}

// Computes the inclusive prefix sum of the HISTOGRAM_LENGTH-bin histogram
// `input` and writes each running total divided by `len` to `output`.
// Launch: exactly one block of HISTOGRAM_LENGTH threads. The scan lives
// entirely in this block's shared memory, so the thread index alone selects
// the bin.
__global__ void cdf(int *input, float *output, int len) {
  // Counts are summed as integers so the totals stay exact; float
  // accumulation loses integer precision above 2^24.
  __shared__ int T[HISTOGRAM_LENGTH];

  int tx = threadIdx.x;

  if (tx < HISTOGRAM_LENGTH) {
    T[tx] = input[tx];
  }
  __syncthreads();

  // Up-sweep: build partial sums at strides 1, 2, 4, ...
  for (int stride = 1; stride < HISTOGRAM_LENGTH; stride *= 2) {
    int index = (tx + 1) * stride * 2 - 1;
    if (index < HISTOGRAM_LENGTH && index >= stride) {
      T[index] += T[index - stride];
    }
    // Barrier is outside the conditional so every thread reaches it.
    __syncthreads();
  }

  // Down-sweep: push the partial sums to the remaining positions.
  for (int stride = HISTOGRAM_LENGTH / 4; stride > 0; stride /= 2) {
    int index = (tx + 1) * stride * 2 - 1;
    if ((index + stride) < HISTOGRAM_LENGTH) {
      T[index + stride] += T[index];
    }
    __syncthreads();
  }

  if (tx < HISTOGRAM_LENGTH) {
    output[tx] = (float)T[tx] / len;
  }
}

// Remaps every channel value of an interleaved 3-channel image in place
// through the CDF: 255 * (cdf[v] - cdf[0]) / (1 - cdf[0]), clamped to
// [0, 255].
// Launch: 2D grid, one thread per pixel; surplus threads are masked off.
// NOTE(review): when cdf[0] == 1 the denominator is zero -- confirm whether
// that input needs explicit handling.
__global__ void equal(unsigned char *output, float *cdf, int width,
                      int height) {
  int x = blockDim.x * blockIdx.x + threadIdx.x;
  int y = blockDim.y * blockIdx.y + threadIdx.y;
  if (x < width && y < height) {
    for (int i = 0; i < 3; i++) {
      int ii = (y * width + x) * 3 + i;
      float tmp = 255.0 * (cdf[output[ii]] - cdf[0]) / (1 - cdf[0]);
      float res = min(max(tmp, 0.0f), 255.0f);
      output[ii] = (unsigned char)res;
    }
  }
}

// Converts an interleaved 3-channel unsigned char image back to float by
// dividing every channel value by 255.
// Launch: 2D grid, one thread per pixel; surplus threads are masked off.
__global__ void castFloat(const unsigned char *input, float *output, int width,
                          int height) {
  int x = blockDim.x * blockIdx.x + threadIdx.x;
  int y = blockDim.y * blockIdx.y + threadIdx.y;
  if (x < width && y < height) {
    for (int i = 0; i < 3; i++) {
      int ii = (y * width + x) * 3 + i;
      output[ii] = (float)(input[ii] / 255.0);
    }
  }
}

// Reads the input image named on the command line, equalizes its histogram on
// the GPU and hands the result to wbSolution.
// Returns 0 on success and -1 if the image is not 3-channel or any CUDA call
// fails.
int main(int argc, char **argv) {
  wbArg_t args;
  int imageWidth;
  int imageHeight;
  int imageChannels;
  wbImage_t inputImage;
  wbImage_t outputImage;
  float *hostInputImageData;
  float *hostOutputImageData;
  const char *inputImageFile;

  args = wbArg_read(argc, argv); /* parse the input arguments */

  inputImageFile = wbArg_getInputFile(args, 0);

  // Import data and create memory on host
  inputImage = wbImport(inputImageFile);
  imageWidth = wbImage_getWidth(inputImage);
  imageHeight = wbImage_getHeight(inputImage);
  imageChannels = wbImage_getChannels(inputImage);
  outputImage = wbImage_new(imageWidth, imageHeight, imageChannels);
  hostInputImageData = wbImage_getData(inputImage);
  hostOutputImageData = wbImage_getData(outputImage);

  // The kernels index pixels as 3 interleaved channels; any other layout
  // would read and write out of bounds.
  if (imageChannels != 3) {
    wbLog(ERROR, "Expected a 3-channel image, got ", imageChannels);
    return -1;
  }

  float *GPU_Float;
  unsigned char *GPU_Char;
  unsigned char *g_img;
  int *GPU_Hist;
  float *CDF;

  int image_size_c = imageWidth * imageHeight * imageChannels;
  int image_size_g = imageWidth * imageHeight;

  wbCheck(cudaMalloc((void **)&GPU_Float, image_size_c * sizeof(float)));
  wbCheck(cudaMalloc((void **)&GPU_Char, image_size_c * sizeof(unsigned char)));
  wbCheck(cudaMalloc((void **)&g_img, image_size_g * sizeof(unsigned char)));
  wbCheck(cudaMalloc((void **)&GPU_Hist, HISTOGRAM_LENGTH * sizeof(int)));
  wbCheck(cudaMalloc((void **)&CDF, HISTOGRAM_LENGTH * sizeof(float)));

  // cudaMalloc does not clear memory; the histogram is built with atomicAdd,
  // so every bin has to start at zero.
  wbCheck(cudaMemset(GPU_Hist, 0, HISTOGRAM_LENGTH * sizeof(int)));

  wbCheck(cudaMemcpy(GPU_Float, hostInputImageData,
                     image_size_c * sizeof(float), cudaMemcpyHostToDevice));

  // 1D launch: one thread per channel sample.
  dim3 DimBlock(BLOCK_SIZE * BLOCK_SIZE);
  dim3 DimGrid((image_size_c + BLOCK_SIZE * BLOCK_SIZE - 1) /
               (BLOCK_SIZE * BLOCK_SIZE));

  // 2D launch: one thread per pixel, grid rounded up to cover the image.
  dim3 DimBlock2(BLOCK_SIZE, BLOCK_SIZE);
  dim3 DimGrid2((imageWidth + BLOCK_SIZE - 1) / BLOCK_SIZE,
                (imageHeight + BLOCK_SIZE - 1) / BLOCK_SIZE);

  // All kernels run in order on the default stream, so only launch errors are
  // checked per kernel; execution errors surface at the synchronize below.
  castChar<<<DimGrid, DimBlock>>>(GPU_Float, GPU_Char, image_size_c);
  wbCheck(cudaGetLastError());

  colorToGray<<<DimGrid2, DimBlock2>>>(GPU_Char, g_img, imageWidth,
                                       imageHeight);
  wbCheck(cudaGetLastError());

  hist<<<DimGrid2, DimBlock2>>>(g_img, GPU_Hist, imageWidth, imageHeight);
  wbCheck(cudaGetLastError());

  cdf<<<1, HISTOGRAM_LENGTH>>>(GPU_Hist, CDF, imageWidth * imageHeight);
  wbCheck(cudaGetLastError());

  equal<<<DimGrid2, DimBlock2>>>(GPU_Char, CDF, imageWidth, imageHeight);
  wbCheck(cudaGetLastError());

  castFloat<<<DimGrid2, DimBlock2>>>(GPU_Char, GPU_Float, imageWidth,
                                     imageHeight);
  wbCheck(cudaGetLastError());

  wbCheck(cudaDeviceSynchronize());

  wbCheck(cudaMemcpy(hostOutputImageData, GPU_Float,
                     image_size_c * sizeof(float), cudaMemcpyDeviceToHost));

  wbSolution(args, outputImage);

  wbCheck(cudaFree(GPU_Float));
  wbCheck(cudaFree(GPU_Char));
  wbCheck(cudaFree(g_img));
  wbCheck(cudaFree(GPU_Hist));
  wbCheck(cudaFree(CDF));

  return 0;
}
Editor is loading...
Leave a Comment