// Web-page residue captured with this listing (not program text):
// Untitled | unknown | plain_text | 9 months ago | 23 kB | 6 | Indexable
// Histogram Equalization
#include <wb.h>
#define HISTOGRAM_LENGTH 256
#define BLOCK_SIZE 16
//@@ insert code here
// wbCheck(stmt): evaluates `stmt` once, storing its result in a cudaError_t.
// If the result is not cudaSuccess it logs the stringified statement and the
// cudaGetErrorString() text through wbLog(ERROR, ...) and then executes
// `return -1;` in the ENCLOSING function, so it may only be used inside
// functions whose return type accepts -1 (such as main).
// The do { ... } while(0) wrapper makes the expansion act as one statement.
#define wbCheck(stmt) do { \
cudaError_t err = stmt; \
if (err != cudaSuccess) { \
wbLog(ERROR, "Failed to run stmt ", #stmt); \
wbLog(ERROR, "Got CUDA error ... ", cudaGetErrorString(err)); \
return -1; \
} \
} while(0)
// Scales each of the first `len` floats in `input` by 255 and stores the
// value, truncated by the cast, as an unsigned char in `output`.
// Indexing uses only the x dimension, so a 1-D launch with at least `len`
// threads in total is expected; surplus threads exit without touching memory.
// NOTE(review): presumably `input` holds samples in [0, 1] -- confirm with caller.
__global__ void castChar(float *input, unsigned char *output, int len){
    const int gid = blockIdx.x * blockDim.x + threadIdx.x;
    if (gid >= len) {
        return;
    }
    const float scaled = 255 * input[gid];
    output[gid] = (unsigned char)scaled;
}
// Converts an interleaved 3-bytes-per-pixel image to one gray byte per pixel
// using the weights 0.21, 0.71 and 0.07 on the first, second and third byte
// of each pixel. One thread handles one pixel of a 2-D launch; threads that
// fall outside width x height do nothing.
__global__ void colorToGray(unsigned char *input, unsigned char *output, int width, int height){
    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;
    if (col >= width || row >= height) {
        return;
    }
    const int pixel = row * width + col;
    const unsigned char *rgb = input + 3 * pixel;
    // The weighted sum is evaluated in double precision (double literals)
    // and truncated by the cast.
    output[pixel] = (unsigned char)(0.21 * rgb[0] + 0.71 * rgb[1] + 0.07 * rgb[2]);
}
// Adds 1 to the bin selected by each in-range pixel's gray value, using
// atomicAdd so concurrent increments of the same bin are not lost.
// The kernel only increments: the caller must zero `hist` beforehand.
// One thread per pixel of a 2-D launch; out-of-range threads do nothing.
__global__ void hist(unsigned char *gray, int *hist, int width, int height){
    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;
    if (col >= width || row >= height) {
        return;
    }
    const unsigned char bin = gray[row * width + col];
    atomicAdd(hist + bin, 1);
}
// Computes the normalized cumulative distribution of a histogram:
//   output[i] = (input[0] + ... + input[i]) / len,  for i in [0, HISTOGRAM_LENGTH).
//
// Parameters:
//   input  - HISTOGRAM_LENGTH bin counts.
//   output - HISTOGRAM_LENGTH floats receiving the normalized running sums.
//   len    - divisor applied to every running sum (the caller passes the
//            pixel count); must be non-zero.
//
// Launch: a 1-D thread block of any size (the file launches <<<1, 256>>>).
// All indexing is by threadIdx.x with a stride of blockDim.x, so the result
// does not depend on the block size. Only threadIdx.x is used, so a block
// with more than one thread in y or z is not supported.
// Uses HISTOGRAM_LENGTH * sizeof(int) bytes of static shared memory.
__global__ void cdf(int *input, float *output, int len){
    // Integer accumulation keeps the running sums exact; a float accumulator
    // loses exactness once a count exceeds 2^24.
    __shared__ int T[HISTOGRAM_LENGTH];
    const int tx = threadIdx.x;
    const int nthreads = blockDim.x;

    // Stage the histogram in shared memory.
    for (int i = tx; i < HISTOGRAM_LENGTH; i += nthreads) {
        T[i] = input[i];
    }
    __syncthreads();

    // Reduction phase: after the pass with a given stride, every element at
    // position k * 2 * stride - 1 holds the sum of the 2 * stride inputs
    // ending there.
    for (int stride = 1; stride < HISTOGRAM_LENGTH; stride *= 2) {
        for (int t = tx; ; t += nthreads) {
            const int index = (t + 1) * stride * 2 - 1;
            if (index >= HISTOGRAM_LENGTH) {
                break;  // index grows with t, so no later t is in range
            }
            T[index] += T[index - stride];
        }
        // Reached by every thread: the break above only leaves the inner loop.
        __syncthreads();
    }

    // Distribution phase: push the partial sums down to the positions the
    // reduction phase skipped.
    for (int stride = HISTOGRAM_LENGTH / 4; stride > 0; stride /= 2) {
        for (int t = tx; ; t += nthreads) {
            const int index = (t + 1) * stride * 2 - 1;
            if (index + stride >= HISTOGRAM_LENGTH) {
                break;
            }
            T[index + stride] += T[index];
        }
        __syncthreads();
    }

    // Normalize the inclusive prefix sums.
    for (int i = tx; i < HISTOGRAM_LENGTH; i += nthreads) {
        output[i] = (float)T[i] / len;
    }
}
// Remaps, in place, the three bytes of every in-range pixel through `cdf`:
//   v -> clamp(255 * (cdf[v] - cdf[0]) / (1 - cdf[0]), 0, 255)
// with the clamped value truncated by the cast to unsigned char.
// One thread per pixel of a 2-D launch; `output` is indexed as 3 interleaved
// bytes per pixel. Out-of-range threads do nothing.
__global__ void equal(unsigned char *output, float *cdf, int width, int height){
    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;
    if (col >= width || row >= height) {
        return;
    }
    unsigned char *px = output + (row * width + col) * 3;
    for (int c = 0; c < 3; ++c) {
        // The 255.0 literal makes this expression evaluate in double before
        // it is narrowed to float.
        float mapped = 255.0 * (cdf[px[c]] - cdf[0]) / (1 - cdf[0]);
        mapped = min(max(mapped, 0.0f), 255.0f);
        px[c] = (unsigned char)mapped;
    }
}
// Converts the three bytes of every in-range pixel to float by dividing by
// 255.0 (a double-precision division, then narrowed to float).
// One thread per pixel of a 2-D launch; both buffers are indexed as 3
// interleaved samples per pixel. Out-of-range threads do nothing.
__global__ void castFloat(const unsigned char *input, float *output, int width, int height) {
    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;
    if (col >= width || row >= height) {
        return;
    }
    const int base = (row * width + col) * 3;
    for (int c = 0; c < 3; ++c) {
        const int at = base + c;
        output[at] = (float) (input[at] / 255.0);
    }
}
// Host driver for histogram equalization.
//
// Imports the image named by the first input argument, runs the pipeline
//   castChar -> colorToGray -> hist -> cdf -> equal -> castFloat
// on the device, copies the float result into the output image and hands it
// to wbSolution.
//
// Returns 0 on success and -1 if the image does not have 3 channels or any
// CUDA call fails (wbCheck logs the failing statement and returns -1).
int main(int argc, char **argv) {
    wbArg_t args;
    int imageWidth;
    int imageHeight;
    int imageChannels;
    wbImage_t inputImage;
    wbImage_t outputImage;
    float *hostInputImageData;
    float *hostOutputImageData;
    const char *inputImageFile;

    args = wbArg_read(argc, argv); /* parse the input arguments */
    inputImageFile = wbArg_getInputFile(args, 0);

    // Import data and create memory on host
    inputImage = wbImport(inputImageFile);
    imageWidth = wbImage_getWidth(inputImage);
    imageHeight = wbImage_getHeight(inputImage);
    imageChannels = wbImage_getChannels(inputImage);
    outputImage = wbImage_new(imageWidth, imageHeight, imageChannels);
    hostInputImageData = wbImage_getData(inputImage);
    hostOutputImageData = wbImage_getData(outputImage);

    // colorToGray, equal and castFloat index their buffers as exactly three
    // interleaved samples per pixel; any other layout would read and write
    // out of bounds.
    if (imageChannels != 3) {
        wbLog(ERROR, "Expected a 3-channel image, got channels = ", imageChannels);
        return -1;
    }

    float *GPU_Float = NULL;         // image as float samples (input, later output)
    unsigned char *GPU_Char = NULL;  // image as 8-bit samples, equalized in place
    unsigned char *g_img = NULL;     // one gray byte per pixel
    int *GPU_Hist = NULL;            // HISTOGRAM_LENGTH bin counts
    float *CDF = NULL;               // HISTOGRAM_LENGTH normalized running sums

    const int image_size_g = imageWidth * imageHeight;
    const int image_size_c = image_size_g * imageChannels;

    wbCheck(cudaMalloc((void **)&GPU_Float, image_size_c * sizeof(float)));
    wbCheck(cudaMalloc((void **)&GPU_Char, image_size_c * sizeof(unsigned char)));
    wbCheck(cudaMalloc((void **)&g_img, image_size_g * sizeof(unsigned char)));
    // Allocated exactly once; a second cudaMalloc into the same pointer would
    // leak the first buffer.
    wbCheck(cudaMalloc((void **)&GPU_Hist, HISTOGRAM_LENGTH * sizeof(int)));
    wbCheck(cudaMalloc((void **)&CDF, HISTOGRAM_LENGTH * sizeof(float)));

    // The hist kernel only increments bins, and cudaMalloc does not clear
    // memory, so the bins must be zeroed before the launch.
    wbCheck(cudaMemset(GPU_Hist, 0, HISTOGRAM_LENGTH * sizeof(int)));

    wbCheck(cudaMemcpy(GPU_Float, hostInputImageData, image_size_c * sizeof(float),
                       cudaMemcpyHostToDevice));

    // 1-D launch: one thread per sample.
    const int flatThreads = BLOCK_SIZE * BLOCK_SIZE;
    dim3 DimBlock(flatThreads);
    dim3 DimGrid((image_size_c + flatThreads - 1) / flatThreads);

    // 2-D launch: one thread per pixel, grid sized by integer ceil-division.
    dim3 DimBlock2(BLOCK_SIZE, BLOCK_SIZE);
    dim3 DimGrid2((imageWidth + BLOCK_SIZE - 1) / BLOCK_SIZE,
                  (imageHeight + BLOCK_SIZE - 1) / BLOCK_SIZE);

    // All kernels are issued to the default stream and therefore execute in
    // order; cudaGetLastError reports launch-configuration failures.
    castChar<<<DimGrid, DimBlock>>>(GPU_Float, GPU_Char, image_size_c);
    wbCheck(cudaGetLastError());

    colorToGray<<<DimGrid2, DimBlock2>>>(GPU_Char, g_img, imageWidth, imageHeight);
    wbCheck(cudaGetLastError());

    hist<<<DimGrid2, DimBlock2>>>(g_img, GPU_Hist, imageWidth, imageHeight);
    wbCheck(cudaGetLastError());

    cdf<<<1, HISTOGRAM_LENGTH>>>(GPU_Hist, CDF, image_size_g);
    wbCheck(cudaGetLastError());

    equal<<<DimGrid2, DimBlock2>>>(GPU_Char, CDF, imageWidth, imageHeight);
    wbCheck(cudaGetLastError());

    castFloat<<<DimGrid2, DimBlock2>>>(GPU_Char, GPU_Float, imageWidth, imageHeight);
    wbCheck(cudaGetLastError());

    // Surfaces any fault raised while the kernels were executing.
    wbCheck(cudaDeviceSynchronize());

    wbCheck(cudaMemcpy(hostOutputImageData, GPU_Float, image_size_c * sizeof(float),
                       cudaMemcpyDeviceToHost));

    wbSolution(args, outputImage);

    wbCheck(cudaFree(GPU_Float));
    wbCheck(cudaFree(GPU_Char));
    wbCheck(cudaFree(g_img));
    wbCheck(cudaFree(GPU_Hist));
    wbCheck(cudaFree(CDF));
    return 0;
}
// End of program. The stray closing braces and the page text
// "Editor is loading..." / "Leave a Comment" that followed the listing were
// web-page residue, not code.