// Untitled

// Histogram Equalization

#include <wb.h>

#define HISTOGRAM_LENGTH 256
#define BLOCK_SIZE 16
//@@ insert code here

// Evaluates `stmt` as a cudaError_t. When the result is not cudaSuccess, the
// statement text and the CUDA error string are logged through wbLog and the
// enclosing function executes `return -1` (so it must return an int-compatible
// type). Wrapped in do { ... } while(0) so it behaves as a single statement.
#define wbCheck(stmt)                                                          \
    do {                                                                       \
        cudaError_t err = stmt;                                                \
        if (err == cudaSuccess)                                                \
            break;                                                             \
        wbLog(ERROR, "Failed to run stmt ", #stmt);                            \
        wbLog(ERROR, "Got CUDA error ...  ", cudaGetErrorString(err));         \
        return -1;                                                             \
    } while(0)
                                                                    
                                                                    // Converts `len` floats to bytes: output[i] = (unsigned char)(255 * input[i]).
                                                                    // Uses a 1D global thread index; threads whose index is >= len do nothing.
                                                                    __global__ void castChar(float *input, unsigned char *output, int len){
                                                                        const int i = blockIdx.x * blockDim.x + threadIdx.x;
                                                                        if (i >= len) return;

                                                                        const float scaled = 255 * input[i];
                                                                        output[i] = (unsigned char)scaled;  // truncates toward zero
                                                                    }

                                                                    // Reduces an interleaved 3-bytes-per-pixel image to one byte per pixel:
                                                                    //   output[p] = (unsigned char)(0.21 * r + 0.71 * g + 0.07 * b)
                                                                    // where r, g, b are bytes 3p, 3p+1, 3p+2 of `input`.
                                                                    // Uses a 2D global thread index (x -> column, y -> row); threads that fall
                                                                    // outside width x height do nothing.
                                                                    __global__ void colorToGray(unsigned char *input, unsigned char *output, int width, int height){
                                                                        const int col = blockIdx.x * blockDim.x + threadIdx.x;
                                                                        const int row = blockIdx.y * blockDim.y + threadIdx.y;
                                                                        if (col >= width || row >= height) return;

                                                                        const int pixel = row * width + col;
                                                                        const unsigned char *rgb = input + 3 * pixel;
                                                                        // The weights are double literals, so the weighted sum is evaluated in
                                                                        // double precision before being truncated to a byte.
                                                                        output[pixel] = (unsigned char)(0.21 * rgb[0] + 0.71 * rgb[1] + 0.07 * rgb[2]);
                                                                    }

                                                                    // Builds a histogram of the width*height bytes in `gray`: every in-range
                                                                    // thread atomically adds 1 to the bin indexed by its pixel's value.
                                                                    // Bins are only incremented here, never cleared, so the counts are added on
                                                                    // top of whatever `hist` already holds.
                                                                    __global__ void hist(unsigned char *gray, int *hist, int width, int height){
                                                                        const int col = blockIdx.x * blockDim.x + threadIdx.x;
                                                                        const int row = blockIdx.y * blockDim.y + threadIdx.y;
                                                                        if (col >= width || row >= height) return;

                                                                        const unsigned char bin = gray[row * width + col];
                                                                        atomicAdd(hist + bin, 1);
                                                                    }

                                                                    // Turns a HISTOGRAM_LENGTH-bin histogram into a normalised cumulative
                                                                    // distribution:
                                                                    //   output[i] = (input[0] + ... + input[i]) / len
                                                                    //
                                                                    // input  : HISTOGRAM_LENGTH bin counts
                                                                    // output : HISTOGRAM_LENGTH cumulative fractions
                                                                    // len    : divisor applied to every prefix sum; the caller is expected to
                                                                    //          pass a non-zero value (no check is made here)
                                                                    //
                                                                    // The inclusive scan runs in shared memory in two phases (a reduction tree
                                                                    // followed by a distribution pass). Only block 0 does any work, and every
                                                                    // loop strides by blockDim.x, so the result does not depend on the number
                                                                    // of threads in the block. The stride schedule assumes HISTOGRAM_LENGTH is
                                                                    // a power of two.
                                                                    __global__ void cdf(int *input, float *output, int len){
                                                                        // Integer partial sums stay exact; a float accumulator would start
                                                                        // dropping counts once a prefix sum exceeds 2^24.
                                                                        __shared__ int T[HISTOGRAM_LENGTH];

                                                                        const int tx = threadIdx.x;
                                                                        const int nThreads = blockDim.x;

                                                                        // The scan lives in one block's shared memory. Extra blocks leave as a
                                                                        // whole, so none of their threads is left waiting at a barrier.
                                                                        if (blockIdx.x != 0) return;

                                                                        for (int i = tx; i < HISTOGRAM_LENGTH; i += nThreads){
                                                                            T[i] = input[i];
                                                                        }
                                                                        __syncthreads();

                                                                        // Reduction phase: after the pass with a given stride, every element at
                                                                        // position k * 2 * stride - 1 holds the sum of the 2 * stride inputs
                                                                        // ending there.
                                                                        for (int stride = 1; stride < HISTOGRAM_LENGTH; stride *= 2){
                                                                            for (int index = (tx + 1) * stride * 2 - 1; index < HISTOGRAM_LENGTH; index += nThreads * stride * 2){
                                                                                T[index] += T[index - stride];
                                                                            }
                                                                            __syncthreads();  // outside the inner loop: reached by every thread
                                                                        }

                                                                        // Distribution phase: push the completed partial sums down to the
                                                                        // positions the reduction tree skipped.
                                                                        for (int stride = HISTOGRAM_LENGTH / 4; stride > 0; stride /= 2){
                                                                            for (int index = (tx + 1) * stride * 2 - 1; index + stride < HISTOGRAM_LENGTH; index += nThreads * stride * 2){
                                                                                T[index + stride] += T[index];
                                                                            }
                                                                            __syncthreads();
                                                                        }

                                                                        for (int i = tx; i < HISTOGRAM_LENGTH; i += nThreads){
                                                                            output[i] = (float)T[i] / len;
                                                                        }
                                                                    }

                                                                    // Remaps, in place, each of the three interleaved bytes of every pixel
                                                                    // through the cumulative distribution `cdf`:
                                                                    //   v -> (unsigned char) clamp(255 * (cdf[v] - cdf[0]) / (1 - cdf[0]), 0, 255)
                                                                    // Uses a 2D global thread index (x -> column, y -> row); threads outside
                                                                    // width x height do nothing.
                                                                    __global__ void equal(unsigned char *output, float *cdf, int width, int height){
                                                                        const int col = blockIdx.x * blockDim.x + threadIdx.x;
                                                                        const int row = blockIdx.y * blockDim.y + threadIdx.y;
                                                                        if (col >= width || row >= height) return;

                                                                        const int first = (row * width + col) * 3;
                                                                        for (int ch = 0; ch < 3; ++ch){
                                                                            unsigned char *value = output + first + ch;
                                                                            // 255.0 is a double literal, so the quotient is formed in double
                                                                            // precision and then narrowed to float.
                                                                            float mapped = 255.0 * (cdf[*value] - cdf[0]) / (1 - cdf[0]);
                                                                            mapped = max(mapped, 0.0f);
                                                                            mapped = min(mapped, 255.0f);
                                                                            *value = (unsigned char)mapped;
                                                                        }
                                                                    }

                                                                    // Converts the three interleaved bytes of every pixel to floats:
                                                                    //   output[k] = (float)(input[k] / 255.0)
                                                                    // Uses a 2D global thread index (x -> column, y -> row); threads outside
                                                                    // width x height do nothing.
                                                                    __global__ void castFloat(const unsigned char *input, float *output, int width, int height) {
                                                                        const int col = blockIdx.x * blockDim.x + threadIdx.x;
                                                                        const int row = blockIdx.y * blockDim.y + threadIdx.y;
                                                                        if (col >= width || row >= height) return;

                                                                        const int first = (row * width + col) * 3;
                                                                        for (int ch = 0; ch < 3; ++ch){
                                                                            // The division is carried out in double (255.0 is a double literal)
                                                                            // and only the quotient is narrowed to float.
                                                                            const double normalised = input[first + ch] / 255.0;
                                                                            output[first + ch] = (float)normalised;
                                                                        }
                                                                    }

                                                                    // Histogram-equalises the image named by the first input argument and hands
                                                                    // the result to wbSolution.
                                                                    //
                                                                    // Device pipeline, in order:
                                                                    //   castChar    float image    -> byte image
                                                                    //   colorToGray byte image     -> one gray byte per pixel
                                                                    //   hist        gray image     -> HISTOGRAM_LENGTH-bin histogram
                                                                    //   cdf         histogram      -> normalised cumulative distribution
                                                                    //   equal       byte image remapped in place through the distribution
                                                                    //   castFloat   byte image     -> float image
                                                                    //
                                                                    // Returns 0 on success, -1 if the image does not have three channels or if
                                                                    // any CUDA call or kernel launch fails (via wbCheck).
                                                                    int main(int argc, char **argv) {
                                                                        wbArg_t args;
                                                                        int imageWidth;
                                                                        int imageHeight;
                                                                        int imageChannels;
                                                                        wbImage_t inputImage;
                                                                        wbImage_t outputImage;
                                                                        float *hostInputImageData;
                                                                        float *hostOutputImageData;
                                                                        const char *inputImageFile;

                                                                        args = wbArg_read(argc, argv); /* parse the input arguments */

                                                                        inputImageFile = wbArg_getInputFile(args, 0);

                                                                        // Import data and create memory on host
                                                                        inputImage = wbImport(inputImageFile);
                                                                        imageWidth = wbImage_getWidth(inputImage);
                                                                        imageHeight = wbImage_getHeight(inputImage);
                                                                        imageChannels = wbImage_getChannels(inputImage);
                                                                        outputImage = wbImage_new(imageWidth, imageHeight, imageChannels);
                                                                        hostInputImageData = wbImage_getData(inputImage);
                                                                        hostOutputImageData = wbImage_getData(outputImage);

                                                                        // colorToGray, equal and castFloat index three interleaved bytes per
                                                                        // pixel; any other channel count would make them run past the buffers.
                                                                        if (imageChannels != 3) {
                                                                            wbLog(ERROR, "Expected 3 image channels, got ", imageChannels);
                                                                            return -1;
                                                                        }

                                                                        float *GPU_Float;
                                                                        unsigned char *GPU_Char;
                                                                        unsigned char *g_img;
                                                                        int *GPU_Hist;
                                                                        float *CDF;
                                                                        const int image_size_g = imageWidth * imageHeight;
                                                                        const int image_size_c = image_size_g * imageChannels;

                                                                        wbCheck(cudaMalloc((void **)&GPU_Float, image_size_c * sizeof(float)));
                                                                        wbCheck(cudaMalloc((void **)&GPU_Char, image_size_c * sizeof(unsigned char)));
                                                                        wbCheck(cudaMalloc((void **)&g_img, image_size_g * sizeof(unsigned char)));
                                                                        wbCheck(cudaMalloc((void **)&GPU_Hist, HISTOGRAM_LENGTH * sizeof(int)));
                                                                        wbCheck(cudaMalloc((void **)&CDF, HISTOGRAM_LENGTH * sizeof(float)));

                                                                        // cudaMalloc does not clear memory and the hist kernel only increments
                                                                        // bins, so the histogram has to start from zero.
                                                                        wbCheck(cudaMemset(GPU_Hist, 0, HISTOGRAM_LENGTH * sizeof(int)));

                                                                        wbCheck(cudaMemcpy(GPU_Float, hostInputImageData, image_size_c * sizeof(float), cudaMemcpyHostToDevice));

                                                                        // 1D launch over every channel value; 2D launch over pixels. Both grids
                                                                        // use ceiling division so partial tiles at the edges are still covered.
                                                                        const int threads1D = BLOCK_SIZE * BLOCK_SIZE;
                                                                        dim3 DimBlock(threads1D);
                                                                        dim3 DimGrid((image_size_c + threads1D - 1) / threads1D);
                                                                        dim3 DimBlock2(BLOCK_SIZE, BLOCK_SIZE);
                                                                        dim3 DimGrid2((imageWidth + BLOCK_SIZE - 1) / BLOCK_SIZE,
                                                                                      (imageHeight + BLOCK_SIZE - 1) / BLOCK_SIZE);

                                                                        // After each launch: cudaGetLastError reports a rejected launch
                                                                        // configuration, cudaDeviceSynchronize reports faults raised while the
                                                                        // kernel ran.
                                                                        castChar<<<DimGrid, DimBlock>>>(GPU_Float, GPU_Char, image_size_c);
                                                                        wbCheck(cudaGetLastError());
                                                                        wbCheck(cudaDeviceSynchronize());

                                                                        colorToGray<<<DimGrid2, DimBlock2>>>(GPU_Char, g_img, imageWidth, imageHeight);
                                                                        wbCheck(cudaGetLastError());
                                                                        wbCheck(cudaDeviceSynchronize());

                                                                        hist<<<DimGrid2, DimBlock2>>>(g_img, GPU_Hist, imageWidth, imageHeight);
                                                                        wbCheck(cudaGetLastError());
                                                                        wbCheck(cudaDeviceSynchronize());

                                                                        // One block with one thread per histogram bin.
                                                                        cdf<<<1, HISTOGRAM_LENGTH>>>(GPU_Hist, CDF, image_size_g);
                                                                        wbCheck(cudaGetLastError());
                                                                        wbCheck(cudaDeviceSynchronize());

                                                                        equal<<<DimGrid2, DimBlock2>>>(GPU_Char, CDF, imageWidth, imageHeight);
                                                                        wbCheck(cudaGetLastError());
                                                                        wbCheck(cudaDeviceSynchronize());

                                                                        castFloat<<<DimGrid2, DimBlock2>>>(GPU_Char, GPU_Float, imageWidth, imageHeight);
                                                                        wbCheck(cudaGetLastError());
                                                                        wbCheck(cudaDeviceSynchronize());

                                                                        wbCheck(cudaMemcpy(hostOutputImageData, GPU_Float, image_size_c * sizeof(float), cudaMemcpyDeviceToHost));
                                                                        wbSolution(args, outputImage);

                                                                        wbCheck(cudaFree(GPU_Float));
                                                                        wbCheck(cudaFree(GPU_Char));
                                                                        wbCheck(cudaFree(g_img));
                                                                        wbCheck(cudaFree(GPU_Hist));
                                                                        wbCheck(cudaFree(CDF));

                                                                        return 0;
                                                                    }
// Editor is loading...