Untitled
unknown
plain_text
4 years ago
6.9 kB
2
Indexable
#include <png.h> #include <zlib.h> #include <cassert> #include <cmath> #include <cstdlib> #include <iostream> #define MASK_N 2 #define MASK_X 5 #define MASK_Y 5 #define SCALE 8 // clang-format off __constant__ int mask[MASK_N][MASK_X][MASK_Y] = { {{ -1, -4, -6, -4, -1}, { -2, -8,-12, -8, -2}, { 0, 0, 0, 0, 0}, { 2, 8, 12, 8, 2}, { 1, 4, 6, 4, 1}}, {{ -1, -2, 0, 2, 1}, { -4, -8, 0, 8, 4}, { -6,-12, 0, 12, 6}, { -4, -8, 0, 8, 4}, { -1, -2, 0, 2, 1}} }; // clang-format on int read_png(const char* filename, unsigned char** image, unsigned* height, unsigned* width, unsigned* channels) { unsigned char sig[8]; FILE* infile; infile = fopen(filename, "rb"); fread(sig, 1, 8, infile); if (!png_check_sig(sig, 8)) return 1; /* bad signature */ png_structp png_ptr; png_infop info_ptr; png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); if (!png_ptr) return 4; /* out of memory */ info_ptr = png_create_info_struct(png_ptr); if (!info_ptr) { png_destroy_read_struct(&png_ptr, NULL, NULL); return 4; /* out of memory */ } png_init_io(png_ptr, infile); png_set_sig_bytes(png_ptr, 8); png_read_info(png_ptr, info_ptr); int bit_depth, color_type; png_get_IHDR(png_ptr, info_ptr, width, height, &bit_depth, &color_type, NULL, NULL, NULL); png_uint_32 i, rowbytes; png_bytep row_pointers[*height]; png_read_update_info(png_ptr, info_ptr); rowbytes = png_get_rowbytes(png_ptr, info_ptr); *channels = (int)png_get_channels(png_ptr, info_ptr); if ((*image = (unsigned char*)malloc(rowbytes * *height)) == NULL) { png_destroy_read_struct(&png_ptr, &info_ptr, NULL); return 3; } for (i = 0; i < *height; ++i) { row_pointers[i] = *image + i * rowbytes; } png_read_image(png_ptr, row_pointers); png_read_end(png_ptr, NULL); png_destroy_read_struct(&png_ptr, &info_ptr, NULL); return 0; } void write_png(const char* filename, png_bytep image, const unsigned height, const unsigned width, const unsigned channels) { FILE* fp = fopen(filename, "wb"); png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); png_infop info_ptr = png_create_info_struct(png_ptr); png_init_io(png_ptr, fp); png_set_IHDR(png_ptr, info_ptr, width, height, 8, PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); png_set_filter(png_ptr, 0, PNG_NO_FILTERS); png_write_info(png_ptr, info_ptr); png_set_compression_level(png_ptr, 0); png_bytep row_ptr[height]; for (int i = 0; i < height; ++i) { row_ptr[i] = image + i * width * channels * sizeof(unsigned char); } png_write_image(png_ptr, row_ptr); png_write_end(png_ptr, NULL); png_destroy_write_struct(&png_ptr, &info_ptr); fclose(fp); } __global__ void sobel_GPU(unsigned char* s, unsigned char* t, unsigned height, unsigned width, unsigned channels){ int i, v, u; int R, G, B; double val[MASK_N * 3] = {0.0}; int adjustX, adjustY, xBound, yBound; int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; adjustX = (MASK_X % 2) ? 1 : 0; adjustY = (MASK_Y % 2) ? 1 : 0; xBound = MASK_X / 2; yBound = MASK_Y / 2; if(x < width && y < height){ for (i = 0; i < MASK_N; ++i) { val[i * 3 + 2] = 0.0; val[i * 3 + 1] = 0.0; val[i * 3] = 0.0; for (v = -yBound; v < yBound + adjustY; ++v) { for (u = -xBound; u < xBound + adjustX; ++u) { if ((x + u) >= 0 && (x + u) < width && y + v >= 0 && y + v < height) { R = s[channels * (width * (y + v) + (x + u)) + 2]; G = s[channels * (width * (y + v) + (x + u)) + 1]; B = s[channels * (width * (y + v) + (x + u)) + 0]; val[i * 3 + 2] += R * mask[i][u + xBound][v + yBound]; val[i * 3 + 1] += G * mask[i][u + xBound][v + yBound]; val[i * 3 + 0] += B * mask[i][u + xBound][v + yBound]; } } } } double totalR = 0.0; double totalG = 0.0; double totalB = 0.0; for (i = 0; i < MASK_N; ++i) { totalR += val[i * 3 + 2] * val[i * 3 + 2]; totalG += val[i * 3 + 1] * val[i * 3 + 1]; totalB += val[i * 3 + 0] * val[i * 3 + 0]; } totalR = sqrt(totalR) / SCALE; totalG = sqrt(totalG) / SCALE; totalB = sqrt(totalB) / SCALE; const unsigned char cR = (totalR > 255.0) ? 255 : totalR; const unsigned char cG = (totalG > 255.0) ? 255 : totalG; const unsigned char cB = (totalB > 255.0) ? 255 : totalB; t[channels * (width * y + x) + 2] = cR; t[channels * (width * y + x) + 1] = cG; t[channels * (width * y + x) + 0] = cB; } } int main(int argc, char** argv) { assert(argc == 3); unsigned height, width, channels; unsigned char* src_img = NULL; read_png(argv[1], &src_img, &height, &width, &channels); assert(channels == 3); unsigned char* dst_img = (unsigned char*)malloc(height * width * channels * sizeof(unsigned char)); printf("width = %d, height = %d, channels = %d\n", width, height, channels); cudaDeviceProp devProp; cudaGetDeviceProperties(&devProp, 0); int num_SMs = devProp.multiProcessorCount; printf("number of SMs = %d\n", num_SMs); // Create memory space for source image on GPU unsigned char* GPU_SRC_IMG = NULL; cudaMalloc(&GPU_SRC_IMG, height * width * channels * sizeof(unsigned char)); cudaMemcpy(GPU_SRC_IMG, src_img, height * width * channels * sizeof(unsigned char), cudaMemcpyHostToDevice); cudaDeviceSynchronize(); // Create memory space for destination image on GPU unsigned char* GPU_DST_IMG = NULL; cudaMalloc(&GPU_DST_IMG, height * width * channels * sizeof(unsigned char)); cudaDeviceSynchronize(); // Set parameters of threads and blocks dim3 threadsPerBlock(32, 16, 1); dim3 numBlocks(width / threadsPerBlock.x + 1, height / threadsPerBlock.y + 1, 1); // Run the function on GPU sobel_GPU<<<numBlocks, threadsPerBlock>>>(GPU_SRC_IMG, GPU_DST_IMG, height, width, channels); cudaDeviceSynchronize(); // Copy result from Device to Host cudaMemcpy(dst_img, GPU_DST_IMG, height * width * channels * sizeof(unsigned char), cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); write_png(argv[2], dst_img, height, width, channels); cudaFree(GPU_SRC_IMG); cudaFree(GPU_DST_IMG); free(src_img); free(dst_img); return 0; }
Editor is loading...