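// NOTE: the line below is paste-site page residue kept for provenance; it is not part of the program.
// Untitled | avatar | unknown | plain_text | 4 years ago | 6.9 kB | 2 | Indexable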
#include <png.h>
#include <zlib.h>

#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>

// Number of convolution masks; sobel_GPU combines the responses of all of them.
#define MASK_N 2
// Mask extent along its first spatial index (sobel_GPU indexes it with the x offset).
#define MASK_X 5
// Mask extent along its second spatial index (sobel_GPU indexes it with the y offset).
#define MASK_Y 5
// Divisor applied by sobel_GPU to the gradient magnitude before it is clamped to 255.
#define SCALE 8

// clang-format off
// Filter weights, stored in constant memory and addressed by sobel_GPU as
// mask[i][u + MASK_X / 2][v + MASK_Y / 2], where u is the x offset and v the y
// offset of a neighbour. Note that this makes each inner brace list below a
// fixed x offset (not an image row):
//   mask[0] changes sign along its first index  (the x offset),
//   mask[1] changes sign along its second index (the y offset).
__constant__ int mask[MASK_N][MASK_X][MASK_Y] = {
    {{ -1, -4, -6, -4, -1},
     { -2, -8,-12, -8, -2},
     {  0,  0,  0,  0,  0},
     {  2,  8, 12,  8,  2},
     {  1,  4,  6,  4,  1}},
    {{ -1, -2,  0,  2,  1},
     { -4, -8,  0,  8,  4},
     { -6,-12,  0, 12,  6},
     { -4, -8,  0,  8,  4},
     { -1, -2,  0,  2,  1}}
};
// clang-format on

/**
 * Decode a PNG file into a tightly packed, heap-allocated 8-bit pixel buffer.
 *
 * On success `*image` points to `height * width * channels` bytes obtained
 * from malloc() (rows stored top to bottom, no padding); the caller must
 * free() it. On any failure `*image` is NULL and nothing is leaked.
 *
 * 16-bit samples are reduced to 8 bits, palette images are expanded to RGB
 * and 1/2/4-bit grayscale is expanded to 8 bits, so every returned sample
 * occupies exactly one byte.
 *
 * @param filename  path of the PNG file to read
 * @param image     out: decoded pixels (malloc'ed) or NULL on failure
 * @param height    out: image height in pixels
 * @param width     out: image width in pixels
 * @param channels  out: number of samples per pixel after the transforms
 * @return 0 on success,
 *         1 if the file is too short or has a bad PNG signature,
 *         2 if libpng reported a decoding error,
 *         3 if the pixel buffer could not be allocated,
 *         4 if libpng could not allocate its own structures,
 *         5 if the file could not be opened.
 */
int read_png(const char* filename, unsigned char** image, unsigned* height, unsigned* width,
    unsigned* channels) {
    *image = NULL;

    FILE* infile = fopen(filename, "rb");
    if (!infile) return 5; /* cannot open file */

    unsigned char sig[8];
    if (fread(sig, 1, 8, infile) != 8 || !png_check_sig(sig, 8)) {
        fclose(infile);
        return 1; /* bad signature */
    }

    png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
    if (!png_ptr) {
        fclose(infile);
        return 4; /* out of memory */
    }

    png_infop info_ptr = png_create_info_struct(png_ptr);
    if (!info_ptr) {
        png_destroy_read_struct(&png_ptr, NULL, NULL);
        fclose(infile);
        return 4; /* out of memory */
    }

    /* Assigned after setjmp() and read in the error handler below, so it must
     * be volatile to have a well-defined value after a longjmp(). */
    png_bytep* volatile row_pointers = NULL;

    /* libpng reports errors by longjmp()-ing back here. */
    if (setjmp(png_jmpbuf(png_ptr))) {
        free(row_pointers);
        free(*image);
        *image = NULL;
        png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
        fclose(infile);
        return 2; /* libpng error */
    }

    png_init_io(png_ptr, infile);
    png_set_sig_bytes(png_ptr, 8); /* the signature was already consumed above */
    png_read_info(png_ptr, info_ptr);

    int bit_depth, color_type;
    png_get_IHDR(png_ptr, info_ptr, width, height, &bit_depth, &color_type, NULL, NULL, NULL);

    /* Normalise every input to one byte per sample so that a row is exactly
     * width * channels bytes, which is what callers of this function assume. */
    if (bit_depth == 16) png_set_strip_16(png_ptr);
    if (color_type == PNG_COLOR_TYPE_PALETTE) png_set_palette_to_rgb(png_ptr);
    if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) png_set_expand_gray_1_2_4_to_8(png_ptr);

    /* Must be requested before png_read_update_info() for png_read_image()
     * to de-interlace Adam7 images. */
    png_set_interlace_handling(png_ptr);
    png_read_update_info(png_ptr, info_ptr);

    const size_t rowbytes = png_get_rowbytes(png_ptr, info_ptr);
    const size_t rows = *height;
    *channels = (unsigned)png_get_channels(png_ptr, info_ptr);

    /* Heap allocations (instead of a variable-length array) so that tall
     * images cannot overflow the stack; sizes are computed in size_t. */
    *image = (unsigned char*)malloc(rowbytes * rows);
    row_pointers = (png_bytep*)malloc(rows * sizeof(png_bytep));
    if (*image == NULL || row_pointers == NULL) {
        free(row_pointers);
        free(*image);
        *image = NULL;
        png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
        fclose(infile);
        return 3;
    }

    for (size_t i = 0; i < rows; ++i) {
        row_pointers[i] = *image + i * rowbytes;
    }

    png_read_image(png_ptr, row_pointers);
    png_read_end(png_ptr, NULL);

    free(row_pointers);
    png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
    fclose(infile);
    return 0;
}

/**
 * Write a tightly packed 8-bit image to `filename` as an uncompressed,
 * unfiltered PNG.
 *
 * The PNG colour type is derived from `channels` (1 = gray, 2 = gray+alpha,
 * 3 = RGB, 4 = RGBA) so that it always matches the row stride used to slice
 * `image`. Failures are reported on stderr; the function returns without
 * writing (or after a partial write) in that case.
 *
 * @param filename  destination path (overwritten if it exists)
 * @param image     `height * width * channels` bytes, rows top to bottom
 * @param height    image height in pixels
 * @param width     image width in pixels
 * @param channels  samples per pixel, 1..4
 */
void write_png(const char* filename, png_bytep image, const unsigned height, const unsigned width,
    const unsigned channels) {
    int color_type;
    switch (channels) {
        case 1: color_type = PNG_COLOR_TYPE_GRAY; break;
        case 2: color_type = PNG_COLOR_TYPE_GRAY_ALPHA; break;
        case 3: color_type = PNG_COLOR_TYPE_RGB; break;
        case 4: color_type = PNG_COLOR_TYPE_RGB_ALPHA; break;
        default:
            fprintf(stderr, "write_png: unsupported channel count %u\n", channels);
            return;
    }
    if (image == NULL || width == 0 || height == 0) {
        fprintf(stderr, "write_png: nothing to write to %s\n", filename);
        return;
    }

    /* Row table on the heap (not a variable-length array); it is fully built
     * before setjmp() so no local changes between setjmp() and a longjmp(). */
    png_bytep* row_ptr = (png_bytep*)malloc((size_t)height * sizeof(png_bytep));
    if (row_ptr == NULL) {
        fprintf(stderr, "write_png: out of memory\n");
        return;
    }
    const size_t stride = (size_t)width * channels; /* bytes per row, in size_t to avoid wrap */
    for (size_t r = 0; r < height; ++r) {
        row_ptr[r] = image + r * stride;
    }

    FILE* fp = fopen(filename, "wb");
    if (fp == NULL) {
        fprintf(stderr, "write_png: cannot open %s for writing\n", filename);
        free(row_ptr);
        return;
    }

    png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
    png_infop info_ptr = png_ptr ? png_create_info_struct(png_ptr) : NULL;
    if (png_ptr == NULL || info_ptr == NULL) {
        fprintf(stderr, "write_png: libpng could not allocate its structures\n");
        if (png_ptr) png_destroy_write_struct(&png_ptr, &info_ptr);
        fclose(fp);
        free(row_ptr);
        return;
    }

    /* libpng reports errors by longjmp()-ing back here. */
    if (setjmp(png_jmpbuf(png_ptr))) {
        fprintf(stderr, "write_png: libpng failed while writing %s\n", filename);
        png_destroy_write_struct(&png_ptr, &info_ptr);
        fclose(fp);
        free(row_ptr);
        return;
    }

    png_init_io(png_ptr, fp);
    png_set_IHDR(png_ptr, info_ptr, width, height, 8, color_type, PNG_INTERLACE_NONE,
        PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
    /* No filtering and compression level 0: favour write speed over file size. */
    png_set_filter(png_ptr, 0, PNG_NO_FILTERS);
    png_set_compression_level(png_ptr, 0);
    png_write_info(png_ptr, info_ptr);

    png_write_image(png_ptr, row_ptr);
    png_write_end(png_ptr, NULL);

    png_destroy_write_struct(&png_ptr, &info_ptr);
    fclose(fp);
    free(row_ptr);
}

/**
 * Edge-detection kernel: one thread computes one output pixel.
 *
 * For each of the first three channels of pixel (x, y) the thread convolves
 * the MASK_X x MASK_Y neighbourhood with every mask in `mask`, takes the
 * square root of the sum of the squared responses, divides by SCALE, clamps
 * to 255 and stores the truncated result in `t`. Neighbours that fall outside
 * the image contribute nothing.
 *
 * Launch layout: a 2-D grid of 2-D blocks covering at least width x height
 * threads; threads outside the image return immediately. No shared memory
 * and no block-level synchronisation is used.
 *
 * Preconditions: `s` and `t` are device buffers of height * width * channels
 * bytes with tightly packed rows, and channels >= 3 (only channel offsets
 * 0, 1 and 2 are read and written).
 *
 * @param s         source image (device memory, read only)
 * @param t         destination image (device memory)
 * @param height    image height in pixels
 * @param width     image width in pixels
 * @param channels  bytes per pixel
 */
__global__ void sobel_GPU(unsigned char* s, unsigned char* t, unsigned height, unsigned width, unsigned channels){
    const unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= width || y >= height) return;

    // Window offsets run over [-bound, bound] for odd mask sizes and
    // [-bound, bound - 1] for even ones.
    const int xBound = MASK_X / 2;
    const int yBound = MASK_Y / 2;
    const int xEnd = xBound + ((MASK_X % 2) ? 1 : 0);
    const int yEnd = yBound + ((MASK_Y % 2) ? 1 : 0);

    // Per-mask, per-channel responses. Pixels and weights are integers, so
    // integer accumulation is exact (|sum| <= 255 * sum of |weights|) and far
    // cheaper on a GPU than accumulating in double.
    int grad[MASK_N][3];
#pragma unroll
    for (int i = 0; i < MASK_N; ++i) {
        grad[i][0] = 0;
        grad[i][1] = 0;
        grad[i][2] = 0;
    }

    for (int v = -yBound; v < yEnd; ++v) {
        const long long py = (long long)y + v;
        if (py < 0 || py >= (long long)height) continue;
        const size_t rowBase = (size_t)width * (size_t)py;

        for (int u = -xBound; u < xEnd; ++u) {
            const long long px = (long long)x + u;
            if (px < 0 || px >= (long long)width) continue;

            // 64-bit offset: height * width * channels may exceed 32 bits.
            const size_t pix = (size_t)channels * (rowBase + (size_t)px);
            const int c0 = s[pix + 0];
            const int c1 = s[pix + 1];
            const int c2 = s[pix + 2];

            // Load each neighbour once and feed it to every mask.
#pragma unroll
            for (int i = 0; i < MASK_N; ++i) {
                const int w = mask[i][u + xBound][v + yBound];
                grad[i][0] += c0 * w;
                grad[i][1] += c1 * w;
                grad[i][2] += c2 * w;
            }
        }
    }

    const size_t out = (size_t)channels * ((size_t)width * y + x);
#pragma unroll
    for (int c = 0; c < 3; ++c) {
        // Squares are formed in double so they cannot overflow an int.
        double sumSq = 0.0;
#pragma unroll
        for (int i = 0; i < MASK_N; ++i) {
            sumSq += (double)grad[i][c] * (double)grad[i][c];
        }
        const double magnitude = sqrt(sumSq) / SCALE;
        // Clamp to the 8-bit range; the conversion truncates toward zero.
        t[out + c] = (unsigned char)((magnitude > 255.0) ? 255.0 : magnitude);
    }
}

// Abort with a readable message if a CUDA runtime call did not succeed.
static void check_cuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Usage: <program> <input.png> <output.png>
 *
 * Reads a 3-channel PNG, runs sobel_GPU over it on device 0 and writes the
 * result as a PNG. Returns EXIT_FAILURE (after printing a message to stderr)
 * on bad arguments, unreadable/unsupported input, allocation failure or any
 * CUDA error.
 */
int main(int argc, char** argv) {
    // Explicit checks instead of assert(): these must also hold with NDEBUG.
    if (argc != 3) {
        fprintf(stderr, "usage: %s <input.png> <output.png>\n", argc > 0 ? argv[0] : "sobel");
        return EXIT_FAILURE;
    }

    unsigned height = 0, width = 0, channels = 0;
    unsigned char* src_img = NULL;

    const int rc = read_png(argv[1], &src_img, &height, &width, &channels);
    if (rc != 0 || src_img == NULL) {
        fprintf(stderr, "failed to read %s (read_png returned %d)\n", argv[1], rc);
        return EXIT_FAILURE;
    }
    if (channels != 3) {
        fprintf(stderr, "%s has %u channels; only 3-channel images are supported\n", argv[1],
            channels);
        free(src_img);
        return EXIT_FAILURE;
    }

    // Computed once in size_t so large images do not wrap 32-bit arithmetic.
    const size_t img_bytes = (size_t)height * width * channels * sizeof(unsigned char);

    unsigned char* dst_img = (unsigned char*)malloc(img_bytes);
    if (dst_img == NULL) {
        fprintf(stderr, "out of host memory (%zu bytes)\n", img_bytes);
        free(src_img);
        return EXIT_FAILURE;
    }

    printf("width = %u, height = %u, channels = %u\n", width, height, channels);

    cudaDeviceProp devProp;
    check_cuda(cudaGetDeviceProperties(&devProp, 0), "cudaGetDeviceProperties");
    int num_SMs = devProp.multiProcessorCount;
    printf("number of SMs = %d\n", num_SMs);

    // Create memory space for source image on GPU
    unsigned char* GPU_SRC_IMG = NULL;
    check_cuda(cudaMalloc(&GPU_SRC_IMG, img_bytes), "cudaMalloc (source image)");
    check_cuda(cudaMemcpy(GPU_SRC_IMG, src_img, img_bytes, cudaMemcpyHostToDevice),
        "cudaMemcpy host -> device");

    // Create memory space for destination image on GPU
    unsigned char* GPU_DST_IMG = NULL;
    check_cuda(cudaMalloc(&GPU_DST_IMG, img_bytes), "cudaMalloc (destination image)");

    // Set parameters of threads and blocks: ceil-div so the grid covers the
    // image without launching a spare row/column of blocks.
    dim3 threadsPerBlock(32, 16, 1);
    dim3 numBlocks((width + threadsPerBlock.x - 1) / threadsPerBlock.x,
        (height + threadsPerBlock.y - 1) / threadsPerBlock.y, 1);

    // Run the function on GPU
    sobel_GPU<<<numBlocks, threadsPerBlock>>>(GPU_SRC_IMG, GPU_DST_IMG, height, width, channels);
    check_cuda(cudaGetLastError(), "sobel_GPU launch");            // bad launch configuration
    check_cuda(cudaDeviceSynchronize(), "sobel_GPU execution");    // faults inside the kernel

    // Copy result from Device to Host (cudaMemcpy blocks until the copy is done)
    check_cuda(cudaMemcpy(dst_img, GPU_DST_IMG, img_bytes, cudaMemcpyDeviceToHost),
        "cudaMemcpy device -> host");

    write_png(argv[2], dst_img, height, width, channels);

    check_cuda(cudaFree(GPU_SRC_IMG), "cudaFree (source image)");
    check_cuda(cudaFree(GPU_DST_IMG), "cudaFree (destination image)");
    free(src_img);
    free(dst_img);

    return 0;
}
// Editor is loading...  (paste-site page residue; not part of the program)