/*
 * Assembler Language HW7
 *
 * mail@pastecode.io avatar
 * unknown
 * plain_text
 * a year ago
 * 2.8 kB
 * 1
 * Indexable
 * Never
 */
#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>
#include <emmintrin.h>

#define height 2    // 17280
#define width  2    // 30720

/*
 * Convert four packed 32-bit pixels to grayscale in one SSE2 pass.
 *
 * Each 32-bit lane is treated as A:R:G:B from the most significant byte
 * down (alpha in bits 31..24, blue in bits 7..0).  The luma value is
 * 0.299*R + 0.587*G + 0.114*B (the ITU-R BT.601 weights), rounded by
 * _mm_cvtps_epi32 and replicated into the R, G and B bytes.  Alpha is
 * passed through unchanged.
 */
static __m128i gray4_argb(__m128i argb)
{
    const __m128i channel_mask = _mm_set1_epi32(0xFF);
    /* _mm_set1_ps avoids the 16-byte alignment requirement that
     * _mm_load_ps would impose on a plain stack array. */
    const __m128 weight_r = _mm_set1_ps(0.299f);
    const __m128 weight_g = _mm_set1_ps(0.587f);
    const __m128 weight_b = _mm_set1_ps(0.114f);

    /* Isolate every channel: a logical shift alone leaves the higher
     * channels in the lane, so mask down to 8 bits before converting. */
    __m128i alpha = _mm_slli_epi32(_mm_srli_epi32(argb, 24), 24);
    __m128 red   = _mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(argb, 16), channel_mask));
    __m128 green = _mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(argb, 8), channel_mask));
    __m128 blue  = _mm_cvtepi32_ps(_mm_and_si128(argb, channel_mask));

    /* Weighted sum; the weights add up to 1.0, so the rounded result
     * still fits in one byte. */
    __m128 luma = _mm_add_ps(_mm_mul_ps(blue, weight_b),
                             _mm_add_ps(_mm_mul_ps(red, weight_r),
                                        _mm_mul_ps(green, weight_g)));
    __m128i bw = _mm_cvtps_epi32(luma);

    /* Reassemble A:bw:bw:bw.  The fields do not overlap, so OR is exact. */
    return _mm_or_si128(_mm_or_si128(alpha, _mm_slli_epi32(bw, 16)),
                        _mm_or_si128(_mm_slli_epi32(bw, 8), bw));
}

/*
 * Fill a height x width bitmap with rand() pixels, convert it to
 * grayscale four pixels at a time, and print the converted pixels.
 *
 * Returns 0 on success, EXIT_FAILURE if the bitmap cannot be allocated.
 */
int main(void){
    const size_t pixel_count = (size_t)height * width;
    const size_t full_blocks = pixel_count / 4;
    const size_t tail = pixel_count % 4;

    /* 16-byte aligned so the main loop can use aligned 128-bit loads. */
    int *pBitmap = _mm_malloc(pixel_count * sizeof *pBitmap, 16);
    if (pBitmap == NULL) {
        fprintf(stderr, "failed to allocate %zu pixels\n", pixel_count);
        return EXIT_FAILURE;
    }

    for (size_t row = 0; row < height; row++) {
        for (size_t col = 0; col < width; col++) {
            pBitmap[col + row * width] = rand();
        }
    }

    const __m128i *pBitmap_128 = (const __m128i *)pBitmap;

    for (size_t i = 0; i < full_blocks; i++) {
        /* Each block is converted independently; nothing is carried
         * over from the previous block. */
        __m128i answer = gray4_argb(_mm_load_si128(&pBitmap_128[i]));

        /* Store all 128 bits so every printed element is initialised. */
        int tmp[4];
        _mm_storeu_si128((__m128i *)tmp, answer);

        printf("%d %d %d %d\n", tmp[0], tmp[1], tmp[2], tmp[3]);
    }

    /* Remaining 1..3 pixels: pad to a full vector so they go through the
     * same conversion (and rounding) as the main loop. */
    if (tail != 0) {
        int padded[4] = {0};
        int tmp[4];

        for (size_t k = 0; k < tail; k++) {
            padded[k] = pBitmap[full_blocks * 4 + k];
        }
        _mm_storeu_si128((__m128i *)tmp,
                         gray4_argb(_mm_loadu_si128((const __m128i *)padded)));

        for (size_t k = 0; k < tail; k++) {
            printf("%d%c", tmp[k], (k + 1 == tail) ? '\n' : ' ');
        }
    }

    _mm_free(pBitmap);
    return 0;
}