/*
 * Assembler Language HW7
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <immintrin.h>
#include <emmintrin.h>
#include <xmmintrin.h>

// Bitmap dimensions in pixels: main iterates rows up to `height` and columns
// up to `width`. The trailing numbers are alternative sizes left by the
// author (presumably the full-size image -- confirm before switching).
#define height 172   // rows; alternative value: 17280
#define width  307    // columns; alternative value: 30720

// Returns the elapsed time from `start` to `end`; defined after main.
struct timespec diff(struct timespec start, struct timespec end);

int main(void){
	
	struct timespec time1, time2;

    void *ptr;
    int *pBitmap, row, col;
	int o = 0xff;

	__m128i *pBitmap_128 = (__m128i *)pBitmap;

    __m128 RED_R = _mm_set_ps1(0.299);		// R value 0.299
    __m128 RED_G = _mm_set_ps1(0.587);		// G value 0.299
    __m128 RED_B = _mm_set_ps1(0.114);		// B value 0.299

    posix_memalign(&ptr, 16, height * width * sizeof(int));
    pBitmap = (int*) ptr;

    for (row=0; row<height; row++) {
        for (col=0; col<width; col++) {
            pBitmap[col+row*width] = rand();
        }
    }

	clock_gettime(CLOCK_MONOTONIC, &time1);		// Starting time

    for (int i=0; i<(height*width); i+=4){

		pBitmap_128 = (__m128i *)pBitmap + i;

		// Shift the accumulator right by 24, 16, and 8 bits 
		__m128i pBitmap_24 = _mm_and_si128((__m128i)_mm_srli_epi32(*pBitmap_128, 24),_mm_set1_epi32(o));
		__m128i pBitmap_16 = _mm_and_si128((__m128i)_mm_srli_epi32(*pBitmap_128, 16),_mm_set1_epi32(o));
		__m128i pBitmap_08 = _mm_and_si128((__m128i)_mm_srli_epi32(*pBitmap_128, 8),_mm_set1_epi32(o));

		// Store the original accumulator value
		__m128i pBitmap_00 = _mm_and_si128(*pBitmap_128,_mm_set1_epi32(o));

		// Convert the pBitmap_16, pBitmap_08, and pBitmap_00 variables to float point values
		__m128 pBitmap16_float = _mm_cvtepi32_ps(pBitmap_16);
		__m128 pBitmap08_float = _mm_cvtepi32_ps(pBitmap_08);
		__m128 pBitmap00_float = _mm_cvtepi32_ps(pBitmap_00);

		// Calculate the value of RGB value
		__m128 BW_0 = _mm_add_ps(_mm_mul_ps(pBitmap16_float,RED_R),_mm_mul_ps(pBitmap08_float,RED_G));
		__m128 BW = _mm_add_ps(_mm_mul_ps(pBitmap00_float,RED_B),BW_0);

		// Convert the result back to an integer
		__m128i BW_128_int = _mm_cvtps_epi32(BW);

		// Shift the pBitmap_24, BW_128_int variables left by 24, 16, and 8 bits
		pBitmap_24 = _mm_slli_epi32(pBitmap_24, 24);
		pBitmap_16 = _mm_slli_epi32(BW_128_int, 16);
		pBitmap_08 = _mm_slli_epi32(BW_128_int, 8);
		pBitmap_00 = BW_128_int;

		// sum all variables and  Store the result in an array of integers
		__m128i answer = _mm_add_epi32(_mm_add_epi32(pBitmap_24,pBitmap_16),_mm_add_epi32(pBitmap_08,pBitmap_00));

	}
	// print result
	// for (int row = 0; row < height; row++){
	// 	for (int col = 0; col < width; col++){
	// 		printf("%d ",pBitmap[col + row * width]);
	// 	}
	// 	printf("\n");
	// }

	clock_gettime(CLOCK_MONOTONIC, &time2);		// End Time

    printf("\nsec : %ld nsec : %ld\n",diff(time1,time2).tv_sec,diff(time1,time2).tv_nsec); // print total run time
    return 0;
}

/*
 * Compute the elapsed time from `start` to `end`.
 *
 * The nanosecond field of the result is normalised: when end.tv_nsec is
 * smaller than start.tv_nsec, one second is borrowed from the seconds field
 * and 1,000,000,000 ns is added to the nanosecond difference.
 */
struct timespec diff(struct timespec start, struct timespec end){
    struct timespec elapsed;
    long nsec_delta = end.tv_nsec - start.tv_nsec;

    elapsed.tv_sec = end.tv_sec - start.tv_sec;

    if (nsec_delta < 0) {
        /* Borrow one second to keep the nanosecond part non-negative. */
        elapsed.tv_sec -= 1;
        nsec_delta += 1000000000;
    }

    elapsed.tv_nsec = nsec_delta;
    return elapsed;
}