/*
 * Assembler Language HW7
 *
 * Paste-site metadata retained from the original page:
 *   mail@pastecode.io avatar
 *   unknown
 *   c_cpp
 *   2 years ago
 *   2.7 kB
 *   3
 *   Indexable
 *   Never
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <immintrin.h>
#include <emmintrin.h>

#define height 17280    // 17280
#define width  30720    // 30720

struct timespec diff(struct timespec start, struct timespec end);

int main(void){
	
	struct timespec time1, time2;

    void *ptr;
    int *pBitmap, row, col;
	int o = 0xff;

	__m128i *pBitmap_128 = (__m128i *)pBitmap;

    __m128 RED_R = _mm_set_ps1(0.299);
    __m128 RED_G = _mm_set_ps1(0.587);
    __m128 RED_B = _mm_set_ps1(0.114);

    posix_memalign(&ptr, 16, height * width * sizeof(int));
    pBitmap = (int*) ptr;

    for (row=0; row<height; row++) {
        for (col=0; col<width; col++) {
            pBitmap[col+row*width] = rand();
        }
    }

	clock_gettime(CLOCK_MONOTONIC, &time1);
    for (int i=0; i<(height*width); i+=4){

		pBitmap_128 = (__m128i *)pBitmap + i;
		__m128i pBitmap_24 = _mm_and_si128((__m128i)_mm_srli_epi32(*pBitmap_128, 24),_mm_set1_epi32(o));
		__m128i pBitmap_16 = _mm_and_si128((__m128i)_mm_srli_epi32(*pBitmap_128, 16),_mm_set1_epi32(o));
		__m128i pBitmap_08 = _mm_and_si128((__m128i)_mm_srli_epi32(*pBitmap_128, 8),_mm_set1_epi32(o));
		__m128i pBitmap_00 = _mm_and_si128(*pBitmap_128,_mm_set1_epi32(o));

		_mm_cvtsi128_si32 (pBitmap_24);

		__m128 pBitmap16_float = _mm_cvtepi32_ps(pBitmap_16);
		__m128 pBitmap08_float = _mm_cvtepi32_ps(pBitmap_08);
		__m128 pBitmap00_float = _mm_cvtepi32_ps(pBitmap_00);

		__m128 BW_0 = _mm_add_ps(_mm_mul_ps(pBitmap16_float,RED_R),_mm_mul_ps(pBitmap08_float,RED_G));
		__m128 BW = _mm_add_ps(_mm_mul_ps(pBitmap00_float,RED_B),BW_0);

		__m128i BW_128_int = _mm_cvtps_epi32(BW);
		pBitmap_24 = _mm_slli_epi32(pBitmap_24, 24);
		pBitmap_16 = _mm_slli_epi32(BW_128_int, 16);
		pBitmap_08 = _mm_slli_epi32(BW_128_int, 8);
		pBitmap_00 = BW_128_int;

		__m128i answer = _mm_add_epi32(_mm_add_epi32(pBitmap_24,pBitmap_16),_mm_add_epi32(pBitmap_08,pBitmap_00));

	}
	// for (int row = 0; row < height; row++){
	// 	for (int col = 0; col < width; col++){
	// 		printf("%d ",pBitmap[col + row * width]);
	// 	}
	// 	printf("\n");
	// }
	clock_gettime(CLOCK_MONOTONIC, &time2);
    printf("\nsec : %ld nsec : %ld\n",diff(time1,time2).tv_sec,diff(time1,time2).tv_nsec);
    return 0;
}

/*
 * Compute end - start as a timespec.
 *
 * When the nanosecond difference is negative, one second is borrowed:
 * tv_sec is reduced by one and 1000000000 is added to tv_nsec.
 */
struct timespec diff(struct timespec start, struct timespec end){
    struct timespec elapsed;
    /* 1 when the nanosecond field must borrow from the seconds field. */
    const int borrow = (end.tv_nsec - start.tv_nsec) < 0;

    elapsed.tv_sec = end.tv_sec - start.tv_sec - borrow;
    elapsed.tv_nsec = borrow
        ? 1000000000 + end.tv_nsec - start.tv_nsec
        : end.tv_nsec - start.tv_nsec;

    return elapsed;
}