/*
 * Assembler Language HW7
 * 2023/01/06 PM 2:45, unknown
 * c_cpp
 * 3 years ago
 * 2.6 kB
 * 9
 * Indexable
 */
#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>
#include <emmintrin.h>
// Bitmap dimensions in pixels. main() uses them to size the pixel buffer and
// as the bounds of its fill and processing loops.
// The trailing numbers are alternative values left by the author
// (presumably the full-size image dimensions -- TODO confirm).
#define height 2 // 17280
#define width 2 // 30720
/*
 * Fills a height x width bitmap of 32-bit pixels with rand() values, converts
 * it to grayscale four pixels at a time with SSE2 intrinsics, and prints the
 * intermediate top byte and the packed result of every four-pixel block.
 *
 * Each pixel is split into four bytes: bits 24-31 are passed through
 * unchanged, and bits 16-23, 8-15 and 0-7 are weighted by 0.299, 0.587 and
 * 0.114 respectively (red, green and blue, following the naming used by the
 * original constants). The weighted sum is written back into the three low
 * bytes of the output pixel.
 *
 * Returns 0 on success, EXIT_FAILURE if the pixel buffer cannot be allocated.
 */
int main(void){
    const size_t pixel_count = (size_t)height * width;
    /* Only whole groups of four pixels are processed; up to three trailing
     * pixels are left untouched when pixel_count is not a multiple of 4. */
    const size_t block_count = pixel_count / 4;

    /* 16-byte alignment is required by the aligned vector loads below. */
    void *ptr = NULL;
    if (posix_memalign(&ptr, 16, pixel_count * sizeof(int)) != 0) {
        fprintf(stderr, "posix_memalign failed\n");
        return EXIT_FAILURE;
    }
    int *pBitmap = ptr;

    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            pBitmap[col + row * width] = rand();
        }
    }

    const __m128i *blocks = (const __m128i *)pBitmap;

    /* Broadcast the constants to all four lanes. Setting only lane 0 of the
     * mask would zero the channels of the other three pixels. */
    const __m128i byte_mask = _mm_set1_epi32(0xff);
    const __m128 weight_r = _mm_set1_ps(0.299f);
    const __m128 weight_g = _mm_set1_ps(0.587f);
    const __m128 weight_b = _mm_set1_ps(0.114f);

    for (size_t i = 0; i < block_count; i++) {
        /* Load this block on its own; pixels must not accumulate across
         * iterations. */
        const __m128i pixels = _mm_load_si128(&blocks[i]);

        const __m128i top   = _mm_and_si128(_mm_srli_epi32(pixels, 24), byte_mask);
        const __m128i red   = _mm_and_si128(_mm_srli_epi32(pixels, 16), byte_mask);
        const __m128i green = _mm_and_si128(_mm_srli_epi32(pixels, 8), byte_mask);
        const __m128i blue  = _mm_and_si128(pixels, byte_mask);

        /* Spill all 128 bits to a plain array before printing: __m128i has
         * only two 64-bit elements, so it cannot be indexed per 32-bit lane. */
        int lanes[4];
        _mm_storeu_si128((__m128i *)lanes, top);
        printf("shift 24: %d %d %d %d\n", lanes[0], lanes[1], lanes[2], lanes[3]);

        const __m128 red_f   = _mm_cvtepi32_ps(red);
        const __m128 green_f = _mm_cvtepi32_ps(green);
        const __m128 blue_f  = _mm_cvtepi32_ps(blue);

        const __m128 partial = _mm_add_ps(_mm_mul_ps(red_f, weight_r),
                                          _mm_mul_ps(green_f, weight_g));
        const __m128 luma = _mm_add_ps(_mm_mul_ps(blue_f, weight_b), partial);
        const __m128i gray = _mm_cvtps_epi32(luma);

        /* The weights sum to 1, so gray fits in one byte and the four byte
         * fields do not overlap; OR is equivalent to the original adds. */
        const __m128i high = _mm_or_si128(_mm_slli_epi32(top, 24),
                                          _mm_slli_epi32(gray, 16));
        const __m128i low = _mm_or_si128(_mm_slli_epi32(gray, 8), gray);
        const __m128i answer = _mm_or_si128(high, low);

        /* Store the full 128 bits so that all four results are defined. */
        _mm_storeu_si128((__m128i *)lanes, answer);
        printf("%d %d %d %d\n", lanes[0], lanes[1], lanes[2], lanes[3]);
    }

    free(ptr);
    return 0;
}
/* Editor is loading... */