Assembler Language HW7
unknown
plain_text
2 years ago
2.8 kB
3
Indexable
#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>
#include <emmintrin.h>

#define height 2 // 17280
#define width 2  // 30720

/*
 * Convert four packed 32-bit pixels to grayscale in one SSE2 pass.
 *
 * Each 32-bit lane is treated as four 8-bit channels: bits 24-31 are kept
 * untouched, bits 16-23 are weighted by 0.299, bits 8-15 by 0.587 and
 * bits 0-7 by 0.114. The weighted sum is rounded to an integer and copied
 * into the three low bytes of the lane.
 *
 * pixels: four packed 32-bit pixels.
 * Returns the four converted pixels, in the same lane order.
 */
static __m128i to_grayscale_x4(__m128i pixels)
{
    const __m128i byte_mask = _mm_set1_epi32(0xFF);
    const __m128 weight_hi = _mm_set1_ps(0.299f);
    const __m128 weight_mid = _mm_set1_ps(0.587f);
    const __m128 weight_lo = _mm_set1_ps(0.114f);

    /* Isolate every channel to 8 bits; without the mask the neighbouring
     * channels would leak into the weighted sum. */
    __m128i chan_hi = _mm_and_si128(_mm_srli_epi32(pixels, 16), byte_mask);
    __m128i chan_mid = _mm_and_si128(_mm_srli_epi32(pixels, 8), byte_mask);
    __m128i chan_lo = _mm_and_si128(pixels, byte_mask);

    __m128 weighted = _mm_add_ps(
        _mm_mul_ps(_mm_cvtepi32_ps(chan_lo), weight_lo),
        _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(chan_hi), weight_hi),
                   _mm_mul_ps(_mm_cvtepi32_ps(chan_mid), weight_mid)));

    /* The weights sum to 1, so the rounded result still fits in one byte. */
    __m128i gray = _mm_cvtps_epi32(weighted);

    /* Shifting right then left by 24 clears everything except the top byte. */
    __m128i top_byte = _mm_slli_epi32(_mm_srli_epi32(pixels, 24), 24);

    return _mm_or_si128(
        _mm_or_si128(top_byte, _mm_slli_epi32(gray, 16)),
        _mm_or_si128(_mm_slli_epi32(gray, 8), gray));
}

/*
 * Fill a height x width bitmap with rand() values, convert it to grayscale
 * four pixels at a time and print the converted pixels, four per line.
 *
 * Returns 0 on success, EXIT_FAILURE if the bitmap cannot be allocated.
 */
int main(void)
{
    const size_t pixel_count = (size_t)height * width;
    int *pBitmap = malloc(pixel_count * sizeof *pBitmap);

    if (pBitmap == NULL) {
        fprintf(stderr, "failed to allocate %zu pixels\n", pixel_count);
        return EXIT_FAILURE;
    }

    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            pBitmap[(size_t)row * width + col] = rand();
        }
    }

    size_t i = 0;

    /* Full groups of four pixels. Unaligned loads/stores are used so that
     * no particular alignment of the buffers is required. */
    for (; i + 4 <= pixel_count; i += 4) {
        int out[4];
        __m128i block = _mm_loadu_si128((const __m128i *)(pBitmap + i));

        /* Store all 128 bits: every one of the four lanes is printed. */
        _mm_storeu_si128((__m128i *)out, to_grayscale_x4(block));
        printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
    }

    /* Remaining 1-3 pixels: pad with zeros, convert, print only real lanes. */
    if (i < pixel_count) {
        const size_t rest = pixel_count - i;
        int lanes[4] = {0};

        for (size_t k = 0; k < rest; k++) {
            lanes[k] = pBitmap[i + k];
        }

        _mm_storeu_si128((__m128i *)lanes,
                         to_grayscale_x4(_mm_loadu_si128((const __m128i *)lanes)));

        for (size_t k = 0; k < rest; k++) {
            printf(k + 1 < rest ? "%d " : "%d\n", lanes[k]);
        }
    }

    free(pBitmap);
    return 0;
}
Editor is loading...