/*
 * Assembler Language HW7
 *
 * Paste-site metadata (not part of the program):
 *   unknown
 *   plain_text
 *   3 years ago
 *   2.8 kB
 *   4
 *   Indexable
 */
#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>
#include <emmintrin.h>
/*
 * Image dimensions in pixels.  The values are kept tiny so the program
 * prints a single line; the original notes 17280 (height) and 30720 (width)
 * as the alternative sizes.
 */
enum {
    height = 2, /* alternative: 17280 */
    width  = 2  /* alternative: 30720 */
};
/*
 * Convert four packed 32-bit pixels to grayscale in one SSE2 pass.
 *
 * Every 32-bit lane is treated as four 8-bit channels:
 *   bits 24-31  copied through unchanged (presumably alpha -- confirm
 *               against the producer of the bitmap),
 *   bits 16-23  red   channel, weight 0.299,
 *   bits  8-15  green channel, weight 0.587,
 *   bits  0- 7  blue  channel, weight 0.114.
 * The weighted sum is rounded to an integer and replicated into the three
 * lower bytes of the lane.
 */
static __m128i grayscale_x4(__m128i pixels)
{
    const __m128i byte_mask = _mm_set1_epi32(0xFF);
    /* _mm_set1_ps builds the constants in registers, so no 16-byte
     * aligned memory operand is needed. */
    const __m128 weight_r = _mm_set1_ps(0.299f);
    const __m128 weight_g = _mm_set1_ps(0.587f);
    const __m128 weight_b = _mm_set1_ps(0.114f);

    /* Isolate each channel; the mask removes the higher channels that a
     * plain right shift would leave in the lane. */
    __m128i r = _mm_and_si128(_mm_srli_epi32(pixels, 16), byte_mask);
    __m128i g = _mm_and_si128(_mm_srli_epi32(pixels, 8), byte_mask);
    __m128i b = _mm_and_si128(pixels, byte_mask);

    /* Keep only the top byte of every lane. */
    __m128i top = _mm_slli_epi32(_mm_srli_epi32(pixels, 24), 24);

    __m128 luma = _mm_add_ps(
        _mm_mul_ps(_mm_cvtepi32_ps(b), weight_b),
        _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(r), weight_r),
                   _mm_mul_ps(_mm_cvtepi32_ps(g), weight_g)));

    /* The weights sum to 1.0, so the rounded value stays within 0..255
     * and cannot spill into a neighbouring byte. */
    __m128i gray = _mm_cvtps_epi32(luma);

    return _mm_or_si128(
        _mm_or_si128(top, _mm_slli_epi32(gray, 16)),
        _mm_or_si128(_mm_slli_epi32(gray, 8), gray));
}

/*
 * Fill a height x width bitmap with rand() values, convert it to grayscale
 * four pixels at a time and print every converted block as four integers.
 *
 * Only whole blocks of four pixels are processed; when height * width is not
 * a multiple of four, the trailing (up to three) pixels are not converted.
 *
 * Returns 0 on success, EXIT_FAILURE if the bitmap cannot be allocated.
 */
int main(void)
{
    const size_t pixel_count = (size_t)height * width;
    /* aligned_alloc (C11) wants the size to be a multiple of the alignment. */
    const size_t byte_count =
        (pixel_count * sizeof(int) + 15u) & ~(size_t)15u;

    int *pBitmap = aligned_alloc(16, byte_count);
    if (pBitmap == NULL) {
        fprintf(stderr, "failed to allocate %zu bytes for the bitmap\n",
                byte_count);
        return EXIT_FAILURE;
    }

    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            pBitmap[col + row * width] = rand();
        }
    }

    const __m128i *blocks = (const __m128i *)pBitmap;
    for (size_t i = 0; i < pixel_count / 4; i++) {
        /* Each block is converted on its own; the buffer is 16-byte
         * aligned, so the aligned load is valid. */
        __m128i answer = grayscale_x4(_mm_load_si128(&blocks[i]));

        /* Full 128-bit store: all four lanes are printed below. */
        int tmp[4];
        _mm_storeu_si128((__m128i *)tmp, answer);
        printf("%d %d %d %d\n", tmp[0], tmp[1], tmp[2], tmp[3]);
    }

    free(pBitmap);
    return 0;
}
/* Editor is loading... (paste-site residue, not part of the program) */