/*
 * Assembler Language HW7
 * Paste metadata: author unknown · c_cpp · 3 years ago · 3.3 kB · 7 · Indexable
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <immintrin.h>
#include <emmintrin.h>
#include <xmmintrin.h>
// Bitmap dimensions in pixels used by main() to size and fill the test image.
// NOTE(review): the trailing numbers (17280 / 30720) look like the full-size
// dimensions this reduced size stands in for — confirm before changing.
#define height 172 // 17280
#define width 307 // 30720
// Elapsed time from start to end; the definition follows main().
struct timespec diff(struct timespec start, struct timespec end);
int main(void){
struct timespec time1, time2;
void *ptr;
int *pBitmap, row, col;
int o = 0xff;
__m128i *pBitmap_128 = (__m128i *)pBitmap;
__m128 RED_R = _mm_set_ps1(0.299); // R value 0.299
__m128 RED_G = _mm_set_ps1(0.587); // G value 0.299
__m128 RED_B = _mm_set_ps1(0.114); // B value 0.299
posix_memalign(&ptr, 16, height * width * sizeof(int));
pBitmap = (int*) ptr;
for (row=0; row<height; row++) {
for (col=0; col<width; col++) {
pBitmap[col+row*width] = rand();
}
}
clock_gettime(CLOCK_MONOTONIC, &time1); // Starting time
for (int i=0; i<(height*width); i+=4){
pBitmap_128 = (__m128i *)pBitmap + i;
// Shift the accumulator right by 24, 16, and 8 bits
__m128i pBitmap_24 = _mm_and_si128((__m128i)_mm_srli_epi32(*pBitmap_128, 24),_mm_set1_epi32(o));
__m128i pBitmap_16 = _mm_and_si128((__m128i)_mm_srli_epi32(*pBitmap_128, 16),_mm_set1_epi32(o));
__m128i pBitmap_08 = _mm_and_si128((__m128i)_mm_srli_epi32(*pBitmap_128, 8),_mm_set1_epi32(o));
// Store the original accumulator value
__m128i pBitmap_00 = _mm_and_si128(*pBitmap_128,_mm_set1_epi32(o));
// Convert the pBitmap_16, pBitmap_08, and pBitmap_00 variables to float point values
__m128 pBitmap16_float = _mm_cvtepi32_ps(pBitmap_16);
__m128 pBitmap08_float = _mm_cvtepi32_ps(pBitmap_08);
__m128 pBitmap00_float = _mm_cvtepi32_ps(pBitmap_00);
// Calculate the value of RGB value
__m128 BW_0 = _mm_add_ps(_mm_mul_ps(pBitmap16_float,RED_R),_mm_mul_ps(pBitmap08_float,RED_G));
__m128 BW = _mm_add_ps(_mm_mul_ps(pBitmap00_float,RED_B),BW_0);
// Convert the result back to an integer
__m128i BW_128_int = _mm_cvtps_epi32(BW);
// Shift the pBitmap_24, BW_128_int variables left by 24, 16, and 8 bits
pBitmap_24 = _mm_slli_epi32(pBitmap_24, 24);
pBitmap_16 = _mm_slli_epi32(BW_128_int, 16);
pBitmap_08 = _mm_slli_epi32(BW_128_int, 8);
pBitmap_00 = BW_128_int;
// sum all variables and Store the result in an array of integers
__m128i answer = _mm_add_epi32(_mm_add_epi32(pBitmap_24,pBitmap_16),_mm_add_epi32(pBitmap_08,pBitmap_00));
}
// print result
// for (int row = 0; row < height; row++){
// for (int col = 0; col < width; col++){
// printf("%d ",pBitmap[col + row * width]);
// }
// printf("\n");
// }
clock_gettime(CLOCK_MONOTONIC, &time2); // End Time
printf("\nsec : %ld nsec : %ld\n",diff(time1,time2).tv_sec,diff(time1,time2).tv_nsec); // print total run time
return 0;
}
/*
 * Returns the elapsed time `end - start`.
 *
 * When the nanosecond difference is negative, one second is borrowed so the
 * returned tv_nsec is non-negative (given normalized inputs with end >= start).
 */
struct timespec diff(struct timespec start, struct timespec end){
    struct timespec temp;

    temp.tv_sec = end.tv_sec - start.tv_sec;
    temp.tv_nsec = end.tv_nsec - start.tv_nsec;
    if (temp.tv_nsec < 0) {
        // Borrow one second to bring the nanosecond field back into range.
        temp.tv_sec -= 1;
        temp.tv_nsec += 1000000000L;
    }
    return temp;
}