#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <ppm.h>
#include "blur.h"
/*
 * do_blur - smooth an image with a fixed 3x3 weighted kernel.
 *
 * out_image: destination rows, written at [0..n_rows-1][0..n_cols-1]
 * in_image:  source rows, read at the same index range
 * n_cols:    number of pixels per row
 * n_rows:    number of rows
 *
 * Every output pixel is the weighted sum of the 3x3 neighbourhood around the
 * matching input pixel (weights 1-2-1 / 2-4-2 / 1-2-1), scaled by 1/16.
 * Neighbours that fall outside the image are replaced by the nearest edge
 * pixel.  Each channel is limited to the range 0..255 and truncated when it
 * is converted to pixval.
 */
void do_blur( pixel **out_image,
              pixel **in_image,
              int n_cols,
              int n_rows )
{
    static const float kernel[3][3] = {{1.0, 2.0, 1.0},
                                       {2.0, 4.0, 2.0},
                                       {1.0, 2.0, 1.0}};
    const float kernel_sum = 16.0;
    const float scale = 1.0 / kernel_sum;
    const int half = 3 >> 1;   /* distance from the kernel centre to its border */
    int row;
    int col;

    for (row = 0; row < n_rows; row++) {
        for (col = 0; col < n_cols; col++) {
            float sum_r = 0.0;
            float sum_g = 0.0;
            float sum_b = 0.0;
            int ky;
            int kx;

            for (ky = 0; ky < 3; ky++) {
                /* Source row for this kernel row, pinned to the image edge. */
                int src_row = row - half + ky;

                if (src_row < 0) {
                    src_row = 0;
                } else if (src_row == n_rows) {
                    src_row = n_rows - 1;
                }

                for (kx = 0; kx < 3; kx++) {
                    float weight = kernel[ky][kx];
                    int src_col = col - half + kx;
                    pixel sample;

                    /* Same edge pinning for the column. */
                    if (src_col < 0) {
                        src_col = 0;
                    } else if (src_col == n_cols) {
                        src_col = n_cols - 1;
                    }

                    sample = in_image[src_row][src_col];
                    sum_r += (float)PPM_GETR(sample) * weight;
                    sum_g += (float)PPM_GETG(sample) * weight;
                    sum_b += (float)PPM_GETB(sample) * weight;
                }
            }

            /* Normalise by the kernel weight total. */
            sum_r *= scale;
            sum_g *= scale;
            sum_b *= scale;

            /* Limit every channel to 0..255 before the conversion to pixval. */
            if (sum_r > 255.0) {
                sum_r = 255.0;
            } else if (sum_r < 0.0) {
                sum_r = 0.0;
            }
            if (sum_g > 255.0) {
                sum_g = 255.0;
            } else if (sum_g < 0.0) {
                sum_g = 0.0;
            }
            if (sum_b > 255.0) {
                sum_b = 255.0;
            } else if (sum_b < 0.0) {
                sum_b = 0.0;
            }

            PPM_ASSIGN(out_image[row][col],
                       (pixval) sum_r,
                       (pixval) sum_g,
                       (pixval) sum_b);
        }
    }
}
|
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <ppm.h>
#include "blur.h"
/*
 * do_blur - smooth an image with a fixed 3x3 weighted kernel, using GCC
 * inline assembly with AMD 3DNow! instructions for the arithmetic.
 *
 * out_image: destination rows, written at [0..n_rows-1][0..n_cols-1]
 * in_image:  source rows, read at the same index range
 * n_cols:    number of pixels per row
 * n_rows:    number of rows
 *
 * Every output pixel is the weighted sum of the 3x3 neighbourhood around the
 * matching input pixel (weights 1-2-1 / 2-4-2 / 1-2-1), multiplied by the
 * reciprocal of 16.  Neighbours outside the image are replaced by the nearest
 * edge pixel.  Each channel is limited to 0..255 and converted to an integer
 * with pf2id.
 *
 * MMX register roles (state is carried between the asm statements below):
 *   mm3  {255.0, 255.0}   upper clamp bound
 *   mm4  {0.0, 0.0}       lower clamp bound
 *   mm5  scratch: current kernel weight / reciprocal, copied to both halves
 *   mm6  blue accumulator in the low half
 *   mm7  red accumulator in the low half, green accumulator in the high half
 *   mm0, mm1  scratch
 *
 * The MMX registers share state with the x87 unit, so femms is executed
 * before the first MMX use and again before returning; without the final
 * femms the caller's floating-point code would run with the unit still in
 * MMX state.
 */
void do_blur( pixel **out_image,
              pixel **in_image,
              int n_cols,
              int n_rows )
{
    float filter_div = 16.0;
    float filter_div_rcp = 0.0;
    static const float filter[3][3] = {{1.0, 2.0, 1.0},
                                       {2.0, 4.0, 2.0},
                                       {1.0, 2.0, 1.0}};
    int x;
    int y;
    int i;
    int j;
    pixel vpix;
    int res_r;
    int res_g;
    int res_b;
    float tmp_min = 0.0;
    float tmp_max = 255.0;
    int tmp_x;
    int tmp_y;
    int tmp_r;
    int tmp_g;
    int tmp_b;

    /* filter_div_rcp = 1 / filter_div: pfrcp gives the initial estimate and
     * pfrcpit1/pfrcpit2 refine it.  The trailing femms leaves the unit clean
     * for the statements that follow. */
    __asm__ __volatile__("femms\n\t"
                         "movd %1, %%mm0\n\t"
                         "pfrcp %%mm0, %%mm1\n\t"
                         "pfrcpit1 %%mm1, %%mm0\n\t"
                         "pfrcpit2 %%mm1, %%mm0\n\t"
                         "movd %%mm0, %0\n\t"
                         "femms"
                         : "=g" (filter_div_rcp)
                         : "g" (filter_div)
                         : "mm0", "mm1"
                         );

    /* Load the clamp bounds once: mm4 = {min, min}, mm3 = {max, max}. */
    __asm__ __volatile__("movd %0, %%mm4\n\t"
                         "punpckldq %%mm4, %%mm4\n\t"
                         "movd %1, %%mm3\n\t"
                         "punpckldq %%mm3, %%mm3\n\t"
                         :
                         : "m" (tmp_min), "m" (tmp_max)
                         : "mm3", "mm4"
                         );

    for(y = 0; y < n_rows; y++) {
        for(x = 0; x < n_cols; x++) {
            /* Reset the accumulators for this output pixel. */
            __asm__ __volatile__("pxor %%mm7, %%mm7\n\t"
                                 "pxor %%mm6, %%mm6\n\t"
                                 :
                                 :
                                 : "mm6", "mm7"
                                 );

            for(j = 0; j < 3; j++) {
                for(i = 0; i < 3; i++) {
                    /* Neighbour coordinates, pinned to the image edge. */
                    tmp_x = x - (3>>1) + i;
                    tmp_y = y - (3>>1) + j;
                    if (tmp_x < 0) tmp_x = 0;
                    if (tmp_x == n_cols) tmp_x = n_cols - 1;
                    if (tmp_y < 0) tmp_y = 0;
                    if (tmp_y == n_rows) tmp_y = n_rows - 1;

                    vpix = in_image[tmp_y][tmp_x];
                    tmp_r = PPM_GETR(vpix);
                    tmp_g = PPM_GETG(vpix);
                    tmp_b = PPM_GETB(vpix);

                    /* The weight is passed as a memory operand and loaded
                     * with a 32-bit movd, so exactly one float is read and
                     * no compiler-generated symbol name is needed. */
                    __asm__ __volatile__(
                        "movd %3, %%mm5\n\t"
                        "punpckldq %%mm5, %%mm5\n\t"   /* mm5 = {w, w}          */
                        "movd %0, %%mm0\n\t"
                        "movd %1, %%mm1\n\t"
                        "punpckldq %%mm1, %%mm0\n\t"   /* mm0 = {r, g} integers */
                        "pi2fd %%mm0, %%mm0\n\t"       /* ... as floats         */
                        "movd %2, %%mm1\n\t"
                        "pi2fd %%mm1, %%mm1\n\t"       /* mm1 low = b as float  */
                        "pfmul %%mm5, %%mm0\n\t"
                        "pfadd %%mm0, %%mm7\n\t"       /* accumulate r and g    */
                        "pfmul %%mm5, %%mm1\n\t"
                        "pfadd %%mm1, %%mm6\n\t"       /* accumulate b          */
                        :
                        : "r" (tmp_r), "r" (tmp_g),
                          "r" (tmp_b), "m" (filter[j][i])
                        : "mm0", "mm1", "mm5", "mm6", "mm7"
                        );
                }
            }

            /* Scale by the reciprocal, clamp to [tmp_min, tmp_max], convert
             * to integers and move the three channels out. */
            __asm__ __volatile__(
                "movd %3, %%mm5\n\t"
                "punpckldq %%mm5, %%mm5\n\t"
                "pfmul %%mm5, %%mm7\n\t"
                "pfmul %%mm5, %%mm6\n\t"
                "pfmax %%mm4, %%mm7\n\t"
                "pfmax %%mm4, %%mm6\n\t"
                "pfmin %%mm3, %%mm7\n\t"
                "pfmin %%mm3, %%mm6\n\t"
                "pf2id %%mm7, %%mm7\n\t"
                "pf2id %%mm6, %%mm6\n\t"
                "movd %%mm7, %0\n\t"
                "punpckhdq %%mm7, %%mm7\n\t"   /* bring green down to the low half */
                "movd %%mm7, %1\n\t"
                "movd %%mm6, %2\n\t"
                : "=g" (res_r), "=g" (res_g), "=g" (res_b)
                : "g" (filter_div_rcp)
                : "mm5", "mm6", "mm7"
                );

            PPM_ASSIGN(out_image[y][x],
                       (pixval) res_r,
                       (pixval) res_g,
                       (pixval) res_b);
        }
    }

    /* Leave MMX state so the caller can use x87 floating point again. */
    __asm__ __volatile__("femms");
}
|