#undef MOVNTQ
#undef EMMS
#undef SFENCE
-#undef MMREG_SIZE
#undef PAVGB
-#if COMPILE_TEMPLATE_SSE2
-#define MMREG_SIZE 16
-#else
-#define MMREG_SIZE 8
-#endif
-
#if COMPILE_TEMPLATE_AMD3DNOW
#define PREFETCH "prefetch"
#define PAVGB "pavgusb"
#define SFENCE " # nop"
#endif
-static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
+#if !COMPILE_TEMPLATE_SSE2
+
+#if !COMPILE_TEMPLATE_AMD3DNOW
+
+static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size)
{
uint8_t *dest = dst;
const uint8_t *s = src;
MOVNTQ" %%mm4, 16%0"
-static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
{
uint8_t *dest = dst;
const uint8_t *s = src;
MMX2, 3DNOW optimization by Nick Kurshev
32-bit C version, and the and&add trick by Michael Niedermayer
*/
-static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size)
{
register const uint8_t* s=src;
register uint8_t* d=dst;
}
}
-static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size)
{
register const uint8_t* s=src;
register uint8_t* d=dst;
}
}
-static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint8_t *s = src;
const uint8_t *end;
}
}
-static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint8_t *s = src;
const uint8_t *end;
}
}
-static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint8_t *s = src;
const uint8_t *end;
}
}
-static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint8_t *s = src;
const uint8_t *end;
}
}
-static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint8_t *s = src;
const uint8_t *end;
}
}
-static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint8_t *s = src;
const uint8_t *end;
}
}
-static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint8_t *s = src;
const uint8_t *end;
}
}
-static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint8_t *s = src;
const uint8_t *end;
|
original bits
*/
-static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint16_t *end;
const uint16_t *mm_end;
}
}
-static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint16_t *end;
const uint16_t *mm_end;
MOVNTQ" %%mm0, %0 \n\t" \
MOVNTQ" %%mm3, 8%0 \n\t" \
-static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint16_t *end;
const uint16_t *mm_end;
}
}
-static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint16_t *end;
const uint16_t *mm_end;
}
}
-static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size)
{
x86_reg idx = 15 - src_size;
const uint8_t *s = src-idx;
}
}
-static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
{
unsigned i;
x86_reg mmx_size= 23 - src_size;
}
static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
- long width, long height,
- long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+ int width, int height,
+ int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
{
- long y;
+ int y;
const x86_reg chromWidth= width>>1;
for (y=0; y<height; y++) {
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
* (If this is a problem for anyone then tell me, and I will fix it.)
*/
static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
- long width, long height,
- long lumStride, long chromStride, long dstStride)
+ int width, int height,
+ int lumStride, int chromStride, int dstStride)
{
//FIXME interpolate chroma
RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
}
static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
- long width, long height,
- long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+ int width, int height,
+ int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
{
- long y;
+ int y;
const x86_reg chromWidth= width>>1;
for (y=0; y<height; y++) {
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
* (If this is a problem for anyone then tell me, and I will fix it.)
*/
static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
- long width, long height,
- long lumStride, long chromStride, long dstStride)
+ int width, int height,
+ int lumStride, int chromStride, int dstStride)
{
//FIXME interpolate chroma
RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
* Width should be a multiple of 16.
*/
static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
- long width, long height,
- long lumStride, long chromStride, long dstStride)
+ int width, int height,
+ int lumStride, int chromStride, int dstStride)
{
RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
}
* Width should be a multiple of 16.
*/
static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
- long width, long height,
- long lumStride, long chromStride, long dstStride)
+ int width, int height,
+ int lumStride, int chromStride, int dstStride)
{
RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
}
* (If this is a problem for anyone then tell me, and I will fix it.)
*/
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
- long width, long height,
- long lumStride, long chromStride, long srcStride)
+ int width, int height,
+ int lumStride, int chromStride, int srcStride)
{
- long y;
+ int y;
const x86_reg chromWidth= width>>1;
for (y=0; y<height; y+=2) {
__asm__ volatile(
SFENCE" \n\t"
:::"memory");
}
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
-static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
+#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
{
- long x,y;
+ int x,y;
dst[0]= src[0];
dst+= dstStride;
for (y=1; y<srcHeight; y++) {
-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
const x86_reg mmxSize= srcWidth&~15;
__asm__ volatile(
"mov %4, %%"REG_a" \n\t"
"punpckhbw %%mm3, %%mm7 \n\t"
"punpcklbw %%mm2, %%mm4 \n\t"
"punpckhbw %%mm2, %%mm6 \n\t"
-#if 1
MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t"
MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t"
MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t"
MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t"
-#else
- "movq %%mm5, (%2, %%"REG_a", 2) \n\t"
- "movq %%mm7, 8(%2, %%"REG_a", 2) \n\t"
- "movq %%mm4, (%3, %%"REG_a", 2) \n\t"
- "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t"
-#endif
"add $8, %%"REG_a" \n\t"
"movq -1(%0, %%"REG_a"), %%mm4 \n\t"
"movq -1(%1, %%"REG_a"), %%mm5 \n\t"
"g" (-mmxSize)
: "%"REG_a
);
-#else
- const x86_reg mmxSize=1;
-
- dst[0 ]= (3*src[0] + src[srcStride])>>2;
- dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
-#endif
for (x=mmxSize-1; x<srcWidth-1; x++) {
dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
}
// last line
-#if 1
dst[0]= src[0];
for (x=0; x<srcWidth-1; x++) {
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
}
dst[2*srcWidth-1]= src[srcWidth-1];
-#else
- for (x=0; x<srcWidth; x++) {
- dst[2*x+0]=
- dst[2*x+1]= src[x];
- }
-#endif
__asm__ volatile(EMMS" \n\t"
SFENCE" \n\t"
:::"memory");
}
+#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */
+#if !COMPILE_TEMPLATE_AMD3DNOW
/**
* Height should be a multiple of 2 and width should be a multiple of 16.
* (If this is a problem for anyone then tell me, and I will fix it.)
* FIXME: Write HQ version.
*/
static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
- long width, long height,
- long lumStride, long chromStride, long srcStride)
+ int width, int height,
+ int lumStride, int chromStride, int srcStride)
{
- long y;
+ int y;
const x86_reg chromWidth= width>>1;
for (y=0; y<height; y+=2) {
__asm__ volatile(
SFENCE" \n\t"
:::"memory");
}
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
/**
* Height should be a multiple of 2 and width should be a multiple of 2.
* FIXME: Write HQ version.
*/
static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
- long width, long height,
- long lumStride, long chromStride, long srcStride)
+ int width, int height,
+ int lumStride, int chromStride, int srcStride)
{
- long y;
+ int y;
const x86_reg chromWidth= width>>1;
for (y=0; y<height-2; y+=2) {
- long i;
+ int i;
for (i=0; i<2; i++) {
__asm__ volatile(
"mov %2, %%"REG_a" \n\t"
rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride);
}
+#endif /* !COMPILE_TEMPLATE_SSE2 */
+#if !COMPILE_TEMPLATE_AMD3DNOW
static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
- long width, long height, long src1Stride,
- long src2Stride, long dstStride)
+ int width, int height, int src1Stride,
+ int src2Stride, int dstStride)
{
- long h;
+ int h;
for (h=0; h < height; h++) {
- long w;
+ int w;
#if COMPILE_TEMPLATE_SSE2
__asm__(
::: "memory"
);
}
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#if !COMPILE_TEMPLATE_SSE2
+#if !COMPILE_TEMPLATE_AMD3DNOW
static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
uint8_t *dst1, uint8_t *dst2,
- long width, long height,
- long srcStride1, long srcStride2,
- long dstStride1, long dstStride2)
+ int width, int height,
+ int srcStride1, int srcStride2,
+ int dstStride1, int dstStride2)
{
x86_reg y;
- long x,w,h;
+ int x,w,h;
w=width/2; h=height/2;
__asm__ volatile(
PREFETCH" %0 \n\t"
static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
uint8_t *dst,
- long width, long height,
- long srcStride1, long srcStride2,
- long srcStride3, long dstStride)
+ int width, int height,
+ int srcStride1, int srcStride2,
+ int srcStride3, int dstStride)
{
x86_reg x;
- long y,w,h;
+ int y,w,h;
w=width/2; h=height;
for (y=0;y<h;y++) {
const uint8_t* yp=src1+srcStride1*y;
:"memory");
}
for (; x<w; x++) {
- const long x2 = x<<2;
+ const int x2 = x<<2;
d[8*x+0] = yp[x2];
d[8*x+1] = up[x];
d[8*x+2] = yp[x2+1];
::: "memory"
);
}
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
{
}
}
+#if !COMPILE_TEMPLATE_AMD3DNOW
static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
dst0+= count;
count++;
}
}
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
}
}
+#if !COMPILE_TEMPLATE_AMD3DNOW
static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
dst0+= count;
count++;
}
}
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
}
static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
- long width, long height,
- long lumStride, long chromStride, long srcStride)
+ int width, int height,
+ int lumStride, int chromStride, int srcStride)
{
- long y;
- const long chromWidth= -((-width)>>1);
+ int y;
+ const int chromWidth= -((-width)>>1);
for (y=0; y<height; y++) {
RENAME(extract_even)(src, ydst, width);
);
}
+#if !COMPILE_TEMPLATE_AMD3DNOW
static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
- long width, long height,
- long lumStride, long chromStride, long srcStride)
+ int width, int height,
+ int lumStride, int chromStride, int srcStride)
{
- long y;
- const long chromWidth= -((-width)>>1);
+ int y;
+ const int chromWidth= -((-width)>>1);
for (y=0; y<height; y++) {
RENAME(extract_even)(src, ydst, width);
::: "memory"
);
}
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
- long width, long height,
- long lumStride, long chromStride, long srcStride)
+ int width, int height,
+ int lumStride, int chromStride, int srcStride)
{
- long y;
- const long chromWidth= -((-width)>>1);
+ int y;
+ const int chromWidth= -((-width)>>1);
for (y=0; y<height; y++) {
RENAME(extract_even)(src+1, ydst, width);
);
}
+#if !COMPILE_TEMPLATE_AMD3DNOW
static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
- long width, long height,
- long lumStride, long chromStride, long srcStride)
+ int width, int height,
+ int lumStride, int chromStride, int srcStride)
{
- long y;
- const long chromWidth= -((-width)>>1);
+ int y;
+ const int chromWidth= -((-width)>>1);
for (y=0; y<height; y++) {
RENAME(extract_even)(src+1, ydst, width);
::: "memory"
);
}
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* !COMPILE_TEMPLATE_SSE2 */
static inline void RENAME(rgb2rgb_init)(void)
{
+#if !COMPILE_TEMPLATE_SSE2
+#if !COMPILE_TEMPLATE_AMD3DNOW
rgb15to16 = RENAME(rgb15to16);
rgb15tobgr24 = RENAME(rgb15tobgr24);
rgb15to32 = RENAME(rgb15to32);
yuv422ptoyuy2 = RENAME(yuv422ptoyuy2);
yuv422ptouyvy = RENAME(yuv422ptouyvy);
yuy2toyv12 = RENAME(yuy2toyv12);
- planar2x = RENAME(planar2x);
- rgb24toyv12 = RENAME(rgb24toyv12);
- interleaveBytes = RENAME(interleaveBytes);
vu9_to_vu12 = RENAME(vu9_to_vu12);
yvu9_to_yuy2 = RENAME(yvu9_to_yuy2);
-
- uyvytoyuv420 = RENAME(uyvytoyuv420);
uyvytoyuv422 = RENAME(uyvytoyuv422);
- yuyvtoyuv420 = RENAME(yuyvtoyuv420);
yuyvtoyuv422 = RENAME(yuyvtoyuv422);
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+
+#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+ planar2x = RENAME(planar2x);
+#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */
+ rgb24toyv12 = RENAME(rgb24toyv12);
+
+ yuyvtoyuv420 = RENAME(yuyvtoyuv420);
+ uyvytoyuv420 = RENAME(uyvytoyuv420);
+#endif /* !COMPILE_TEMPLATE_SSE2 */
+
+#if !COMPILE_TEMPLATE_AMD3DNOW
+ interleaveBytes = RENAME(interleaveBytes);
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
}