swscale: avoid reading prior to the source buffer in planar2x() MMX2
author Ramiro Polla <ramiro.polla@gmail.com>
Mon, 13 Sep 2010 14:25:18 +0000 (14:25 +0000)
committer Ramiro Polla <ramiro.polla@gmail.com>
Mon, 13 Sep 2010 14:25:18 +0000 (14:25 +0000)
Originally committed as revision 32221 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale

libswscale/rgb2rgb.c
libswscale/rgb2rgb_template.c

index 641965e..3746b9b 100644 (file)
@@ -99,6 +99,7 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t
 
 
 #if ARCH_X86
 
 
 #if ARCH_X86
+DECLARE_ASM_CONST(8, uint64_t, mmx_ff)       = 0x00000000000000FFULL;
 DECLARE_ASM_CONST(8, uint64_t, mmx_null)     = 0x0000000000000000ULL;
 DECLARE_ASM_CONST(8, uint64_t, mmx_one)      = 0xFFFFFFFFFFFFFFFFULL;
 DECLARE_ASM_CONST(8, uint64_t, mask32b)      = 0x000000FF000000FFULL;
 DECLARE_ASM_CONST(8, uint64_t, mmx_null)     = 0x0000000000000000ULL;
 DECLARE_ASM_CONST(8, uint64_t, mmx_one)      = 0xFFFFFFFFFFFFFFFFULL;
 DECLARE_ASM_CONST(8, uint64_t, mask32b)      = 0x000000FF000000FFULL;
index 663514a..9c78ff6 100644 (file)
@@ -1773,13 +1773,22 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
         const x86_reg mmxSize= srcWidth&~15;
         __asm__ volatile(
             "mov           %4, %%"REG_a"            \n\t"
         const x86_reg mmxSize= srcWidth&~15;
         __asm__ volatile(
             "mov           %4, %%"REG_a"            \n\t"
+            "movq        "MANGLE(mmx_ff)", %%mm0    \n\t"
+            "movq         (%0, %%"REG_a"), %%mm4    \n\t"
+            "movq                   %%mm4, %%mm2    \n\t"
+            "psllq                     $8, %%mm4    \n\t"
+            "pand                   %%mm0, %%mm2    \n\t"
+            "por                    %%mm2, %%mm4    \n\t"
+            "movq         (%1, %%"REG_a"), %%mm5    \n\t"
+            "movq                   %%mm5, %%mm3    \n\t"
+            "psllq                     $8, %%mm5    \n\t"
+            "pand                   %%mm0, %%mm3    \n\t"
+            "por                    %%mm3, %%mm5    \n\t"
             "1:                                     \n\t"
             "movq         (%0, %%"REG_a"), %%mm0    \n\t"
             "movq         (%1, %%"REG_a"), %%mm1    \n\t"
             "movq        1(%0, %%"REG_a"), %%mm2    \n\t"
             "movq        1(%1, %%"REG_a"), %%mm3    \n\t"
             "1:                                     \n\t"
             "movq         (%0, %%"REG_a"), %%mm0    \n\t"
             "movq         (%1, %%"REG_a"), %%mm1    \n\t"
             "movq        1(%0, %%"REG_a"), %%mm2    \n\t"
             "movq        1(%1, %%"REG_a"), %%mm3    \n\t"
-            "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
-            "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
             PAVGB"                  %%mm0, %%mm5    \n\t"
             PAVGB"                  %%mm0, %%mm3    \n\t"
             PAVGB"                  %%mm0, %%mm5    \n\t"
             PAVGB"                  %%mm0, %%mm5    \n\t"
             PAVGB"                  %%mm0, %%mm3    \n\t"
             PAVGB"                  %%mm0, %%mm5    \n\t"
@@ -1806,6 +1815,8 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
             "movq                   %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
 #endif
             "add                       $8, %%"REG_a"            \n\t"
             "movq                   %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
 #endif
             "add                       $8, %%"REG_a"            \n\t"
+            "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
+            "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
             " js                       1b                       \n\t"
             :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
             "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
             " js                       1b                       \n\t"
             :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
             "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
@@ -1815,9 +1826,9 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
         );
 #else
         const x86_reg mmxSize=1;
         );
 #else
         const x86_reg mmxSize=1;
-#endif
         dst[0        ]= (3*src[0] +   src[srcStride])>>2;
         dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
         dst[0        ]= (3*src[0] +   src[srcStride])>>2;
         dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
+#endif
 
         for (x=mmxSize-1; x<srcWidth-1; x++) {
             dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
 
         for (x=mmxSize-1; x<srcWidth-1; x++) {
             dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;