yvu9 -> yv12 unscaled converter with linear chroma scaling
author: Michael Niedermayer <michaelni@gmx.at>
Thu, 27 Jun 2002 23:48:53 +0000 (23:48 +0000)
committer: Michael Niedermayer <michaelni@gmx.at>
Thu, 27 Jun 2002 23:48:53 +0000 (23:48 +0000)
Originally committed as revision 6583 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc

postproc/rgb2rgb.c
postproc/rgb2rgb.h
postproc/rgb2rgb_template.c
postproc/swscale.c

index 962a58945f20dc5afecfbcfd47821667875775e9..3878e4835fd77e32202ed6c27f8cc2976d5d20a7 100644 (file)
@@ -512,6 +512,19 @@ void yvu9toyv12(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
 #endif
 }
 
 #endif
 }
 
+void planar2x(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride)
+{      /*
+        * Public entry point: forwards all six arguments, unchanged, to one
+        * of the planar2x_* variants.  When CAN_COMPILE_X86_ASM is defined the
+        * variant is picked at run time from the gCpuCaps flags (MMX2 first,
+        * then 3DNow, otherwise the C version); when it is not defined only
+        * planar2x_C is ever called.
+        * The planar2x_* functions are defined outside this hunk -- presumably
+        * instantiations of the RENAME(planar2x) template; confirm there.
+        */
+#ifdef CAN_COMPILE_X86_ASM
+       // ordered by speed, fastest first
+       if(gCpuCaps.hasMMX2)
+               planar2x_MMX2(src, dst, width, height, srcStride, dstStride);
+       else if(gCpuCaps.has3DNow)
+               planar2x_3DNow(src, dst, width, height, srcStride, dstStride);
+       else    // deliberately left open: binds to the planar2x_C call after the #endif
+#endif
+               planar2x_C(src, dst, width, height, srcStride, dstStride);
+}
+
 /**
  *
  * height should be a multiple of 2 and width should be a multiple of 2 (if this is a
 /**
  *
  * height should be a multiple of 2 and width should be a multiple of 2 (if this is a
index 9fb6da6ef1000f1144acb0e0a246568142b106ac..a0ce0061034ddcdcf8b9bcbc4de092c90e32fad2 100644 (file)
@@ -41,6 +41,7 @@ extern void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t
 extern void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
        unsigned int width, unsigned int height,
        unsigned int lumStride, unsigned int chromStride, unsigned int srcStride);
 extern void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
        unsigned int width, unsigned int height,
        unsigned int lumStride, unsigned int chromStride, unsigned int srcStride);
+extern void planar2x(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride);
 
 extern void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
                            unsigned width, unsigned height, unsigned src1Stride,
 
 extern void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
                            unsigned width, unsigned height, unsigned src1Stride,
index 015e7f2d560313eac75491162336462b4b027e0c..b6c26a11ee4035d575583335768bc815d8c9ae3c 100644 (file)
@@ -1295,6 +1295,95 @@ static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc,
        /* XXX: implement upscaling for U,V */
 }
 
        /* XXX: implement upscaling for U,V */
 }
 
+/**
+ * Upscale one 8-bit plane by a factor of two horizontally and vertically.
+ *
+ * Output produced by the code below (assumes srcWidth >= 1, srcHeight >= 1):
+ *  - the first and the last destination row are the first / last source row
+ *    with every pixel written twice (no vertical blending);
+ *  - every other pair of destination rows is built from two adjacent source
+ *    rows; in the C path each output pixel is a 3:1 blend of two diagonally
+ *    adjacent source pixels;
+ *  - the leftmost and rightmost destination pixels of those row pairs are
+ *    plain copies of the upper source row's first / last pixel.
+ * Altogether 2*srcHeight rows of 2*srcWidth bytes are written.
+ *
+ * @param src        first row of the source plane
+ * @param dst        first row of the destination plane
+ * @param srcWidth   source width in pixels
+ * @param srcHeight  source height in rows
+ * @param srcStride  byte distance between consecutive source rows
+ * @param dstStride  byte distance between consecutive destination rows
+ */
+static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
+{
+       int x,y;
+       
+       // first line: nothing above to blend with, just double each pixel
+       for(x=0; x<srcWidth; x++){
+               dst[2*x+0]=
+               dst[2*x+1]= src[x];
+       }
+       dst+= dstStride;
+
+       // Each iteration combines source rows y-1 (src) and y (src+srcStride)
+       // into two destination rows (dst and dst+dstStride).
+       for(y=1; y<srcHeight; y++){
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+               // SIMD path: 8 source bytes per iteration; %%eax counts up from
+               // -mmxSize in steps of 8 and the loop repeats while it is negative.
+               // PAVGB and MOVNTQ (and EMMS / SFENCE at the end of the function)
+               // are macros defined outside this function.  Every PAVGB is
+               // issued twice -- presumably, assuming it is a packed byte
+               // average, so that avg(avg(a,b),b) approximates the (a+3*b)/4
+               // weighting used by the C path.
+               // NOTE(review): the first load address is src-1 (one byte before
+               // the row), and there is no scalar tail, so a srcWidth that is
+               // not a multiple of 8 makes the last iteration read and write
+               // past the row -- confirm callers guarantee suitable padding.
+               // NOTE(review): tracing the operands suggests the bytes stored
+               // here are shifted by one position relative to what the C path
+               // stores -- verify against the C output before relying on it.
+               const int mmxSize= srcWidth;
+               asm volatile(
+                       "movl %4, %%eax                 \n\t"
+                       "1:                             \n\t"
+                       "movq (%0, %%eax), %%mm0        \n\t"
+                       "movq (%1, %%eax), %%mm1        \n\t"
+                       "movq 1(%0, %%eax), %%mm2       \n\t"
+                       "movq 1(%1, %%eax), %%mm3       \n\t"
+                       "movq %%mm0, %%mm4              \n\t"
+                       "movq %%mm1, %%mm5              \n\t"
+                       PAVGB" %%mm3, %%mm0             \n\t"
+                       PAVGB" %%mm3, %%mm0             \n\t"
+                       PAVGB" %%mm4, %%mm3             \n\t"
+                       PAVGB" %%mm4, %%mm3             \n\t"
+                       PAVGB" %%mm2, %%mm1             \n\t"
+                       PAVGB" %%mm2, %%mm1             \n\t"
+                       PAVGB" %%mm5, %%mm2             \n\t"
+                       PAVGB" %%mm5, %%mm2             \n\t"
+                       "movq %%mm3, %%mm4              \n\t"
+                       "movq %%mm2, %%mm5              \n\t"
+                       "punpcklbw %%mm1, %%mm3         \n\t"
+                       "punpckhbw %%mm1, %%mm4         \n\t"
+                       "punpcklbw %%mm0, %%mm2         \n\t"
+                       "punpckhbw %%mm0, %%mm5         \n\t"
+#if 1
+                       MOVNTQ" %%mm3, (%2, %%eax, 2)   \n\t"
+                       MOVNTQ" %%mm4, 8(%2, %%eax, 2)  \n\t"
+                       MOVNTQ" %%mm2, (%3, %%eax, 2)   \n\t"
+                       MOVNTQ" %%mm5, 8(%3, %%eax, 2)  \n\t"
+#else
+                       "movq %%mm3, (%2, %%eax, 2)     \n\t"
+                       "movq %%mm4, 8(%2, %%eax, 2)    \n\t"
+                       "movq %%mm2, (%3, %%eax, 2)     \n\t"
+                       "movq %%mm5, 8(%3, %%eax, 2)    \n\t"
+#endif
+                       "addl $8, %%eax                 \n\t"
+                       " js 1b                         \n\t"
+                       :: "r" (src + mmxSize-1), "r" (src + srcStride + mmxSize-1),
+                          "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
+                          "g" (-mmxSize)
+                       : "%eax"
+
+               );
+               // left edge: written after the asm so it replaces whatever the
+               // SIMD loop stored at column 0
+               dst[0]= 
+               dst[dstStride]= src[0];
+#else
+               // left edge: copy of the upper row's first pixel
+               dst[0]= 
+               dst[dstStride]= src[0];
+
+               // interior: 3:1 blends of diagonal neighbours from the two rows
+               for(x=0; x<srcWidth-1; x++){
+                       dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
+                       dst[2*x+dstStride+2]= (  src[x+0] + 3*src[x+srcStride+1])>>2;
+                       dst[2*x+dstStride+1]= (  src[x+1] + 3*src[x+srcStride  ])>>2;
+                       dst[2*x          +2]= (3*src[x+1] +   src[x+srcStride  ])>>2;
+               }
+#endif
+               // right edge: copy of the upper row's last pixel
+               dst[srcWidth*2 -1]= 
+               dst[srcWidth*2 -1 + dstStride]= src[srcWidth-1];
+
+               dst+=dstStride*2;
+               src+=srcStride;
+       }
+       // The loop advanced src srcHeight-1 times from row 0, so it now points
+       // at the last source row -- exactly the row the final output line has
+       // to replicate.  Stepping back one row here would duplicate the
+       // second-to-last row instead (and read before the plane when
+       // srcHeight==1), so src is used as is.
+       
+       // last line
+       for(x=0; x<srcWidth; x++){
+               dst[2*x+0]=
+               dst[2*x+1]= src[x];
+       }
+#ifdef HAVE_MMX
+asm volatile(   EMMS" \n\t"
+               SFENCE" \n\t"
+               :::"memory");
+#endif
+}
+
 /**
  *
  * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
 /**
  *
  * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
index 0f58c4d75651d79f48d5fbb2449e2af130591c4f..8a4223a7fa984e34ece9829099cebb5daba97a61 100644 (file)
@@ -1777,6 +1777,34 @@ static void bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], i
                dstStride[0], dstStride[1], srcStride[0]);
 }
 
                dstStride[0], dstStride[1], srcStride[0]);
 }
 
+static void yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+             int srcSliceH, uint8_t* dst[], int dstStride[]){ /*
+        * Unscaled YVU9 -> YV12/I420 conversion: plane 0 (Y) is copied as is,
+        * planes 1 and 2 are each enlarged by planar2x().
+        *
+        * c          context; srcW, chrSrcW, chrSrcH and dstFormat are read
+        * src        source plane pointers (index 0 = Y, 1 and 2 = chroma)
+        * srcStride  per-plane source strides
+        * srcSliceY  destination row at which the copied Y rows are placed
+        * srcSliceH  number of Y rows copied
+        * dst        destination plane pointers
+        * dstStride  per-plane destination strides
+        *
+        * NOTE(review): the chroma calls below use neither srcSliceY nor
+        * srcSliceH -- they always start at the top of the destination planes
+        * and process c->chrSrcH rows.  That looks right only when the slice
+        * is the whole picture; confirm how callers slice.
+        */
+       int i;
+
+       /* copy Y: one memcpy when both strides match, otherwise row by row */
+       if(srcStride[0]==dstStride[0]) 
+               memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH); // NOTE(review): length would be invalid for a negative stride -- confirm strides are positive here
+       else{
+               uint8_t *srcPtr= src[0];
+               uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
+
+               for(i=0; i<srcSliceH; i++)
+               {
+                       memcpy(dstPtr, srcPtr, c->srcW); // only the visible c->srcW bytes of each row
+                       srcPtr+= srcStride[0];
+                       dstPtr+= dstStride[0];
+               }
+       }
+
+       if(c->dstFormat==IMGFMT_YV12){ // same chroma plane order as the source: 1 -> 1, 2 -> 2
+               planar2x(src[1], dst[1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1]);
+               planar2x(src[2], dst[2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2]);
+       }else{ // any other destination format: chroma planes swapped, 1 -> 2 and 2 -> 1
+               planar2x(src[1], dst[2], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[2]);
+               planar2x(src[2], dst[1], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[1]);
+       }
+}
+
 /**
  * bring pointers in YUV order instead of YVU
  */
 /**
  * bring pointers in YUV order instead of YVU
  */
@@ -2051,7 +2079,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
                        return c;
                }
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
                        return c;
                }
-#if 1
+               
                /* simple copy */
                if(   srcFormat == dstFormat
                   || (srcFormat==IMGFMT_YV12 && dstFormat==IMGFMT_I420)
                /* simple copy */
                if(   srcFormat == dstFormat
                   || (srcFormat==IMGFMT_YV12 && dstFormat==IMGFMT_I420)
@@ -2067,7 +2095,17 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
                        return c;
                }
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
                        return c;
                }
-#endif
+               
+               if( srcFormat==IMGFMT_YVU9 && (dstFormat==IMGFMT_YV12 || dstFormat==IMGFMT_I420) )
+               {
+                       c->swScale= yvu9toyv12Wrapper;
+
+                       if(flags&SWS_PRINT_INFO)
+                               MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
+                                       vo_format_name(srcFormat), vo_format_name(dstFormat));
+                       return c;
+               }
+
                /* bgr32to24 & rgb32to24*/
                if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR24)
                 ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB24))
                /* bgr32to24 & rgb32to24*/
                if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR24)
                 ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB24))