swscale: fix compile on ppc.
authorRonald S. Bultje <rsbultje@gmail.com>
Thu, 26 May 2011 15:32:32 +0000 (11:32 -0400)
committerRonald S. Bultje <rsbultje@gmail.com>
Thu, 26 May 2011 15:32:32 +0000 (11:32 -0400)
libswscale/ppc/swscale_altivec_template.c
libswscale/ppc/swscale_template.c
libswscale/ppc/yuv2rgb_altivec.c
libswscale/swscale_internal.h

index b123f70..eee7bdd 100644 (file)
@@ -86,9 +86,11 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
 }
 
 static inline void
-yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                      const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                      uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
+yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc,
+                      int lumFilterSize, const int16_t *chrFilter,
+                      const int16_t **chrUSrc, const int16_t **chrVSrc,
+                      int chrFilterSize, uint8_t *dest, uint8_t *uDest,
+                      uint8_t *vDest, int dstW, int chrDstW)
 {
     const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)};
     register int i, j;
@@ -159,22 +161,22 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
             vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0);
             vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter
 
-            perm = vec_lvsl(0, chrSrc[j]);
-            l1 = vec_ld(0, chrSrc[j]);
-            l1_V = vec_ld(2048 << 1, chrSrc[j]);
+            perm = vec_lvsl(0, chrUSrc[j]);
+            l1 = vec_ld(0, chrUSrc[j]);
+            l1_V = vec_ld(0, chrVSrc[j]);
 
             for (i = 0; i < (chrDstW - 7); i+=8) {
                 int offset = i << 2;
-                vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]);
-                vector signed short l2_V = vec_ld(((i + 2048) << 1) + 16, chrSrc[j]);
+                vector signed short l2 = vec_ld((i << 1) + 16, chrUSrc[j]);
+                vector signed short l2_V = vec_ld((i << 1) + 16, chrVSrc[j]);
 
                 vector signed int v1 = vec_ld(offset, u);
                 vector signed int v2 = vec_ld(offset + 16, u);
                 vector signed int v1_V = vec_ld(offset, v);
                 vector signed int v2_V = vec_ld(offset + 16, v);
 
-                vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7]
-                vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+2048] ... chrSrc[j][i+2055]
+                vector signed short ls = vec_perm(l1, l2, perm); // chrUSrc[j][i] ... chrUSrc[j][i+7]
+                vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrVSrc[j][i] ... chrVSrc[j][i]
 
                 vector signed int i1 = vec_mule(vChrFilter, ls);
                 vector signed int i2 = vec_mulo(vChrFilter, ls);
@@ -182,9 +184,9 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
                 vector signed int i2_V = vec_mulo(vChrFilter, ls_V);
 
                 vector signed int vf1 = vec_mergeh(i1, i2);
-                vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
+                vector signed int vf2 = vec_mergel(i1, i2); // chrUSrc[j][i] * chrFilter[j] ... chrUSrc[j][i+7] * chrFilter[j]
                 vector signed int vf1_V = vec_mergeh(i1_V, i2_V);
-                vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
+                vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrVSrc[j][i] * chrFilter[j] ... chrVSrc[j][i+7] * chrFilter[j]
 
                 vector signed int vo1 = vec_add(v1, vf1);
                 vector signed int vo2 = vec_add(v2, vf2);
@@ -200,8 +202,8 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
                 l1_V = l2_V;
             }
             for ( ; i < chrDstW; i++) {
-                u[i] += chrSrc[j][i] * chrFilter[j];
-                v[i] += chrSrc[j][i + 2048] * chrFilter[j];
+                u[i] += chrUSrc[j][i] * chrFilter[j];
+                v[i] += chrVSrc[j][i] * chrFilter[j];
             }
         }
         altivec_packIntArrayToCharArray(u, uDest, chrDstW);
index 7968177..0fe97a1 100644 (file)
 #endif
 
 #if COMPILE_TEMPLATE_ALTIVEC
-static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
+                                    const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                    const int16_t **chrVSrc, int chrFilterSize,
+                                    const int16_t **alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                    uint8_t *aDest, long dstW, long chrDstW)
 {
     yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
-                          chrFilter, chrSrc, chrFilterSize,
+                          chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                           dest, uDest, vDest, dstW, chrDstW);
 }
 
 /**
  * vertical scale YV12 to RGB
  */
-static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
+                                       const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                                       const int16_t **chrVSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest,
+                                       long dstW, long dstY)
 {
     /* The following list of supported dstFormat values should
        match what's found in the body of ff_yuv2packedX_altivec() */
@@ -47,11 +54,11 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
           c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
           c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
             ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
-                                   chrFilter, chrSrc, chrFilterSize,
+                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                                    dest, dstW, dstY);
     else
         yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                       chrFilter, chrSrc, chrFilterSize,
+                       chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                        alpSrc, dest, dstW, dstY);
 }
 #endif
index 96c208a..abd49c9 100644 (file)
@@ -778,10 +778,11 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b
 
 
 void
-ff_yuv2packedX_altivec(SwsContext *c,
-                       const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                     uint8_t *dest, int dstW, int dstY)
+ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
+                       const int16_t **lumSrc, int lumFilterSize,
+                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                       const int16_t **chrVSrc, int chrFilterSize,
+                       uint8_t *dest, int dstW, int dstY)
 {
     int i,j;
     vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
@@ -816,9 +817,9 @@ ff_yuv2packedX_altivec(SwsContext *c,
         V = RND;
         /* extract 8 coeffs from U,V */
         for (j=0; j<chrFilterSize; j++) {
-            X  = vec_ld (0, &chrSrc[j][i/2]);
+            X  = vec_ld (0, &chrUSrc[j][i/2]);
             U  = vec_mradds (X, CCoeffs[j], U);
-            X  = vec_ld (0, &chrSrc[j][i/2+2048]);
+            X  = vec_ld (0, &chrVSrc[j][i/2]);
             V  = vec_mradds (X, CCoeffs[j], V);
         }
 
@@ -894,9 +895,9 @@ ff_yuv2packedX_altivec(SwsContext *c,
         V = RND;
         /* extract 8 coeffs from U,V */
         for (j=0; j<chrFilterSize; j++) {
-            X  = vec_ld (0, &chrSrc[j][i/2]);
+            X  = vec_ld (0, &chrUSrc[j][i/2]);
             U  = vec_mradds (X, CCoeffs[j], U);
-            X  = vec_ld (0, &chrSrc[j][i/2+2048]);
+            X  = vec_ld (0, &chrVSrc[j][i/2]);
             V  = vec_mradds (X, CCoeffs[j], V);
         }
 
index 5aad9cf..8ba0fe9 100644 (file)
@@ -333,9 +333,10 @@ SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c);
 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c);
 SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c);
 void ff_bfin_get_unscaled_swscale(SwsContext *c);
-void ff_yuv2packedX_altivec(SwsContext *c,
-                            const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                            const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
+                            const int16_t **lumSrc, int lumFilterSize,
+                            const int16_t *chrFilter, const int16_t **chrUSrc,
+                            const int16_t **chrVSrc, int chrFilterSize,
                             uint8_t *dest, int dstW, int dstY);
 
 const char *sws_format_name(enum PixelFormat format);