swscale: don't lose bits on planar >8bit yuv and gray input.
author Michael Niedermayer <michaelni@gmx.at>
Tue, 24 May 2011 20:59:11 +0000 (22:59 +0200)
committer Michael Niedermayer <michaelni@gmx.at>
Wed, 25 May 2011 02:08:34 +0000 (04:08 +0200)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
libswscale/swscale.c
libswscale/swscale_internal.h
libswscale/swscale_template.c
libswscale/utils.c
tests/ref/lavfi/pixfmts_scale_le

index c048180..e4e69cf 100644 (file)
@@ -60,6 +60,7 @@ untested special converters
 #include "swscale.h"
 #include "swscale_internal.h"
 #include "rgb2rgb.h"
+#include "libavutil/avassert.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/x86_cpu.h"
 #include "libavutil/avutil.h"
index 03c5bf9..cffb51f 100644 (file)
@@ -303,6 +303,10 @@ typedef struct SwsContext {
                    int xInc, const int16_t *filter, const int16_t *filterPos,
                    long filterSize);
 
+    void (*hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW,
+                   int xInc, const int16_t *filter, const int16_t *filterPos,
+                   long filterSize, int shift);
+
     void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
     void (*chrConvertRange)(int16_t *dst, int width); ///< Color range conversion function for chroma planes if needed.
 
index e53cfc0..33f9035 100644 (file)
@@ -2242,6 +2242,34 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
 #endif /* COMPILE_MMX */
 }
 
+static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
+                                    const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
+{ // horizontal filter over 16-bit source samples read as stored (no byte swap); srcW and xInc are not used in this body
+    int i, j;
+    for (i=0; i<dstW; i++) { // one output sample per iteration
+        int srcPos= filterPos[i]; // index of the first source sample contributing to output i
+        int val=0; // accumulates the weighted sum for output i
+        for (j=0; j<filterSize; j++) {
+            val += ((int)src[srcPos + j])*filter[filterSize*i + j]; // filter stores filterSize coefficients per output sample
+        }
+        dst[i] = FFMIN(val>>shift, (1<<15)-1); // scale down by the caller-supplied shift and cap at 32767 (only the upper bound is limited); the cubic filter can overflow
+    }
+}
+
+static inline void RENAME(hScale16X)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
+                                    const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
+{ // same horizontal filter as hScale16, but each source sample goes through av_bswap16() first; srcW and xInc are not used in this body
+    int i, j;
+    for (i=0; i<dstW; i++) { // one output sample per iteration
+        int srcPos= filterPos[i]; // index of the first source sample contributing to output i
+        int val=0; // accumulates the weighted sum for output i
+        for (j=0; j<filterSize; j++) {
+            val += ((int)av_bswap16(src[srcPos + j]))*filter[filterSize*i + j]; // swap the sample's bytes before weighting; filter stores filterSize coefficients per output sample
+        }
+        dst[i] = FFMIN(val>>shift, (1<<15)-1); // scale down by the caller-supplied shift and cap at 32767 (only the upper bound is limited); the cubic filter can overflow
+    }
+}
+
 //FIXME all pal and rgb srcFormats could do this convertion as well
 //FIXME all scalers more complex than bilinear could do half of this transform
 static void RENAME(chrRangeToJpeg)(int16_t *dst, int width)
@@ -2421,7 +2449,9 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth,
         src= formatConvBuffer;
     }
 
-    if (!c->hyscale_fast) {
+    if (c->hScale16) {
+        c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+    } else if (!c->hyscale_fast) {
         c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
     } else { // fast bilinear upscale / crap downscale
         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
@@ -2569,7 +2599,10 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth,
         src2= formatConvBuffer+VOFW;
     }
 
-    if (!c->hcscale_fast) {
+    if (c->hScale16) {
+        c->hScale16(dst     , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+        c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+    } else if (!c->hcscale_fast) {
         c->hScale(dst     , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
         c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
     } else { // fast bilinear upscale / crap downscale
@@ -2984,18 +3017,20 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         case PIX_FMT_PAL8     :
         case PIX_FMT_BGR4_BYTE:
         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
-        case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
-        case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
+        case PIX_FMT_GRAY16BE :
+        case PIX_FMT_YUV420P9BE:
         case PIX_FMT_YUV422P10BE:
-        case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
-        case PIX_FMT_YUV422P10LE:
-        case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
+        case PIX_FMT_YUV420P10BE:
         case PIX_FMT_YUV420P16BE:
         case PIX_FMT_YUV422P16BE:
-        case PIX_FMT_YUV444P16BE: c->chrToYV12 = RENAME(BEToUV); break;
+        case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16) : RENAME(hScale16X); break;
+        case PIX_FMT_GRAY16LE :
+        case PIX_FMT_YUV420P9LE:
+        case PIX_FMT_YUV422P10LE:
+        case PIX_FMT_YUV420P10LE:
         case PIX_FMT_YUV420P16LE:
         case PIX_FMT_YUV422P16LE:
-        case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break;
+        case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16X) : RENAME(hScale16); break;
     }
     if (c->chrSrcHSubSample) {
         switch(srcFormat) {
@@ -3036,23 +3071,11 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
     c->lumToYV12 = NULL;
     c->alpToYV12 = NULL;
     switch (srcFormat) {
-    case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
-    case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
-    case PIX_FMT_YUV422P10BE:
-    case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
-    case PIX_FMT_YUV422P10LE:
-    case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
     case PIX_FMT_YUYV422  :
-    case PIX_FMT_YUV420P16BE:
-    case PIX_FMT_YUV422P16BE:
-    case PIX_FMT_YUV444P16BE:
     case PIX_FMT_GRAY8A   :
-    case PIX_FMT_GRAY16BE : c->lumToYV12 = RENAME(yuy2ToY); break;
+                            c->lumToYV12 = RENAME(yuy2ToY); break;
     case PIX_FMT_UYVY422  :
-    case PIX_FMT_YUV420P16LE:
-    case PIX_FMT_YUV422P16LE:
-    case PIX_FMT_YUV444P16LE:
-    case PIX_FMT_GRAY16LE : c->lumToYV12 = RENAME(uyvyToY); break;
+                            c->lumToYV12 = RENAME(uyvyToY); break;
     case PIX_FMT_BGR24    : c->lumToYV12 = RENAME(bgr24ToY); break;
     case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY; break;
     case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY; break;
index ea44190..d227243 100644 (file)
@@ -890,7 +890,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
             if (flags&SWS_PRINT_INFO)
                 av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
         }
-        if (usesHFilter) c->canMMX2BeUsed=0;
+        if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat)) c->canMMX2BeUsed=0;
     }
     else
         c->canMMX2BeUsed=0;
index 5c32a36..29d1b2c 100644 (file)
@@ -9,8 +9,8 @@ bgr565le            3a514a298c6161a071ddf9963c06509d
 bgr8                7f007fa6c153a16e808a9c51605a4016
 bgra                a5e7040f9a80cccd65e5acf2ca09ace5
 gray                d7786a7d9d99ac74230cc045cab5632c
-gray16be            af39ce3a497f6734b157c8b94544f537
-gray16le            7ac1b788bcc472010df7a97e762485e0
+gray16be            5ba22d4802b40ec27e62abb22ad1d1cc
+gray16le            2d5e83aa875a4c3baa6fecf55e3223bf
 monob               88c4c050758e64d120f50c7eff694381
 monow               d31772ebaa877fc2a78565937f7f9673
 nv12                4676d59db43d657dc12841f6bc3ab452
@@ -27,20 +27,20 @@ uyvy422             314bd486277111a95d9369b944fa0400
 yuv410p             7df8f6d69b56a8dcb6c7ee908e5018b5
 yuv411p             1143e7c5cc28fe0922b051b17733bc4c
 yuv420p             fdad2d8df8985e3d17e73c71f713cb14
-yuv420p10be         6d335e75b553da590135cf8bb999610c
-yuv420p10le         d510ddbabefd03ef39ec943fcb51b709
-yuv420p16be         29a0265764530070f5cd3251cc01f66a
-yuv420p16le         6f3a265b084a78baec229238d9f7945f
-yuv420p9be          ec4983b7a949c0472110a7a2c58e278a
-yuv420p9le          c136dce5913a722eee44ab72cff664b2
+yuv420p10be         c143e77e97d2f7d62c3b518857ba9f9b
+yuv420p10le         72d90eccf5c34691ff057dafb7447aa2
+yuv420p16be         01da53e7f4f9882d5189ec1b1165ee05
+yuv420p16le         165f9aaf5332e5d088f44534d8ed2bc9
+yuv420p9be          bb87fddca65d1742412c8d2b1caf96c6
+yuv420p9le          828eec50014a41258a5423c1fe56ac97
 yuv422p             918e37701ee7377d16a8a6c119c56a40
-yuv422p10le         aeb0ef08a883f43429ca9d886d8fc095
-yuv422p16be         ef3e865fc1d0c68977c735323c50af6e
-yuv422p16le         428a9b96214c09cb5a983ce36d6961ff
+yuv422p10le         a10c4a5837547716f13cd61918b145f9
+yuv422p16be         961860aa4f229e09f1249910c687081c
+yuv422p16le         7695ee42c0581279bbe68de81deb7aee
 yuv440p             461503fdb9b90451020aa3b25ddf041c
 yuv444p             81b2eba962d12e8d64f003ac56f6faf2
-yuv444p16be         99a3738c70c8fbdc5a0e4ad4bf50648d
-yuv444p16le         385d0cc5240d62da0871915be5d86f0a
+yuv444p16be         5f924c2b385826106300cecc4ef4d2df
+yuv444p16le         40a55a85858508138b7661c83d95223e
 yuva420p            8673a9131fb47de69788863f93a50eb7
 yuvj420p            30427bd6caf5bda93a173dbebe759e09
 yuvj422p            fc8288f64fd149573f73cf8da05d8e6d