Merge remote branch 'qatar/master'
author Michael Niedermayer <michaelni@gmx.at>
Fri, 13 May 2011 02:40:40 +0000 (04:40 +0200)
committer Michael Niedermayer <michaelni@gmx.at>
Fri, 13 May 2011 02:40:40 +0000 (04:40 +0200)
* qatar/master: (33 commits)
  rtpdec_qdm2: Don't try to parse data packet if no configuration is received
  ac3enc: put the counting of stereo rematrixing bits in the same place to make the code easier to understand.
  ac3enc: clean up count_frame_bits() and count_frame_bits_fixed()
  mpegvideo: make FF_DEBUG_DCT_COEFF output coeffs via av_log() instead of just via AVFrame.
  srtdec: make sure we don't write past the end of buffer
  wmaenc: improve channel count and bitrate error handling in encode_init()
  matroskaenc: make sure we don't produce invalid file with no codec ID
  matroskadec: check that pointers were initialized before accessing them
  lavf: fix function name in compute_pkt_fields2 av_dlog message
  lavf: fix av_find_best_stream when providing a wanted stream.
  lavf: fix av_find_best_stream when decoder_ret is given and using a related stream.
  ffmpeg: factorize quality calculation
  tiff: add support for SamplesPerPixel tag in tiff_decode_tag()
  tiff: Prefer enum TiffCompr over int for TiffContext.compr.
  mov: Support edit list atom version 1.
  configure: Enable libpostproc automatically if GPL code is enabled.
  Cosmetics: fix prototypes in oggdec
  oggdec: fix memleak with continuous streams.
  matroskaenc: add missing new line in av_log() call
  dnxhdenc: add AVClass in private context.
  ...

swscale changes largely rewritten by me or replaced by Baptiste's due to lots of bugs in Ronald's code.
In case it's not obvious, the above code also consists to a large extent of duplicates that were cherry-picked
from ffmpeg.

Conflicts:
configure
ffmpeg.c
libavformat/matroskaenc.c
libavutil/pixfmt.h
libswscale/ppc/swscale_template.c
libswscale/swscale.c
libswscale/swscale_template.c
libswscale/utils.c
libswscale/x86/swscale_template.c
tests/fate/h264.mak
tests/ref/lavfi/pixdesc_le
tests/ref/lavfi/pixfmts_copy_le
tests/ref/lavfi/pixfmts_null_le
tests/ref/lavfi/pixfmts_scale_le
tests/ref/lavfi/pixfmts_vflip_le

Merged-by: Michael Niedermayer <michaelni@gmx.at>
38 files changed:
1  2 
configure
ffmpeg.c
ffplay.c
ffserver.c
libavcodec/ac3enc.c
libavcodec/cavs.h
libavcodec/cavsdata.h
libavcodec/ffv1.c
libavcodec/libxvidff.c
libavcodec/tiff.c
libavcodec/vorbis.c
libavcodec/vorbis.h
libavcodec/vorbisdec.c
libavcodec/vorbisenc.c
libavdevice/bktr.c
libavdevice/x11grab.c
libavformat/applehttp.c
libavformat/applehttpproto.c
libavformat/avio.c
libavformat/isom.h
libavformat/matroskaenc.c
libavformat/mov.c
libavformat/oggdec.c
libavformat/os_support.c
libavformat/rtpdec.c
libavformat/rtpdec_qdm2.c
libavformat/udp.c
libavutil/ppc/cpu.c
libswscale/swscale.c
libswscale/swscale_internal.h
libswscale/swscale_template.c
libswscale/utils.c
tests/fate/h264.mak
tests/ref/lavfi/pixdesc_le
tests/ref/lavfi/pixfmts_copy_le
tests/ref/lavfi/pixfmts_null_le
tests/ref/lavfi/pixfmts_scale_le
tests/ref/lavfi/pixfmts_vflip_le

diff --cc configure
+++ b/configure
@@@ -1667,9 -1644,9 +1668,10 @@@ enable ffprob
  enable ffserver
  enable network
  enable optimizations
+ enable postproc
  enable protocols
  enable static
 +enable stripping
  enable swscale
  enable swscale_alpha
  
diff --cc ffmpeg.c
Simple merge
diff --cc ffplay.c
Simple merge
diff --cc ffserver.c
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -250,9 -250,8 +250,9 @@@ typedef struct FFV1Context
      uint8_t (*initial_states[MAX_QUANT_TABLES])[32];
      int run_index;
      int colorspace;
-     int_fast16_t *sample_buffer;
+     int16_t *sample_buffer;
      int gob_count;
 +    int packed_at_lsb;
  
      int quant_table_count;
  
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -1145,12 -1146,12 +1148,12 @@@ static int vorbis_floor1_decode(vorbis_
  {
      vorbis_floor1 *vf = &vfu->t1;
      GetBitContext *gb = &vc->gb;
-     uint_fast16_t range_v[4] = { 256, 128, 86, 64 };
-     unsigned range = range_v[vf->multiplier-1];
-     uint_fast16_t floor1_Y[258];
-     uint_fast16_t floor1_Y_final[258];
+     uint16_t range_v[4] = { 256, 128, 86, 64 };
+     unsigned range = range_v[vf->multiplier - 1];
+     uint16_t floor1_Y[258];
+     uint16_t floor1_Y_final[258];
      int floor1_flag[258];
 -    unsigned class, cdim, cbits, csub, cval, offset, i, j;
 +    unsigned partition_class, cdim, cbits, csub, cval, offset, i, j;
      int book, adx, ady, dy, off, predicted, err;
  
  
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -1875,28 -1660,6 +1875,28 @@@ static int packedCopyWrapper(SwsContex
      return srcSliceH;
  }
  
- #define DITHER_COPY(dst, dstStride, src, srcStride, bswap)\
++#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
 +    uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
 +    int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
 +    for (i = 0; i < height; i++) {\
 +        uint8_t *dither= dithers[src_depth-9][i&7];\
 +        for (j = 0; j < length-7; j+=8){\
-             dst[j+0] = (bswap(src[j+0]) + dither[0])*scale>>shift;\
-             dst[j+1] = (bswap(src[j+1]) + dither[1])*scale>>shift;\
-             dst[j+2] = (bswap(src[j+2]) + dither[2])*scale>>shift;\
-             dst[j+3] = (bswap(src[j+3]) + dither[3])*scale>>shift;\
-             dst[j+4] = (bswap(src[j+4]) + dither[4])*scale>>shift;\
-             dst[j+5] = (bswap(src[j+5]) + dither[5])*scale>>shift;\
-             dst[j+6] = (bswap(src[j+6]) + dither[6])*scale>>shift;\
-             dst[j+7] = (bswap(src[j+7]) + dither[7])*scale>>shift;\
++            dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
++            dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
++            dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
++            dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
++            dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
++            dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
++            dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
++            dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
 +        }\
 +        for (; j < length; j++)\
-             dst[j] = (bswap(src[j]) + dither[j&7])*scale>>shift;\
++            dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\
 +        dst += dstStride;\
 +        src += srcStride;\
 +    }
 +
 +
  static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dst[], int dstStride[])
  {
                  length*=2;
              fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
          } else {
 -            if(is9_OR_10BPS(c->srcFormat)) {
 +            if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat)
 +               || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat))
 +            ) {
                  const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1;
                  const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
-                 uint16_t *srcPtr2 = (uint16_t*)srcPtr;
+                 const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
 +                uint16_t *dstPtr2 = (uint16_t*)dstPtr;
  
 -                if (is16BPS(c->dstFormat)) {
 -                    uint16_t *dstPtr2 = (uint16_t*)dstPtr;
 -#define COPY9_OR_10TO16(rfunc, wfunc) \
 -                    for (i = 0; i < height; i++) { \
 -                        for (j = 0; j < length; j++) { \
 -                            int srcpx = rfunc(&srcPtr2[j]); \
 -                            wfunc(&dstPtr2[j], (srcpx<<(16-src_depth)) | (srcpx>>(2*src_depth-16))); \
 -                        } \
 -                        dstPtr2 += dstStride[plane]/2; \
 -                        srcPtr2 += srcStride[plane]/2; \
 -                    }
 -                    if (isBE(c->dstFormat)) {
 -                        if (isBE(c->srcFormat)) {
 -                            COPY9_OR_10TO16(AV_RB16, AV_WB16);
 -                        } else {
 -                            COPY9_OR_10TO16(AV_RL16, AV_WB16);
 -                        }
 +                if (dst_depth == 8) {
 +                    if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
-                         DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, )
++                        DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , )
                      } else {
-                         DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16)
 -                        if (isBE(c->srcFormat)) {
 -                            COPY9_OR_10TO16(AV_RB16, AV_WL16);
 -                        } else {
 -                            COPY9_OR_10TO16(AV_RL16, AV_WL16);
 -                        }
 -                    }
 -                } else if (is9_OR_10BPS(c->dstFormat)) {
 -                    uint16_t *dstPtr2 = (uint16_t*)dstPtr;
 -#define COPY9_OR_10TO9_OR_10(loop) \
 -                    for (i = 0; i < height; i++) { \
 -                        for (j = 0; j < length; j++) { \
 -                            loop; \
 -                        } \
 -                        dstPtr2 += dstStride[plane]/2; \
 -                        srcPtr2 += srcStride[plane]/2; \
 -                    }
 -#define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \
 -                    if (dst_depth > src_depth) { \
 -                        COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \
 -                            wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \
 -                    } else if (dst_depth < src_depth) { \
 -                        COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]) >> 1)); \
 -                    } else { \
 -                        COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \
++                        DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, )
                      }
 -                    if (isBE(c->dstFormat)) {
 -                        if (isBE(c->srcFormat)) {
 -                            COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16);
 +                } else if (src_depth == 8) {
 +                    for (i = 0; i < height; i++) {
-                         for (j = 0; j < length; j++)
-                             dstPtr2[j] = (srcPtr[j]<<(dst_depth-8)) |
-                                 (srcPtr[j]>>(2*8-dst_depth));
++                        if(isBE(c->dstFormat)){
++                            for (j = 0; j < length; j++)
++                                AV_WB16(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |
++                                                     (srcPtr[j]>>(2*8-dst_depth)));
+                         } else {
 -                            COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16);
++                            for (j = 0; j < length; j++)
++                                AV_WL16(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |
++                                                     (srcPtr[j]>>(2*8-dst_depth)));
+                         }
 -                    } else {
 -                        if (isBE(c->srcFormat)) {
 -                            COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16);
 +                        dstPtr2 += dstStride[plane]/2;
 +                        srcPtr  += srcStride[plane];
 +                    }
 +                } else if (src_depth <= dst_depth) {
 +                    for (i = 0; i < height; i++) {
-                         if(isBE(c->dstFormat)){
-                             for (j = 0; j < length; j++)
-                                 AV_WB16(&dstPtr2[j], (srcPtr2[j]<<(dst_depth-src_depth)) |
-                                                      (srcPtr2[j]>>(2*src_depth-dst_depth)));
-                         }else{
-                             for (j = 0; j < length; j++)
-                                 AV_WL16(&dstPtr2[j], (srcPtr2[j]<<(dst_depth-src_depth)) |
-                                                      (srcPtr2[j]>>(2*src_depth-dst_depth)));
++#define COPY_UP(r,w) \
++    for (j = 0; j < length; j++){ \
++        unsigned int v= r(&srcPtr2[j]);\
++        w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \
++                       (v>>(2*src_depth-dst_depth)));\
++    }
++                        if(isBE(c->srcFormat)){
++                            if(isBE(c->dstFormat)){
++                                COPY_UP(AV_RB16, AV_WB16)
++                            } else {
++                                COPY_UP(AV_RB16, AV_WL16)
++                            }
+                         } else {
 -                            COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16);
++                            if(isBE(c->dstFormat)){
++                                COPY_UP(AV_RL16, AV_WB16)
++                            } else {
++                                COPY_UP(AV_RL16, AV_WL16)
++                            }
                          }
 +                        dstPtr2 += dstStride[plane]/2;
 +                        srcPtr2 += srcStride[plane]/2;
                      }
                  } else {
 -                    // FIXME Maybe dither instead.
 -#define COPY9_OR_10TO8(rfunc) \
 -                    for (i = 0; i < height; i++) { \
 -                        for (j = 0; j < length; j++) { \
 -                            dstPtr[j] = rfunc(&srcPtr2[j])>>(src_depth-8); \
 -                        } \
 -                        dstPtr  += dstStride[plane]; \
 -                        srcPtr2 += srcStride[plane]/2; \
 -                    }
 -                    if (isBE(c->srcFormat)) {
 -                        COPY9_OR_10TO8(AV_RB16);
 -                    } else {
 -                        COPY9_OR_10TO8(AV_RL16);
 -                    }
 -                }
 -            } else if(is9_OR_10BPS(c->dstFormat)) {
 -                const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
 -                uint16_t *dstPtr2 = (uint16_t*)dstPtr;
 -
 -                if (is16BPS(c->srcFormat)) {
 -                    const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
 -#define COPY16TO9_OR_10(rfunc, wfunc) \
 -                    for (i = 0; i < height; i++) { \
 -                        for (j = 0; j < length; j++) { \
 -                            wfunc(&dstPtr2[j], rfunc(&srcPtr2[j])>>(16-dst_depth)); \
 -                        } \
 -                        dstPtr2 += dstStride[plane]/2; \
 -                        srcPtr2 += srcStride[plane]/2; \
 -                    }
 -                    if (isBE(c->dstFormat)) {
 -                        if (isBE(c->srcFormat)) {
 -                            COPY16TO9_OR_10(AV_RB16, AV_WB16);
 +                    if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
-                         DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, )
++                        if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
++                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , )
+                         } else {
 -                            COPY16TO9_OR_10(AV_RL16, AV_WB16);
++                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16)
+                         }
 -                    } else {
 -                        if (isBE(c->srcFormat)) {
 -                            COPY16TO9_OR_10(AV_RB16, AV_WL16);
 +                    }else{
-                         DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16)
++                        if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
++                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, )
+                         } else {
 -                            COPY16TO9_OR_10(AV_RL16, AV_WL16);
++                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16)
+                         }
                      }
 -                } else /* 8bit */ {
 -#define COPY8TO9_OR_10(wfunc) \
 -                    for (i = 0; i < height; i++) { \
 -                        for (j = 0; j < length; j++) { \
 -                            const int srcpx = srcPtr[j]; \
 -                            wfunc(&dstPtr2[j], (srcpx<<(dst_depth-8)) | (srcpx >> (16-dst_depth))); \
 -                        } \
 -                        dstPtr2 += dstStride[plane]/2; \
 -                        srcPtr  += srcStride[plane]; \
 -                    }
 -                    if (isBE(c->dstFormat)) {
 -                        COPY8TO9_OR_10(AV_WB16);
 -                    } else {
 -                        COPY8TO9_OR_10(AV_WL16);
 -                    }
 -                }
 -            } else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
 -                if (!isBE(c->srcFormat)) srcPtr++;
 -                for (i=0; i<height; i++) {
 -                    for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
 -                    srcPtr+= srcStride[plane];
 -                    dstPtr+= dstStride[plane];
 -                }
 -            } else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) {
 -                for (i=0; i<height; i++) {
 -                    for (j=0; j<length; j++) {
 -                        dstPtr[ j<<1   ] = srcPtr[j];
 -                        dstPtr[(j<<1)+1] = srcPtr[j];
 -                    }
 -                    srcPtr+= srcStride[plane];
 -                    dstPtr+= dstStride[plane];
                  }
              } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)
                    && isBE(c->srcFormat) != isBE(c->dstFormat)) {
@@@ -355,9 -359,7 +355,10 @@@ const char *sws_format_name(enum PixelF
          || (x)==PIX_FMT_YUV420P9BE  \
          || (x)==PIX_FMT_YUV420P10LE \
          || (x)==PIX_FMT_YUV420P10BE \
 +        || (x)==PIX_FMT_YUV422P10LE \
 +        || (x)==PIX_FMT_YUV422P10BE \
      )
++#define is9_OR_10BPS isNBPS //for ronald
  #define isBE(x) ((x)&1)
  #define isPlanar8YUV(x) (           \
             (x)==PIX_FMT_YUV410P     \
@@@ -1653,31 -160,12 +1653,33 @@@ static inline void RENAME(yuy2ToUV)(uin
      assert(src1 == src2);
  }
  
 -static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 -                            const uint8_t *src2, long width, uint32_t *unused)
 +static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
  {
 +#if COMPILE_TEMPLATE_MMX
 +    __asm__ volatile(
 +        "mov                    %0, %%"REG_a"       \n\t"
 +        "1:                                         \n\t"
 +        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
 +        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
 +        "movq    (%2, %%"REG_a",2), %%mm2           \n\t"
 +        "movq   8(%2, %%"REG_a",2), %%mm3           \n\t"
 +        "psrlw                  $8, %%mm0           \n\t"
 +        "psrlw                  $8, %%mm1           \n\t"
 +        "psrlw                  $8, %%mm2           \n\t"
 +        "psrlw                  $8, %%mm3           \n\t"
 +        "packuswb            %%mm1, %%mm0           \n\t"
 +        "packuswb            %%mm3, %%mm2           \n\t"
 +        "movq                %%mm0, (%3, %%"REG_a") \n\t"
 +        "movq                %%mm2, (%4, %%"REG_a") \n\t"
 +        "add                    $8, %%"REG_a"       \n\t"
 +        " js                    1b                  \n\t"
 +        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
 +        : "%"REG_a
 +    );
 +#else
      int i;
+     // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
+     // we need to skip each second pixel. Same for BEToUV.
      for (i=0; i<width; i++) {
          dstU[i]= src1[2*i + 1];
          dstV[i]= src2[2*i + 1];
@@@ -1827,146 -228,36 +1829,148 @@@ static inline void RENAME(nv21ToUV)(uin
  }
  
  // FIXME Maybe dither instead.
- #define YUV_NBPS(depth) \
- static inline void RENAME(yuv ## depth ## ToUV)(uint8_t *dstU, uint8_t *dstV, \
-                                      const uint16_t *srcU, const uint16_t *srcV, \
-                                      long width, uint32_t *unused) \
+ #define YUV_NBPS(depth, endianness, rfunc) \
+ static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
 -                                          const uint8_t *_srcU, const uint8_t *_srcV, \
++                                          const uint16_t *srcU, const uint16_t *srcV, \
+                                           long width, uint32_t *unused) \
  { \
      int i; \
 -    const uint16_t *srcU = (const uint16_t*)_srcU; \
 -    const uint16_t *srcV = (const uint16_t*)_srcV; \
      for (i = 0; i < width; i++) { \
-         dstU[i] = srcU[i]>>(depth-8); \
-         dstV[i] = srcV[i]>>(depth-8); \
+         dstU[i] = rfunc(&srcU[i])>>(depth-8); \
+         dstV[i] = rfunc(&srcV[i])>>(depth-8); \
      } \
  } \
  \
- static inline void RENAME(yuv ## depth ## ToY)(uint8_t *dstY, const uint16_t *srcY, long width, uint32_t *unused) \
 -static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, long width, uint32_t *unused) \
++static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint16_t *srcY, long width, uint32_t *unused) \
  { \
      int i; \
 -    const uint16_t *srcY = (const uint16_t*)_srcY; \
      for (i = 0; i < width; i++) \
-         dstY[i] = srcY[i]>>(depth-8); \
+         dstY[i] = rfunc(&srcY[i])>>(depth-8); \
  } \
  
- YUV_NBPS( 9)
- YUV_NBPS(10)
+ YUV_NBPS( 9, LE, AV_RL16)
+ YUV_NBPS( 9, BE, AV_RB16)
+ YUV_NBPS(10, LE, AV_RL16)
+ YUV_NBPS(10, BE, AV_RB16)
  
 -static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
 -                              long width, uint32_t *unused)
 +#if COMPILE_TEMPLATE_MMX
 +static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
  {
 +
 +    if(srcFormat == PIX_FMT_BGR24) {
 +        __asm__ volatile(
 +            "movq  "MANGLE(ff_bgr24toY1Coeff)", %%mm5       \n\t"
 +            "movq  "MANGLE(ff_bgr24toY2Coeff)", %%mm6       \n\t"
 +            :
 +        );
 +    } else {
 +        __asm__ volatile(
 +            "movq  "MANGLE(ff_rgb24toY1Coeff)", %%mm5       \n\t"
 +            "movq  "MANGLE(ff_rgb24toY2Coeff)", %%mm6       \n\t"
 +            :
 +        );
 +    }
 +
 +    __asm__ volatile(
 +        "movq  "MANGLE(ff_bgr24toYOffset)", %%mm4   \n\t"
 +        "mov                        %2, %%"REG_a"   \n\t"
 +        "pxor                    %%mm7, %%mm7       \n\t"
 +        "1:                                         \n\t"
 +        PREFETCH"               64(%0)              \n\t"
 +        "movd                     (%0), %%mm0       \n\t"
 +        "movd                    2(%0), %%mm1       \n\t"
 +        "movd                    6(%0), %%mm2       \n\t"
 +        "movd                    8(%0), %%mm3       \n\t"
 +        "add                       $12, %0          \n\t"
 +        "punpcklbw               %%mm7, %%mm0       \n\t"
 +        "punpcklbw               %%mm7, %%mm1       \n\t"
 +        "punpcklbw               %%mm7, %%mm2       \n\t"
 +        "punpcklbw               %%mm7, %%mm3       \n\t"
 +        "pmaddwd                 %%mm5, %%mm0       \n\t"
 +        "pmaddwd                 %%mm6, %%mm1       \n\t"
 +        "pmaddwd                 %%mm5, %%mm2       \n\t"
 +        "pmaddwd                 %%mm6, %%mm3       \n\t"
 +        "paddd                   %%mm1, %%mm0       \n\t"
 +        "paddd                   %%mm3, %%mm2       \n\t"
 +        "paddd                   %%mm4, %%mm0       \n\t"
 +        "paddd                   %%mm4, %%mm2       \n\t"
 +        "psrad                     $15, %%mm0       \n\t"
 +        "psrad                     $15, %%mm2       \n\t"
 +        "packssdw                %%mm2, %%mm0       \n\t"
 +        "packuswb                %%mm0, %%mm0       \n\t"
 +        "movd                %%mm0, (%1, %%"REG_a") \n\t"
 +        "add                        $4, %%"REG_a"   \n\t"
 +        " js                        1b              \n\t"
 +    : "+r" (src)
 +    : "r" (dst+width), "g" ((x86_reg)-width)
 +    : "%"REG_a
 +    );
 +}
 +
 +static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
 +{
 +    __asm__ volatile(
 +        "movq                   24(%4), %%mm6       \n\t"
 +        "mov                        %3, %%"REG_a"   \n\t"
 +        "pxor                    %%mm7, %%mm7       \n\t"
 +        "1:                                         \n\t"
 +        PREFETCH"               64(%0)              \n\t"
 +        "movd                     (%0), %%mm0       \n\t"
 +        "movd                    2(%0), %%mm1       \n\t"
 +        "punpcklbw               %%mm7, %%mm0       \n\t"
 +        "punpcklbw               %%mm7, %%mm1       \n\t"
 +        "movq                    %%mm0, %%mm2       \n\t"
 +        "movq                    %%mm1, %%mm3       \n\t"
 +        "pmaddwd                  (%4), %%mm0       \n\t"
 +        "pmaddwd                 8(%4), %%mm1       \n\t"
 +        "pmaddwd                16(%4), %%mm2       \n\t"
 +        "pmaddwd                 %%mm6, %%mm3       \n\t"
 +        "paddd                   %%mm1, %%mm0       \n\t"
 +        "paddd                   %%mm3, %%mm2       \n\t"
 +
 +        "movd                    6(%0), %%mm1       \n\t"
 +        "movd                    8(%0), %%mm3       \n\t"
 +        "add                       $12, %0          \n\t"
 +        "punpcklbw               %%mm7, %%mm1       \n\t"
 +        "punpcklbw               %%mm7, %%mm3       \n\t"
 +        "movq                    %%mm1, %%mm4       \n\t"
 +        "movq                    %%mm3, %%mm5       \n\t"
 +        "pmaddwd                  (%4), %%mm1       \n\t"
 +        "pmaddwd                 8(%4), %%mm3       \n\t"
 +        "pmaddwd                16(%4), %%mm4       \n\t"
 +        "pmaddwd                 %%mm6, %%mm5       \n\t"
 +        "paddd                   %%mm3, %%mm1       \n\t"
 +        "paddd                   %%mm5, %%mm4       \n\t"
 +
 +        "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3       \n\t"
 +        "paddd                   %%mm3, %%mm0       \n\t"
 +        "paddd                   %%mm3, %%mm2       \n\t"
 +        "paddd                   %%mm3, %%mm1       \n\t"
 +        "paddd                   %%mm3, %%mm4       \n\t"
 +        "psrad                     $15, %%mm0       \n\t"
 +        "psrad                     $15, %%mm2       \n\t"
 +        "psrad                     $15, %%mm1       \n\t"
 +        "psrad                     $15, %%mm4       \n\t"
 +        "packssdw                %%mm1, %%mm0       \n\t"
 +        "packssdw                %%mm4, %%mm2       \n\t"
 +        "packuswb                %%mm0, %%mm0       \n\t"
 +        "packuswb                %%mm2, %%mm2       \n\t"
 +        "movd                %%mm0, (%1, %%"REG_a") \n\t"
 +        "movd                %%mm2, (%2, %%"REG_a") \n\t"
 +        "add                        $4, %%"REG_a"   \n\t"
 +        " js                        1b              \n\t"
 +    : "+r" (src)
 +    : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
 +    : "%"REG_a
 +    );
 +}
 +#endif
 +
 +static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
 +{
 +#if COMPILE_TEMPLATE_MMX
 +    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
 +#else
      int i;
      for (i=0; i<width; i++) {
          int b= src[i*3+0];
@@@ -2978,15 -820,16 +2982,18 @@@ static void RENAME(sws_init_swScale)(Sw
          case PIX_FMT_PAL8     :
          case PIX_FMT_BGR4_BYTE:
          case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
-         case PIX_FMT_YUV420P9 : c->chrToYV12 = (void*)RENAME(yuv9ToUV ); break;
-         case PIX_FMT_YUV422P10:
-         case PIX_FMT_YUV420P10: c->chrToYV12 = (void*)RENAME(yuv10ToUV); break;
+         case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
+         case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
++        case PIX_FMT_YUV422P10BE:
+         case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
++        case PIX_FMT_YUV422P10LE:
+         case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
          case PIX_FMT_YUV420P16BE:
          case PIX_FMT_YUV422P16BE:
 -        case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
 +        case PIX_FMT_YUV444P16BE: c->chrToYV12 = RENAME(BEToUV); break;
          case PIX_FMT_YUV420P16LE:
          case PIX_FMT_YUV422P16LE:
 -        case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
 +        case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break;
      }
      if (c->chrSrcHSubSample) {
          switch(srcFormat) {
      c->lumToYV12 = NULL;
      c->alpToYV12 = NULL;
      switch (srcFormat) {
-     case PIX_FMT_YUV420P9 : c->lumToYV12 = (void*)RENAME(yuv9ToY ); break;
-     case PIX_FMT_YUV422P10:
-     case PIX_FMT_YUV420P10: c->lumToYV12 = (void*)RENAME(yuv10ToY); break;
+     case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
+     case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
++    case PIX_FMT_YUV422P10BE:
+     case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
++    case PIX_FMT_YUV422P10LE:
+     case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
      case PIX_FMT_YUYV422  :
      case PIX_FMT_YUV420P16BE:
      case PIX_FMT_YUV422P16BE:
@@@ -113,9 -114,6 +117,7 @@@ const char *swscale_license(void
          || (x)==PIX_FMT_YUV420P16BE   \
          || (x)==PIX_FMT_YUV422P16BE   \
          || (x)==PIX_FMT_YUV444P16BE   \
-         || (x)==PIX_FMT_YUV420P9    \
-         || (x)==PIX_FMT_YUV420P10   \
-         || (x)==PIX_FMT_YUV422P10   \
++        || (x)==PIX_FMT_YUV422P10     \
      )
  
  int sws_isSupportedInput(enum PixelFormat pix_fmt)
          || (x)==PIX_FMT_GRAY8       \
          || (x)==PIX_FMT_YUV410P     \
          || (x)==PIX_FMT_YUV440P     \
-         || (x)==PIX_FMT_YUV420P9    \
-         || (x)==PIX_FMT_YUV420P10   \
 +        || (x)==PIX_FMT_YUV422P10   \
+         || (x)==PIX_FMT_YUV420P9LE    \
+         || (x)==PIX_FMT_YUV420P10LE   \
          || (x)==PIX_FMT_YUV420P16LE   \
          || (x)==PIX_FMT_YUV422P16LE   \
          || (x)==PIX_FMT_YUV444P16LE   \
@@@ -307,12 -308,13 +307,12 @@@ fate-h264-conformance-frext-hpcvfl_bcrm
  fate-h264-conformance-frext-hpcvflnl_bcrm_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCVFLNL_BRCM_A.264 -vsync 0
  fate-h264-conformance-frext-hpcvmolq_brcm_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCVMOLQ_BRCM_B.264
  fate-h264-conformance-frext-hpcvnl_brcm_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCVNL_BRCM_A.264
- fate-h264-conformance-frext-pph10i1_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I1_Panasonic_A.264 -pix_fmt yuv420p16be
- fate-h264-conformance-frext-pph10i2_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I2_Panasonic_A.264 -pix_fmt yuv420p16be
- fate-h264-conformance-frext-pph10i3_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I3_Panasonic_A.264 -pix_fmt yuv420p16be
- fate-h264-conformance-frext-pph10i5_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I5_Panasonic_A.264 -pix_fmt yuv420p16be
- fate-h264-conformance-frext-pph10i6_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I6_Panasonic_A.264 -pix_fmt yuv420p16be
- fate-h264-conformance-frext-pph10i7_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I7_Panasonic_A.264 -pix_fmt yuv420p16be
+ fate-h264-conformance-frext-pph10i1_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I1_Panasonic_A.264 -pix_fmt yuv420p10le
+ fate-h264-conformance-frext-pph10i2_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I2_Panasonic_A.264 -pix_fmt yuv420p10le
+ fate-h264-conformance-frext-pph10i3_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I3_Panasonic_A.264 -pix_fmt yuv420p10le
 -fate-h264-conformance-frext-pph10i4_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I4_Panasonic_A.264 -pix_fmt yuv420p10le
+ fate-h264-conformance-frext-pph10i5_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I5_Panasonic_A.264 -pix_fmt yuv420p10le
+ fate-h264-conformance-frext-pph10i6_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I6_Panasonic_A.264 -pix_fmt yuv420p10le
+ fate-h264-conformance-frext-pph10i7_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I7_Panasonic_A.264 -pix_fmt yuv420p10le
  fate-h264-conformance-hcbp2_hhi_a: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/HCBP2_HHI_A.264
  fate-h264-conformance-hcmp1_hhi_a: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/HCMP1_HHI_A.264
  fate-h264-conformance-ls_sva_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/LS_SVA_D.264
@@@ -30,9 -31,9 +31,10 @@@ yuv420p10be         7605e266c088d0fcf68
  yuv420p10le         4228ee628c6deec123a13b9784516cc7
  yuv420p16be         16c009a235cd52b74791a895423152a3
  yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
+ yuv420p9be          ce880fa07830e5297c22acf6e20555ce
  yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
  yuv422p             c9bba4529821d796a6ab09f6a5fd355a
 +yuv422p10le         d0607c260a45c973e6639f4e449730ad
  yuv422p16be         5499502e1c29534a158a1fe60e889f60
  yuv422p16le         e3d61fde6978591596bc36b914386623
  yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
@@@ -30,9 -31,9 +31,10 @@@ yuv420p10be         7605e266c088d0fcf68
  yuv420p10le         4228ee628c6deec123a13b9784516cc7
  yuv420p16be         16c009a235cd52b74791a895423152a3
  yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
+ yuv420p9be          ce880fa07830e5297c22acf6e20555ce
  yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
  yuv422p             c9bba4529821d796a6ab09f6a5fd355a
 +yuv422p10le         d0607c260a45c973e6639f4e449730ad
  yuv422p16be         5499502e1c29534a158a1fe60e889f60
  yuv422p16le         e3d61fde6978591596bc36b914386623
  yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
@@@ -30,9 -31,9 +31,10 @@@ yuv420p10be         7605e266c088d0fcf68
  yuv420p10le         4228ee628c6deec123a13b9784516cc7
  yuv420p16be         16c009a235cd52b74791a895423152a3
  yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
+ yuv420p9be          ce880fa07830e5297c22acf6e20555ce
  yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
  yuv422p             c9bba4529821d796a6ab09f6a5fd355a
 +yuv422p10le         d0607c260a45c973e6639f4e449730ad
  yuv422p16be         5499502e1c29534a158a1fe60e889f60
  yuv422p16le         e3d61fde6978591596bc36b914386623
  yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
@@@ -27,12 -27,13 +27,14 @@@ uyvy422             314bd486277111a95d9
  yuv410p             7df8f6d69b56a8dcb6c7ee908e5018b5
  yuv411p             1143e7c5cc28fe0922b051b17733bc4c
  yuv420p             fdad2d8df8985e3d17e73c71f713cb14
 -yuv420p10be         5051128ca208d89595f7672b1707340b
 -yuv420p10le         77895bf65e70ad2ca021702fff55c8fc
++yuv420p10be         6d335e75b553da590135cf8bb999610c
 +yuv420p10le         d510ddbabefd03ef39ec943fcb51b709
  yuv420p16be         29a0265764530070f5cd3251cc01f66a
  yuv420p16le         6f3a265b084a78baec229238d9f7945f
 -yuv420p9be          02de6b37dc8a631ce2367b535670c40c
 -yuv420p9le          cddfbaf8e2a61aa5ea09fb396bcbc872
++yuv420p9be          ec4983b7a949c0472110a7a2c58e278a
 +yuv420p9le          c136dce5913a722eee44ab72cff664b2
  yuv422p             918e37701ee7377d16a8a6c119c56a40
 +yuv422p10le         aeb0ef08a883f43429ca9d886d8fc095
  yuv422p16be         ef3e865fc1d0c68977c735323c50af6e
  yuv422p16le         428a9b96214c09cb5a983ce36d6961ff
  yuv440p             461503fdb9b90451020aa3b25ddf041c
@@@ -30,9 -31,9 +31,10 @@@ yuv420p10be         df97d20b3b4a10c174d
  yuv420p10le         4b5249208602b941332945c926f80ae9
  yuv420p16be         539076782902664a8acf381bf4f713e8
  yuv420p16le         0f609e588e5a258644ef85170d70e030
+ yuv420p9be          be40ec975fb2873891643cbbbddbc3b0
  yuv420p9le          7e606310d3f5ff12badf911e8f333471
  yuv422p             d7f5cb44d9b0210d66d6a8762640ab34
 +yuv422p10le         11b57f2bd9661024153f3973b9090cdb
  yuv422p16be         9bd8f8c961822b586fa4cf992be54acc
  yuv422p16le         9c4a1239605c7952b736ac3130163f14
  yuv440p             876385e96165acf51271b20e5d85a416