Merge commit 'e3fcb14347466095839c2a3c47ebecff02da891e'
author Michael Niedermayer <michaelni@gmx.at>
Tue, 1 Jul 2014 12:38:57 +0000 (14:38 +0200)
committer Michael Niedermayer <michaelni@gmx.at>
Tue, 1 Jul 2014 13:22:11 +0000 (15:22 +0200)
* commit 'e3fcb14347466095839c2a3c47ebecff02da891e':
  dsputil: Split off IDCT bits into their own context

Conflicts:
configure
libavcodec/aic.c
libavcodec/arm/Makefile
libavcodec/arm/dsputil_init_arm.c
libavcodec/arm/dsputil_init_armv6.c
libavcodec/asvdec.c
libavcodec/dnxhdenc.c
libavcodec/dsputil.c
libavcodec/dvdec.c
libavcodec/dxva2_mpeg2.c
libavcodec/intrax8.c
libavcodec/mdec.c
libavcodec/mjpegdec.c
libavcodec/mjpegenc_common.h
libavcodec/mpegvideo.c
libavcodec/ppc/dsputil_altivec.h
libavcodec/ppc/dsputil_ppc.c
libavcodec/ppc/idctdsp.c
libavcodec/x86/Makefile
libavcodec/x86/dsputil_init.c
libavcodec/x86/dsputil_mmx.c
libavcodec/x86/dsputil_x86.h

Merged-by: Michael Niedermayer <michaelni@gmx.at>
84 files changed:
1  2 
configure
doc/optimization.txt
libavcodec/Makefile
libavcodec/aic.c
libavcodec/arm/Makefile
libavcodec/arm/dsputil_arm.h
libavcodec/arm/dsputil_armv6.S
libavcodec/arm/dsputil_init_arm.c
libavcodec/arm/dsputil_init_armv6.c
libavcodec/arm/idctdsp_arm.S
libavcodec/arm/idctdsp_arm.h
libavcodec/arm/idctdsp_armv6.S
libavcodec/arm/idctdsp_init_arm.c
libavcodec/arm/idctdsp_init_armv5te.c
libavcodec/arm/idctdsp_init_armv6.c
libavcodec/arm/idctdsp_init_neon.c
libavcodec/arm/idctdsp_neon.S
libavcodec/asv.c
libavcodec/asv.h
libavcodec/asvdec.c
libavcodec/asvenc.c
libavcodec/cavs.c
libavcodec/cavs.h
libavcodec/cavsdsp.c
libavcodec/dnxhddec.c
libavcodec/dnxhdenc.c
libavcodec/dsputil.c
libavcodec/dsputil.h
libavcodec/dvdec.c
libavcodec/dxva2_mpeg2.c
libavcodec/eamad.c
libavcodec/eatgq.c
libavcodec/eatqi.c
libavcodec/g2meet.c
libavcodec/h263.c
libavcodec/idctdsp.c
libavcodec/idctdsp.h
libavcodec/intrax8.c
libavcodec/libavcodec.v
libavcodec/ljpegenc.c
libavcodec/mdec.c
libavcodec/mimic.c
libavcodec/mjpegdec.c
libavcodec/mjpegdec.h
libavcodec/mjpegenc_common.c
libavcodec/mjpegenc_common.h
libavcodec/mpeg12dec.c
libavcodec/mpeg4videodec.c
libavcodec/mpeg4videoenc.c
libavcodec/mpegvideo.c
libavcodec/mpegvideo.h
libavcodec/mpegvideo_enc.c
libavcodec/mpegvideo_xvmc.c
libavcodec/msmpeg4.c
libavcodec/nuv.c
libavcodec/ppc/Makefile
libavcodec/ppc/dsputil_altivec.h
libavcodec/ppc/dsputil_ppc.c
libavcodec/ppc/idctdsp.c
libavcodec/proresdec2.c
libavcodec/proresdec_lgpl.c
libavcodec/proresdsp.c
libavcodec/rtjpeg.c
libavcodec/rtjpeg.h
libavcodec/vaapi_mpeg2.c
libavcodec/vaapi_mpeg4.c
libavcodec/vc1dec.c
libavcodec/wmv2.c
libavcodec/wmv2dsp.c
libavcodec/x86/Makefile
libavcodec/x86/cavsdsp.c
libavcodec/x86/dsputil_init.c
libavcodec/x86/dsputil_mmx.c
libavcodec/x86/dsputil_x86.h
libavcodec/x86/idct_mmx_xvid.c
libavcodec/x86/idct_sse2_xvid.c
libavcodec/x86/idctdsp.h
libavcodec/x86/idctdsp_init.c
libavcodec/x86/idctdsp_mmx.c
libavcodec/x86/mpegvideoenc_template.c
libavcodec/x86/proresdsp_init.c
libavcodec/x86/simple_idct.c
libavfilter/vf_spp.c
libavfilter/vf_spp.h

diff --cc configure
+++ b/configure
@@@ -1814,10 -1546,9 +1814,11 @@@ CONFIG_EXTRA=
      huffman
      huffyuvdsp
      huffyuvencdsp
+     idctdsp
      intrax8
      lgplv3
 +    llauddsp
 +    llviddsp
      lpc
      mpeg_er
      mpegaudio
@@@ -1992,15 -1704,15 +1993,16 @@@ threads_if_any="$THREADS_LIST
  
  # subsystems
  dct_select="rdft"
+ dsputil_select="idctdsp"
  error_resilience_select="dsputil"
 +frame_thread_encoder_deps="encoders threads"
  intrax8_select="error_resilience"
  mdct_select="fft"
  rdft_select="fft"
  mpeg_er_select="error_resilience"
  mpegaudio_select="mpegaudiodsp"
  mpegaudiodsp_select="dct"
- mpegvideo_select="blockdsp dsputil h264chroma hpeldsp videodsp"
 -mpegvideo_select="blockdsp dsputil hpeldsp idctdsp videodsp"
++mpegvideo_select="blockdsp dsputil h264chroma hpeldsp idctdsp videodsp"
  mpegvideoenc_select="dsputil mpegvideo qpeldsp"
  
  # decoders / encoders
@@@ -2008,20 -1720,18 +2010,20 @@@ aac_decoder_select="mdct sinewin
  aac_encoder_select="audio_frame_queue mdct sinewin"
  aac_latm_decoder_select="aac_decoder aac_latm_parser"
  ac3_decoder_select="ac3_parser ac3dsp bswapdsp mdct"
 +ac3_fixed_decoder_select="ac3_parser ac3dsp bswapdsp mdct"
  ac3_encoder_select="ac3dsp audiodsp dsputil mdct"
  ac3_fixed_encoder_select="ac3dsp audiodsp dsputil mdct"
- aic_decoder_select="dsputil golomb"
+ aic_decoder_select="golomb idctdsp"
  alac_encoder_select="lpc"
  als_decoder_select="bswapdsp"
  amrnb_decoder_select="lsp"
  amrwb_decoder_select="lsp"
 -amv_decoder_select="sp5x_decoder"
 -ape_decoder_select="bswapdsp"
 +amv_decoder_select="sp5x_decoder exif"
 +amv_encoder_select="aandcttables"
 +ape_decoder_select="bswapdsp llauddsp"
- asv1_decoder_select="blockdsp bswapdsp dsputil"
+ asv1_decoder_select="blockdsp bswapdsp idctdsp"
  asv1_encoder_select="bswapdsp dsputil"
- asv2_decoder_select="blockdsp bswapdsp dsputil"
+ asv2_decoder_select="blockdsp bswapdsp idctdsp"
  asv2_encoder_select="bswapdsp dsputil"
  atrac1_decoder_select="mdct sinewin"
  atrac3_decoder_select="mdct"
@@@ -2037,18 -1746,17 +2039,18 @@@ cook_decoder_select="audiodsp mdct sine
  cscd_decoder_select="lzo"
  cscd_decoder_suggest="zlib"
  dca_decoder_select="mdct"
- dnxhd_decoder_select="blockdsp dsputil"
- dnxhd_encoder_select="aandcttables blockdsp dsputil mpegvideoenc"
- dvvideo_decoder_select="dsputil"
 +dirac_decoder_select="dsputil dwt golomb videodsp"
+ dnxhd_decoder_select="blockdsp idctdsp"
+ dnxhd_encoder_select="aandcttables blockdsp dsputil idctdsp mpegvideoenc"
+ dvvideo_decoder_select="idctdsp"
  dvvideo_encoder_select="dsputil"
 -dxa_decoder_deps="zlib"
 +dxa_decoder_select="zlib"
  eac3_decoder_select="ac3_decoder"
  eac3_encoder_select="ac3_encoder"
- eamad_decoder_select="aandcttables blockdsp bswapdsp dsputil mpegvideo"
+ eamad_decoder_select="aandcttables blockdsp bswapdsp idctdsp mpegvideo"
 -eatgq_decoder_select="aandcttables idctdsp"
 +eatgq_decoder_select="aandcttables"
- eatqi_decoder_select="aandcttables blockdsp bswapdsp dsputil mpeg1video_decoder"
+ eatqi_decoder_select="aandcttables blockdsp bswapdsp idctdsp mpeg1video_decoder"
 -exr_decoder_deps="zlib"
 +exr_decoder_select="zlib"
  ffv1_decoder_select="golomb rangecoder"
  ffv1_encoder_select="rangecoder"
  ffvhuff_decoder_select="huffyuv_decoder"
@@@ -2064,8 -1771,8 +2066,8 @@@ flv_decoder_select="h263_decoder
  flv_encoder_select="h263_encoder"
  fourxm_decoder_select="blockdsp bswapdsp"
  fraps_decoder_select="bswapdsp huffman"
- g2m_decoder_select="blockdsp dsputil zlib"
 -g2m_decoder_deps="zlib"
 -g2m_decoder_select="blockdsp idctdsp"
++g2m_decoder_select="blockdsp idctdsp zlib"
 +g729_decoder_select="audiodsp"
  h261_decoder_select="mpeg_er mpegvideo"
  h261_encoder_select="aandcttables mpegvideoenc"
  h263_decoder_select="error_resilience h263_parser h263dsp mpeg_er mpegvideo qpeldsp"
@@@ -2085,12 -1792,12 +2087,12 @@@ jpegls_decoder_select="golomb mjpeg_dec
  jpegls_encoder_select="golomb"
  jv_decoder_select="blockdsp"
  lagarith_decoder_select="huffyuvdsp"
- ljpeg_encoder_select="aandcttables dsputil"
+ ljpeg_encoder_select="aandcttables idctdsp"
  loco_decoder_select="golomb"
- mdec_decoder_select="blockdsp dsputil mpegvideo"
+ mdec_decoder_select="blockdsp idctdsp mpegvideo"
  metasound_decoder_select="lsp mdct sinewin"
- mimic_decoder_select="blockdsp bswapdsp dsputil hpeldsp"
- mjpeg_decoder_select="blockdsp dsputil hpeldsp exif"
+ mimic_decoder_select="blockdsp bswapdsp hpeldsp idctdsp"
 -mjpeg_decoder_select="blockdsp hpeldsp idctdsp"
++mjpeg_decoder_select="blockdsp hpeldsp exif idctdsp"
  mjpeg_encoder_select="aandcttables mpegvideoenc"
  mjpegb_decoder_select="mjpeg_decoder"
  mlp_decoder_select="mlp_parser"
@@@ -2124,12 -1831,13 +2126,12 @@@ mss2_decoder_select="error_resilience m
  mxpeg_decoder_select="mjpeg_decoder"
  nellymoser_decoder_select="mdct sinewin"
  nellymoser_encoder_select="audio_frame_queue mdct sinewin"
- nuv_decoder_select="dsputil lzo"
+ nuv_decoder_select="idctdsp lzo"
  on2avc_decoder_select="mdct"
 -opus_decoder_deps="avresample"
 -png_decoder_deps="zlib"
 -png_encoder_deps="zlib"
 -png_encoder_select="huffyuvencdsp"
 -prores_decoder_select="idctdsp"
 +opus_decoder_deps="swresample"
 +png_decoder_select="zlib"
 +png_encoder_select="huffyuvencdsp zlib"
- prores_decoder_select="blockdsp dsputil"
++prores_decoder_select="blockdsp idctdsp"
  prores_encoder_select="dsputil"
  qcelp_decoder_select="lsp"
  qdm2_decoder_select="mdct rdft mpegaudiodsp"
Simple merge
@@@ -37,17 -32,13 +37,16 @@@ OBJS-$(CONFIG_AUDIODSP)                
  OBJS-$(CONFIG_BLOCKDSP)                += blockdsp.o
  OBJS-$(CONFIG_BSWAPDSP)                += bswapdsp.o
  OBJS-$(CONFIG_CABAC)                   += cabac.o
 +OBJS-$(CONFIG_CRYSTALHD)               += crystalhd.o
  OBJS-$(CONFIG_DCT)                     += dct.o dct32_fixed.o dct32_float.o
+ OBJS-$(CONFIG_DSPUTIL)                 += dsputil.o
  OBJS-$(CONFIG_DXVA2)                   += dxva2.o
- OBJS-$(CONFIG_DSPUTIL)                 += dsputil.o faanidct.o          \
-                                           simple_idct.o jrevdct.o
  OBJS-$(CONFIG_ENCODERS)                += faandct.o jfdctfst.o jfdctint.o
  OBJS-$(CONFIG_ERROR_RESILIENCE)        += error_resilience.o
 +OBJS-$(CONFIG_EXIF)                    += exif.o tiff_common.o
  FFT-OBJS-$(CONFIG_HARDCODED_TABLES)    += cos_tables.o cos_fixed_tables.o
  OBJS-$(CONFIG_FFT)                     += avfft.o fft_fixed.o fft_float.o \
 +                                          fft_fixed_32.o fft_init_table.o \
                                            $(FFT-OBJS-yes)
  OBJS-$(CONFIG_GOLOMB)                  += golomb.o
  OBJS-$(CONFIG_H263DSP)                 += h263dsp.o
@@@ -59,13 -50,13 +58,15 @@@ OBJS-$(CONFIG_HPELDSP)                 
  OBJS-$(CONFIG_HUFFMAN)                 += huffman.o
  OBJS-$(CONFIG_HUFFYUVDSP)              += huffyuvdsp.o
  OBJS-$(CONFIG_HUFFYUVENCDSP)           += huffyuvencdsp.o
+ OBJS-$(CONFIG_IDCTDSP)                 += idctdsp.o faanidct.o          \
+                                           simple_idct.o jrevdct.o
  OBJS-$(CONFIG_INTRAX8)                 += intrax8.o intrax8dsp.o
  OBJS-$(CONFIG_LIBXVID)                 += libxvid_rc.o
 +OBJS-$(CONFIG_LLAUDDSP)                += lossless_audiodsp.o
 +OBJS-$(CONFIG_LLVIDDSP)                += lossless_videodsp.o
  OBJS-$(CONFIG_LPC)                     += lpc.o
  OBJS-$(CONFIG_LSP)                     += lsp.o
 -OBJS-$(CONFIG_MDCT)                    += mdct_fixed.o mdct_float.o
 +OBJS-$(CONFIG_MDCT)                    += mdct_fixed.o mdct_float.o mdct_fixed_32.o
  OBJS-$(CONFIG_MPEG_ER)                 += mpeg_er.o
  OBJS-$(CONFIG_MPEGAUDIO)               += mpegaudio.o mpegaudiodata.o   \
                                            mpegaudiodecheader.o
@@@ -337,8 -335,8 +337,8 @@@ static int aic_decode_slice(AICContext 
              else
                  recombine_block_il(ctx->block, ctx->scantable.permutated,
                                     &base_y, &ext_y, blk);
 -            unquant_block(ctx->block, ctx->quant);
 +            unquant_block(ctx->block, ctx->quant, ctx->quant_matrix);
-             ctx->dsp.idct(ctx->block);
+             ctx->idsp.idct(ctx->block);
  
              if (!ctx->interlaced) {
                  dst = Y + (blk >> 1) * 8 * ystride + (blk & 1) * 8;
          for (blk = 0; blk < 2; blk++) {
              recombine_block(ctx->block, ctx->scantable.permutated,
                              &base_c, &ext_c);
 -            unquant_block(ctx->block, ctx->quant);
 +            unquant_block(ctx->block, ctx->quant, ctx->quant_matrix);
-             ctx->dsp.idct(ctx->block);
-             ctx->dsp.put_signed_pixels_clamped(ctx->block, C[blk],
-                                                ctx->frame->linesize[blk + 1]);
+             ctx->idsp.idct(ctx->block);
+             ctx->idsp.put_signed_pixels_clamped(ctx->block, C[blk],
+                                                 ctx->frame->linesize[blk + 1]);
              C[blk] += 8;
          }
      }
@@@ -432,9 -429,7 +431,9 @@@ static av_cold int aic_decode_init(AVCo
  
      for (i = 0; i < 64; i++)
          scan[i] = i;
-     ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, scan);
+     ff_init_scantable(ctx->idsp.idct_permutation, &ctx->scantable, scan);
 +    for (i = 0; i < 64; i++)
-         ctx->quant_matrix[ctx->dsp.idct_permutation[i]] = aic_quant_matrix[i];
++        ctx->quant_matrix[ctx->idsp.idct_permutation[i]] = aic_quant_matrix[i];
  
      ctx->mb_width  = FFALIGN(avctx->width,  16) >> 4;
      ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
@@@ -18,7 -15,10 +15,11 @@@ OBJS-$(CONFIG_H264PRED)                
  OBJS-$(CONFIG_H264QPEL)                += arm/h264qpel_init_arm.o
  OBJS-$(CONFIG_HPELDSP)                 += arm/hpeldsp_init_arm.o        \
                                            arm/hpeldsp_arm.o
+ OBJS-$(CONFIG_IDCTDSP)                 += arm/idctdsp_init_arm.o        \
+                                           arm/idctdsp_arm.o             \
+                                           arm/jrevdct_arm.o             \
+                                           arm/simple_idct_arm.o
 +OBJS-$(CONFIG_LLAUDDSP)                += arm/lossless_audiodsp_init_arm.o
  OBJS-$(CONFIG_MPEGAUDIODSP)            += arm/mpegaudiodsp_init_arm.o
  OBJS-$(CONFIG_MPEGVIDEO)               += arm/mpegvideo_arm.o
  OBJS-$(CONFIG_NEON_CLOBBER_TEST)       += arm/neontest.o
@@@ -52,11 -52,13 +53,13 @@@ ARMV5TE-OBJS-$(CONFIG_MLP_DECODER)     
  
  ARMV6-OBJS-$(CONFIG_AC3DSP)            += arm/ac3dsp_armv6.o
  ARMV6-OBJS-$(CONFIG_DSPUTIL)           += arm/dsputil_init_armv6.o      \
-                                           arm/dsputil_armv6.o           \
-                                           arm/simple_idct_armv6.o
+                                           arm/dsputil_armv6.o
 -ARMV6-OBJS-$(CONFIG_H264DSP)           += arm/h264dsp_armv6.o
 +ARMV6-OBJS-$(CONFIG_H264DSP)           += arm/startcode_armv6.o
  ARMV6-OBJS-$(CONFIG_HPELDSP)           += arm/hpeldsp_init_armv6.o      \
                                            arm/hpeldsp_armv6.o
+ ARMV6-OBJS-$(CONFIG_IDCTDSP)           += arm/idctdsp_init_armv6.o      \
+                                           arm/idctdsp_armv6.o           \
+                                           arm/simple_idct_armv6.o
  ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
  
  ARMV6-OBJS-$(CONFIG_MLP_DECODER)       += arm/mlpdsp_armv6.o
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -1,10 -1,10 +1,10 @@@
  @
- @ ARMv4 optimized DSP utils
+ @ ARMv4-optimized IDCT functions
  @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
  @
 -@ This file is part of Libav.
 +@ This file is part of FFmpeg.
  @
 -@ Libav is free software; you can redistribute it and/or
 +@ FFmpeg is free software; you can redistribute it and/or
  @ modify it under the terms of the GNU Lesser General Public
  @ License as published by the Free Software Foundation; either
  @ version 2.1 of the License, or (at your option) any later version.
index 0000000,9012b82..d7bc5cd
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,34 +1,34 @@@
 - * This file is part of Libav.
+ /*
+  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+  *
 - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ #ifndef AVCODEC_ARM_IDCTDSP_ARM_H
+ #define AVCODEC_ARM_IDCTDSP_ARM_H
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/idctdsp.h"
+ void ff_idctdsp_init_armv5te(IDCTDSPContext *c, AVCodecContext *avctx,
+                              unsigned high_bit_depth);
+ void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx,
+                            unsigned high_bit_depth);
+ void ff_idctdsp_init_neon(IDCTDSPContext *c, AVCodecContext *avctx,
+                           unsigned high_bit_depth);
+ #endif /* AVCODEC_ARM_IDCTDSP_ARM_H */
index 0000000,c180d73..a6e77d6
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,48 +1,48 @@@
 - * This file is part of Libav.
+ /*
+  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+  *
 - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ #include "libavutil/arm/asm.S"
+ function ff_add_pixels_clamped_armv6, export=1
+         push            {r4-r8,lr}
+         mov             r3,  #8
+ 1:
+         ldm             r0!, {r4,r5,r12,lr}
+         ldrd            r6,  r7,  [r1]
+         pkhbt           r8,  r4,  r5,  lsl #16
+         pkhtb           r5,  r5,  r4,  asr #16
+         pkhbt           r4,  r12, lr,  lsl #16
+         pkhtb           lr,  lr,  r12, asr #16
+         pld             [r1, r2]
+         uxtab16         r8,  r8,  r6
+         uxtab16         r5,  r5,  r6,  ror #8
+         uxtab16         r4,  r4,  r7
+         uxtab16         lr,  lr,  r7,  ror #8
+         usat16          r8,  #8,  r8
+         usat16          r5,  #8,  r5
+         usat16          r4,  #8,  r4
+         usat16          lr,  #8,  lr
+         orr             r6,  r8,  r5,  lsl #8
+         orr             r7,  r4,  lr,  lsl #8
+         subs            r3,  r3,  #1
+         strd_post       r6,  r7,  r1,  r2
+         bgt             1b
+         pop             {r4-r8,pc}
+ endfunc
index 0000000,b4d1899..cc5e116
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,98 +1,98 @@@
 - * This file is part of Libav.
+ /*
+  * ARM-optimized IDCT functions
+  * Copyright (c) 2001 Lionel Ulmer
+  *
 - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
 -    if (!high_bit_depth) {
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ #include <stdint.h>
+ #include "libavutil/attributes.h"
+ #include "libavutil/cpu.h"
+ #include "libavutil/arm/cpu.h"
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/idctdsp.h"
+ #include "idctdsp_arm.h"
+ void ff_j_rev_dct_arm(int16_t *data);
+ void ff_simple_idct_arm(int16_t *data);
+ /* XXX: local hack */
+ static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
+ static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
+ void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest,
+                                int line_size);
+ /* XXX: those functions should be suppressed ASAP when all IDCTs are
+  * converted */
+ static void j_rev_dct_arm_put(uint8_t *dest, int line_size, int16_t *block)
+ {
+     ff_j_rev_dct_arm(block);
+     ff_put_pixels_clamped(block, dest, line_size);
+ }
+ static void j_rev_dct_arm_add(uint8_t *dest, int line_size, int16_t *block)
+ {
+     ff_j_rev_dct_arm(block);
+     ff_add_pixels_clamped(block, dest, line_size);
+ }
+ static void simple_idct_arm_put(uint8_t *dest, int line_size, int16_t *block)
+ {
+     ff_simple_idct_arm(block);
+     ff_put_pixels_clamped(block, dest, line_size);
+ }
+ static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block)
+ {
+     ff_simple_idct_arm(block);
+     ff_add_pixels_clamped(block, dest, line_size);
+ }
+ av_cold void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx,
+                                  unsigned high_bit_depth)
+ {
+     int cpu_flags = av_get_cpu_flags();
+     ff_put_pixels_clamped = c->put_pixels_clamped;
+     ff_add_pixels_clamped = c->add_pixels_clamped;
++    if (!avctx->lowres && !high_bit_depth) {
+         if (avctx->idct_algo == FF_IDCT_AUTO ||
+             avctx->idct_algo == FF_IDCT_ARM) {
+             c->idct_put              = j_rev_dct_arm_put;
+             c->idct_add              = j_rev_dct_arm_add;
+             c->idct                  = ff_j_rev_dct_arm;
+             c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
+         } else if (avctx->idct_algo == FF_IDCT_SIMPLEARM) {
+             c->idct_put              = simple_idct_arm_put;
+             c->idct_add              = simple_idct_arm_add;
+             c->idct                  = ff_simple_idct_arm;
+             c->idct_permutation_type = FF_NO_IDCT_PERM;
+         }
+     }
+     c->add_pixels_clamped = ff_add_pixels_clamped_arm;
+     if (have_armv5te(cpu_flags))
+         ff_idctdsp_init_armv5te(c, avctx, high_bit_depth);
+     if (have_armv6(cpu_flags))
+         ff_idctdsp_init_armv6(c, avctx, high_bit_depth);
+     if (have_neon(cpu_flags))
+         ff_idctdsp_init_neon(c, avctx, high_bit_depth);
+ }
@@@ -29,12 -29,11 +29,12 @@@ void ff_simple_idct_armv5te(int16_t *da
  void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, int16_t *data);
  void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, int16_t *data);
  
- av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx,
+ av_cold void ff_idctdsp_init_armv5te(IDCTDSPContext *c, AVCodecContext *avctx,
                                       unsigned high_bit_depth)
  {
 -    if (!high_bit_depth &&
 +    if (!avctx->lowres && !high_bit_depth &&
          (avctx->idct_algo == FF_IDCT_AUTO ||
 +         avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
           avctx->idct_algo == FF_IDCT_SIMPLEARMV5TE)) {
          c->idct_put              = ff_simple_idct_put_armv5te;
          c->idct_add              = ff_simple_idct_add_armv5te;
index 0000000,e92f471..27ca6ce
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,48 +1,48 @@@
 - * This file is part of Libav.
+ /*
+  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+  *
 - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
 -    if (!high_bit_depth) {
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ #include <stdint.h>
+ #include "libavutil/attributes.h"
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/idctdsp.h"
+ #include "idctdsp_arm.h"
+ void ff_simple_idct_armv6(int16_t *data);
+ void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data);
+ void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data);
+ void ff_add_pixels_clamped_armv6(const int16_t *block, uint8_t *pixels,
+                                  int line_size);
+ av_cold void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx,
+                                    unsigned high_bit_depth)
+ {
++    if (!avctx->lowres && !high_bit_depth) {
+         if (avctx->idct_algo == FF_IDCT_AUTO ||
+             avctx->idct_algo == FF_IDCT_SIMPLEARMV6) {
+             c->idct_put              = ff_simple_idct_put_armv6;
+             c->idct_add              = ff_simple_idct_add_armv6;
+             c->idct                  = ff_simple_idct_armv6;
+             c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
+         }
+     }
+     c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
+ }
@@@ -1,10 -1,10 +1,10 @@@
  /*
-  * ARM NEON optimised DSP functions
+  * ARM-NEON-optimized IDCT functions
   * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
   *
 - * This file is part of Libav.
 + * This file is part of FFmpeg.
   *
 - * Libav is free software; you can redistribute it and/or
 + * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
@@@ -34,12 -34,11 +34,12 @@@ void ff_add_pixels_clamped_neon(const i
  void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
  void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
  
- av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
+ av_cold void ff_idctdsp_init_neon(IDCTDSPContext *c, AVCodecContext *avctx,
                                    unsigned high_bit_depth)
  {
 -    if (!high_bit_depth) {
 +    if (!avctx->lowres && !high_bit_depth) {
          if (avctx->idct_algo == FF_IDCT_AUTO ||
 +            avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
              avctx->idct_algo == FF_IDCT_SIMPLENEON) {
              c->idct_put              = ff_simple_idct_put_neon;
              c->idct_add              = ff_simple_idct_add_neon;
@@@ -1,10 -1,10 +1,10 @@@
  /*
-  * ARM NEON optimised DSP functions
+  * ARM-NEON-optimized IDCT functions
   * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
   *
 - * This file is part of Libav.
 + * This file is part of FFmpeg.
   *
 - * Libav is free software; you can redistribute it and/or
 + * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
Simple merge
Simple merge
@@@ -29,6 -29,8 +29,7 @@@
  #include "asv.h"
  #include "avcodec.h"
  #include "blockdsp.h"
 -#include "put_bits.h"
+ #include "idctdsp.h"
  #include "internal.h"
  #include "mathops.h"
  #include "mpeg12data.h"
@@@ -279,11 -284,13 +280,12 @@@ static av_cold int decode_init(AVCodecC
  
      ff_asv_common_init(avctx);
      ff_blockdsp_init(&a->bdsp, avctx);
+     ff_idctdsp_init(&a->idsp, avctx);
      init_vlcs(a);
-     ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_asv_scantab);
+     ff_init_scantable(a->idsp.idct_permutation, &a->scantable, ff_asv_scantab);
      avctx->pix_fmt = AV_PIX_FMT_YUV420P;
  
 -    a->inv_qscale = avctx->extradata[0];
 -    if (a->inv_qscale == 0) {
 +    if (avctx->extradata_size < 1 || (a->inv_qscale = avctx->extradata[0]) == 0) {
          av_log(avctx, AV_LOG_ERROR, "illegal qscale 0\n");
          if (avctx->codec_id == AV_CODEC_ID_ASV1)
              a->inv_qscale = 6;
@@@ -280,9 -240,16 +280,10 @@@ static av_cold int encode_init(AVCodecC
      int i;
      const int scale= avctx->codec_id == AV_CODEC_ID_ASV1 ? 1 : 2;
  
 -    avctx->coded_frame = av_frame_alloc();
 -    if (!avctx->coded_frame)
 -        return AVERROR(ENOMEM);
 -    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 -    avctx->coded_frame->key_frame = 1;
 -
      ff_asv_common_init(avctx);
+     ff_dsputil_init(&a->dsp, avctx);
  
 -    if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
 +    if(avctx->global_quality <= 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
  
      a->inv_qscale= (32*scale*FF_QUALITY_SCALE +  avctx->global_quality/2) / avctx->global_quality;
  
Simple merge
Simple merge
Simple merge
@@@ -28,9 -28,8 +28,9 @@@
  #include "blockdsp.h"
  #include "get_bits.h"
  #include "dnxhddata.h"
- #include "dsputil.h"
+ #include "idctdsp.h"
  #include "internal.h"
 +#include "thread.h"
  
  typedef struct DNXHDContext {
      AVCodecContext *avctx;
@@@ -322,9 -307,9 +322,10 @@@ static av_cold int dnxhd_encode_init(AV
      avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;
  
      ff_blockdsp_init(&ctx->bdsp, avctx);
 -    ff_dsputil_init(&ctx->m.dsp, avctx);
+     ff_idctdsp_init(&ctx->m.idsp, avctx);
      ff_dct_common_init(&ctx->m);
 +    ff_dct_encode_init(&ctx->m);
 +
      if (!ctx->m.dct_quantize)
          ctx->m.dct_quantize = ff_dct_quantize_c;
  
@@@ -1475,17 -1152,4 +1203,14 @@@ av_cold void ff_dsputil_init(DSPContex
          ff_dsputil_init_ppc(c, avctx, high_bit_depth);
      if (ARCH_X86)
          ff_dsputil_init_x86(c, avctx, high_bit_depth);
-     ff_init_scantable_permutation(c->idct_permutation,
-                                   c->idct_permutation_type);
  }
 +
 +av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 +{
 +    ff_dsputil_init(c, avctx);
 +}
 +
 +av_cold void avpriv_dsputil_init(DSPContext *c, AVCodecContext *avctx)
 +{
 +    ff_dsputil_init(c, avctx);
 +}
Simple merge
@@@ -62,24 -62,17 +63,24 @@@ static const int dv_iweight_bits = 14
  static av_cold int dvvideo_decode_init(AVCodecContext *avctx)
  {
      DVVideoContext *s = avctx->priv_data;
-     DSPContext dsp;
+     IDCTDSPContext idsp;
      int i;
  
-     memset(&dsp,0, sizeof(dsp));
-     ff_dsputil_init(&dsp, avctx);
++    memset(&idsp,0, sizeof(idsp));
+     ff_idctdsp_init(&idsp, avctx);
  
      for (i = 0; i < 64; i++)
-        s->dv_zigzag[0][i] = dsp.idct_permutation[ff_zigzag_direct[i]];
+        s->dv_zigzag[0][i] = idsp.idct_permutation[ff_zigzag_direct[i]];
  
 -    memcpy(s->dv_zigzag[1], ff_dv_zigzag248_direct, sizeof(s->dv_zigzag[1]));
 +    if (avctx->lowres){
 +        for (i = 0; i < 64; i++){
 +            int j = ff_dv_zigzag248_direct[i];
-             s->dv_zigzag[1][i] = dsp.idct_permutation[(j & 7) + (j & 8) * 4 + (j & 48) / 2];
++            s->dv_zigzag[1][i] = idsp.idct_permutation[(j & 7) + (j & 8) * 4 + (j & 48) / 2];
 +        }
 +    }else
 +        memcpy(s->dv_zigzag[1], ff_dv_zigzag248_direct, sizeof(s->dv_zigzag[1]));
  
-     s->idct_put[0] = dsp.idct_put;
+     s->idct_put[0] = idsp.idct_put;
      s->idct_put[1] = ff_simple_idct248_put;
  
      return ff_dvvideo_init(avctx);
@@@ -110,11 -110,11 +110,11 @@@ static void fill_quantization_matrices(
      for (i = 0; i < 4; i++)
          qm->bNewQmatrix[i] = 1;
      for (i = 0; i < 64; i++) {
-         int n = s->dsp.idct_permutation[ff_zigzag_direct[i]];
+         int n = s->idsp.idct_permutation[ff_zigzag_direct[i]];
 -        qm->Qmatrix[0][i] = s->intra_matrix[n];;
 -        qm->Qmatrix[1][i] = s->inter_matrix[n];;
 -        qm->Qmatrix[2][i] = s->chroma_intra_matrix[n];;
 -        qm->Qmatrix[3][i] = s->chroma_inter_matrix[n];;
 +        qm->Qmatrix[0][i] = s->intra_matrix[n];
 +        qm->Qmatrix[1][i] = s->inter_matrix[n];
 +        qm->Qmatrix[2][i] = s->chroma_intra_matrix[n];
 +        qm->Qmatrix[3][i] = s->chroma_inter_matrix[n];
      }
  }
  
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 0000000,8542ab3..b0d4185
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,197 +1,311 @@@
 - * This file is part of Libav.
+ /*
 - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
 -static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ #include "config.h"
+ #include "libavutil/attributes.h"
+ #include "libavutil/common.h"
+ #include "avcodec.h"
+ #include "dct.h"
+ #include "faanidct.h"
+ #include "idctdsp.h"
+ #include "simple_idct.h"
+ av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
+                                const uint8_t *src_scantable)
+ {
+     int i, end;
+     st->scantable = src_scantable;
+     for (i = 0; i < 64; i++) {
+         int j = src_scantable[i];
+         st->permutated[i] = permutation[j];
+     }
+     end = -1;
+     for (i = 0; i < 64; i++) {
+         int j = st->permutated[i];
+         if (j > end)
+             end = j;
+         st->raster_end[i] = end;
+     }
+ }
+ av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
+                                            int idct_permutation_type)
+ {
+     int i;
+     if (ARCH_X86)
+         if (ff_init_scantable_permutation_x86(idct_permutation,
+                                               idct_permutation_type))
+             return;
+     switch (idct_permutation_type) {
+     case FF_NO_IDCT_PERM:
+         for (i = 0; i < 64; i++)
+             idct_permutation[i] = i;
+         break;
+     case FF_LIBMPEG2_IDCT_PERM:
+         for (i = 0; i < 64; i++)
+             idct_permutation[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+         break;
+     case FF_TRANSPOSE_IDCT_PERM:
+         for (i = 0; i < 64; i++)
+             idct_permutation[i] = ((i & 7) << 3) | (i >> 3);
+         break;
+     case FF_PARTTRANS_IDCT_PERM:
+         for (i = 0; i < 64; i++)
+             idct_permutation[i] = (i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3);
+         break;
+     default:
+         av_log(NULL, AV_LOG_ERROR,
+                "Internal error, IDCT permutation not set\n");
+     }
+ }
 -                                        uint8_t *restrict pixels,
++static void put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
+                                  int line_size)
+ {
+     int i;
+     /* read the pixels */
+     for (i = 0; i < 8; i++) {
+         pixels[0] = av_clip_uint8(block[0]);
+         pixels[1] = av_clip_uint8(block[1]);
+         pixels[2] = av_clip_uint8(block[2]);
+         pixels[3] = av_clip_uint8(block[3]);
+         pixels[4] = av_clip_uint8(block[4]);
+         pixels[5] = av_clip_uint8(block[5]);
+         pixels[6] = av_clip_uint8(block[6]);
+         pixels[7] = av_clip_uint8(block[7]);
+         pixels += line_size;
+         block  += 8;
+     }
+ }
++static void put_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
++                                 int line_size)
++{
++    int i;
++
++    /* read the pixels */
++    for(i=0;i<4;i++) {
++        pixels[0] = av_clip_uint8(block[0]);
++        pixels[1] = av_clip_uint8(block[1]);
++        pixels[2] = av_clip_uint8(block[2]);
++        pixels[3] = av_clip_uint8(block[3]);
++
++        pixels += line_size;
++        block += 8;
++    }
++}
++
++static void put_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
++                                 int line_size)
++{
++    int i;
++
++    /* read the pixels */
++    for(i=0;i<2;i++) {
++        pixels[0] = av_clip_uint8(block[0]);
++        pixels[1] = av_clip_uint8(block[1]);
++
++        pixels += line_size;
++        block += 8;
++    }
++}
++
+ static void put_signed_pixels_clamped_c(const int16_t *block,
 -static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
++                                        uint8_t *av_restrict pixels,
+                                         int line_size)
+ {
+     int i, j;
+     for (i = 0; i < 8; i++) {
+         for (j = 0; j < 8; j++) {
+             if (*block < -128)
+                 *pixels = 0;
+             else if (*block > 127)
+                 *pixels = 255;
+             else
+                 *pixels = (uint8_t) (*block + 128);
+             block++;
+             pixels++;
+         }
+         pixels += (line_size - 8);
+     }
+ }
 -    if (avctx->bits_per_raw_sample == 10) {
 -        c->idct_put              = ff_simple_idct_put_10;
 -        c->idct_add              = ff_simple_idct_add_10;
 -        c->idct                  = ff_simple_idct_10;
++static void add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
+                                  int line_size)
+ {
+     int i;
+     /* read the pixels */
+     for (i = 0; i < 8; i++) {
+         pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+         pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+         pixels[2] = av_clip_uint8(pixels[2] + block[2]);
+         pixels[3] = av_clip_uint8(pixels[3] + block[3]);
+         pixels[4] = av_clip_uint8(pixels[4] + block[4]);
+         pixels[5] = av_clip_uint8(pixels[5] + block[5]);
+         pixels[6] = av_clip_uint8(pixels[6] + block[6]);
+         pixels[7] = av_clip_uint8(pixels[7] + block[7]);
+         pixels   += line_size;
+         block    += 8;
+     }
+ }
++static void add_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
++                          int line_size)
++{
++    int i;
++
++    /* read the pixels */
++    for(i=0;i<4;i++) {
++        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
++        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
++        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
++        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
++        pixels += line_size;
++        block += 8;
++    }
++}
++
++static void add_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
++                          int line_size)
++{
++    int i;
++
++    /* read the pixels */
++    for(i=0;i<2;i++) {
++        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
++        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
++        pixels += line_size;
++        block += 8;
++    }
++}
++
+ static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
+ {
+     ff_j_rev_dct(block);
+     put_pixels_clamped_c(block, dest, line_size);
+ }
+ static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
+ {
+     ff_j_rev_dct(block);
+     add_pixels_clamped_c(block, dest, line_size);
+ }
++static void ff_jref_idct4_put(uint8_t *dest, int line_size, int16_t *block)
++{
++    ff_j_rev_dct4 (block);
++    put_pixels_clamped4_c(block, dest, line_size);
++}
++static void ff_jref_idct4_add(uint8_t *dest, int line_size, int16_t *block)
++{
++    ff_j_rev_dct4 (block);
++    add_pixels_clamped4_c(block, dest, line_size);
++}
++
++static void ff_jref_idct2_put(uint8_t *dest, int line_size, int16_t *block)
++{
++    ff_j_rev_dct2 (block);
++    put_pixels_clamped2_c(block, dest, line_size);
++}
++static void ff_jref_idct2_add(uint8_t *dest, int line_size, int16_t *block)
++{
++    ff_j_rev_dct2 (block);
++    add_pixels_clamped2_c(block, dest, line_size);
++}
++
++static void ff_jref_idct1_put(uint8_t *dest, int line_size, int16_t *block)
++{
++    dest[0] = av_clip_uint8((block[0] + 4)>>3);
++}
++static void ff_jref_idct1_add(uint8_t *dest, int line_size, int16_t *block)
++{
++    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
++}
+ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
+ {
+     const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
++    if (avctx->lowres==1) {
++        c->idct_put              = ff_jref_idct4_put;
++        c->idct_add              = ff_jref_idct4_add;
++        c->idct                  = ff_j_rev_dct4;
++        c->idct_permutation_type = FF_NO_IDCT_PERM;
++    } else if (avctx->lowres==2) {
++        c->idct_put              =  ff_jref_idct2_put;
++        c->idct_add              =  ff_jref_idct2_add;
++        c->idct                  =  ff_j_rev_dct2;
++        c->idct_permutation_type = FF_NO_IDCT_PERM;
++    } else if (avctx->lowres==3) {
++        c->idct_put              =  ff_jref_idct1_put;
++        c->idct_add              =  ff_jref_idct1_add;
++        c->idct                  =  ff_j_rev_dct1;
+         c->idct_permutation_type = FF_NO_IDCT_PERM;
+     } else {
++        if (avctx->bits_per_raw_sample == 10) {
++            c->idct_put              = ff_simple_idct_put_10;
++            c->idct_add              = ff_simple_idct_add_10;
++            c->idct                  = ff_simple_idct_10;
++            c->idct_permutation_type = FF_NO_IDCT_PERM;
++        } else if (avctx->bits_per_raw_sample == 12) {
++            c->idct_put              = ff_simple_idct_put_12;
++            c->idct_add              = ff_simple_idct_add_12;
++            c->idct                  = ff_simple_idct_12;
++            c->idct_permutation_type = FF_NO_IDCT_PERM;
++        } else {
+         if (avctx->idct_algo == FF_IDCT_INT) {
+             c->idct_put              = jref_idct_put;
+             c->idct_add              = jref_idct_add;
+             c->idct                  = ff_j_rev_dct;
+             c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
+         } else if (avctx->idct_algo == FF_IDCT_FAAN) {
+             c->idct_put              = ff_faanidct_put;
+             c->idct_add              = ff_faanidct_add;
+             c->idct                  = ff_faanidct;
+             c->idct_permutation_type = FF_NO_IDCT_PERM;
+         } else { // accurate/default
+             c->idct_put              = ff_simple_idct_put_8;
+             c->idct_add              = ff_simple_idct_add_8;
+             c->idct                  = ff_simple_idct_8;
+             c->idct_permutation_type = FF_NO_IDCT_PERM;
+         }
++        }
+     }
+     c->put_pixels_clamped        = put_pixels_clamped_c;
+     c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
+     c->add_pixels_clamped        = add_pixels_clamped_c;
+     if (ARCH_ARM)
+         ff_idctdsp_init_arm(c, avctx, high_bit_depth);
+     if (ARCH_PPC)
+         ff_idctdsp_init_ppc(c, avctx, high_bit_depth);
+     if (ARCH_X86)
+         ff_idctdsp_init_x86(c, avctx, high_bit_depth);
+     ff_init_scantable_permutation(c->idct_permutation,
+                                   c->idct_permutation_type);
+ }
index 0000000,e3a2317..d93790e
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,104 +1,104 @@@
 - * This file is part of Libav.
+ /*
 - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ #ifndef AVCODEC_IDCTDSP_H
+ #define AVCODEC_IDCTDSP_H
+ #include <stdint.h>
+ #include "avcodec.h"
+ /**
+  * Scantable.
+  */
+ typedef struct ScanTable {
+     const uint8_t *scantable;
+     uint8_t permutated[64];
+     uint8_t raster_end[64];
+ } ScanTable;
+ void ff_init_scantable(uint8_t *permutation, ScanTable *st,
+                        const uint8_t *src_scantable);
+ void ff_init_scantable_permutation(uint8_t *idct_permutation,
+                                    int idct_permutation_type);
+ int ff_init_scantable_permutation_x86(uint8_t *idct_permutation,
+                                       int idct_permutation_type);
+ typedef struct IDCTDSPContext {
+     /* pixel ops : interface with DCT */
+     void (*put_pixels_clamped)(const int16_t *block /* align 16 */,
+                                uint8_t *pixels /* align 8 */,
+                                int line_size);
+     void (*put_signed_pixels_clamped)(const int16_t *block /* align 16 */,
+                                       uint8_t *pixels /* align 8 */,
+                                       int line_size);
+     void (*add_pixels_clamped)(const int16_t *block /* align 16 */,
+                                uint8_t *pixels /* align 8 */,
+                                int line_size);
+     void (*idct)(int16_t *block /* align 16 */);
+     /**
+      * block -> idct -> clip to unsigned 8 bit -> dest.
+      * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
+      * @param line_size size in bytes of a horizontal line of dest
+      */
+     void (*idct_put)(uint8_t *dest /* align 8 */,
+                      int line_size, int16_t *block /* align 16 */);
+     /**
+      * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
+      * @param line_size size in bytes of a horizontal line of dest
+      */
+     void (*idct_add)(uint8_t *dest /* align 8 */,
+                      int line_size, int16_t *block /* align 16 */);
+     /**
+      * IDCT input permutation.
+      * Several optimized IDCTs need a permutated input (relative to the
+      * normal order of the reference IDCT).
+      * This permutation must be performed before the idct_put/add.
+      * Note, normally this can be merged with the zigzag/alternate scan<br>
+      * An example to avoid confusion:
+      * - (->decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...)
+      * - (x -> reference DCT -> reference IDCT -> x)
+      * - (x -> reference DCT -> simple_mmx_perm = idct_permutation
+      *    -> simple_idct_mmx -> x)
+      * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant
+      *    -> simple_idct_mmx -> ...)
+      */
+     uint8_t idct_permutation[64];
+     int idct_permutation_type;
+ #define FF_NO_IDCT_PERM 1
+ #define FF_LIBMPEG2_IDCT_PERM 2
+ #define FF_SIMPLE_IDCT_PERM 3
+ #define FF_TRANSPOSE_IDCT_PERM 4
+ #define FF_PARTTRANS_IDCT_PERM 5
+ #define FF_SSE2_IDCT_PERM 6
+ } IDCTDSPContext;
+ void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx);
+ void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx,
+                          unsigned high_bit_depth);
+ void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
+                          unsigned high_bit_depth);
+ void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
+                          unsigned high_bit_depth);
+ #endif /* AVCODEC_IDCTDSP_H */
@@@ -643,9 -647,9 +644,9 @@@ static int x8_decode_intra_mb(IntraX8Co
                                              s->current_picture.f->linesize[!!chroma] );
      }
      if(!zeros_only)
-         w->wdsp.idct_add (s->dest[chroma],
-                           s->current_picture.f->linesize[!!chroma],
-                           s->block[0] );
 -        s->idsp.idct_add(s->dest[chroma],
++        w->wdsp.idct_add(s->dest[chroma],
+                          s->current_picture.f->linesize[!!chroma],
+                          s->block[0]);
  
  block_placed:
  
@@@ -1,31 -1,4 +1,32 @@@
  LIBAVCODEC_$MAJOR {
          global: av*;
 +                #deprecated, remove after next bump
 +                audio_resample;
 +                audio_resample_close;
 +                dsputil_init;
 +                ff_dsputil_init;
 +                ff_find_pix_fmt;
 +                ff_framenum_to_drop_timecode;
 +                ff_framenum_to_smtpe_timecode;
 +                ff_raw_pix_fmt_tags;
 +                ff_init_smtpe_timecode;
 +                ff_fft*;
 +                ff_mdct*;
 +                ff_dct*;
 +                ff_rdft*;
 +                ff_prores_idct_put_10_sse2;
 +                ff_simple_idct*;
 +                ff_aanscales;
 +                ff_faan*;
 +                ff_mmx_idct;
 +                ff_fdct*;
 +                fdct_ifast;
 +                j_rev_dct;
 +                ff_mmxext_idct;
 +                ff_idct_xvid*;
 +                ff_jpeg_fdct*;
 +                ff_dnxhd_get_cid_table;
 +                ff_dnxhd_cid_table;
++                ff_idctdsp_init;
          local:  *;
  };
@@@ -294,10 -285,25 +294,11 @@@ static av_cold int ljpeg_encode_init(AV
  
      s->scratch = av_malloc_array(avctx->width + 1, sizeof(*s->scratch));
  
-     ff_dsputil_init(&s->dsp, avctx);
-     ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
+     ff_idctdsp_init(&s->idsp, avctx);
+     ff_init_scantable(s->idsp.idct_permutation, &s->scantable,
+                       ff_zigzag_direct);
  
 -    av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
 -                                     &chroma_v_shift);
 -
 -    if (avctx->pix_fmt   == AV_PIX_FMT_BGR24) {
 -        s->vsample[0] = s->hsample[0] =
 -        s->vsample[1] = s->hsample[1] =
 -        s->vsample[2] = s->hsample[2] = 1;
 -    } else {
 -        s->vsample[0] = 2;
 -        s->vsample[1] = 2 >> chroma_v_shift;
 -        s->vsample[2] = 2 >> chroma_v_shift;
 -        s->hsample[0] = 2;
 -        s->hsample[1] = 2 >> chroma_h_shift;
 -        s->hsample[2] = 2 >> chroma_h_shift;
 -    }
 +    ff_mjpeg_init_hvsample(avctx, s->hsample, s->vsample);
  
      ff_mjpeg_build_huffman_codes(s->huff_size_dc_luminance,
                                   s->huff_code_dc_luminance,
@@@ -29,7 -29,7 +29,8 @@@
  
  #include "avcodec.h"
  #include "blockdsp.h"
 +#include "bswapdsp.h"
+ #include "idctdsp.h"
  #include "mpegvideo.h"
  #include "mpeg12.h"
  #include "thread.h"
@@@ -37,8 -37,7 +38,8 @@@
  typedef struct MDECContext {
      AVCodecContext *avctx;
      BlockDSPContext bdsp;
-     DSPContext dsp;
 +    BswapDSPContext bbdsp;
+     IDCTDSPContext idsp;
      ThreadFrame frame;
      GetBitContext gb;
      ScanTable scantable;
@@@ -213,10 -216,10 +214,11 @@@ static av_cold int decode_init(AVCodecC
      a->avctx           = avctx;
  
      ff_blockdsp_init(&a->bdsp, avctx);
-     ff_dsputil_init(&a->dsp, avctx);
 +    ff_bswapdsp_init(&a->bbdsp);
+     ff_idctdsp_init(&a->idsp, avctx);
      ff_mpeg12_init_vlcs();
-     ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_zigzag_direct);
+     ff_init_scantable(a->idsp.idct_permutation, &a->scantable,
+                       ff_zigzag_direct);
  
      if (avctx->idct_algo == FF_IDCT_AUTO)
          avctx->idct_algo = FF_IDCT_SIMPLE;
Simple merge
@@@ -35,7 -36,7 +35,8 @@@
  #include "libavutil/opt.h"
  #include "avcodec.h"
  #include "blockdsp.h"
 +#include "copy_block.h"
+ #include "idctdsp.h"
  #include "internal.h"
  #include "mjpeg.h"
  #include "mjpegdec.h"
@@@ -1228,9 -891,7 +1230,9 @@@ static int mjpeg_decode_scan(MJpegDecod
                                         "error y=%d x=%d\n", mb_y, mb_x);
                                  return AVERROR_INVALIDDATA;
                              }
-                             s->dsp.idct_put(ptr, linesize[c], s->block);
+                             s->idsp.idct_put(ptr, linesize[c], s->block);
 +                            if (s->bits & 7)
 +                                shift_output(s, ptr, linesize[c]);
                          }
                      } else {
                          int block_idx  = s->block_stride[c] * (v * mb_y + y) +
@@@ -1313,15 -996,18 +1315,15 @@@ static int mjpeg_decode_scan_progressiv
                             "error y=%d x=%d\n", mb_y, mb_x);
                      return AVERROR_INVALIDDATA;
                  }
 -            }
  
              if (last_scan) {
-                     s->dsp.idct_put(ptr, linesize, *block);
 -                if (copy_mb) {
 -                    s->hdsp.put_pixels_tab[1][0](ptr,
 -                                                 reference_data + block_offset,
 -                                                 linesize, 8);
 -                } else {
+                     s->idsp.idct_put(ptr, linesize, *block);
 -                    ptr += 8;
 -                }
 +                    if (s->bits & 7)
 +                        shift_output(s, ptr, linesize);
 +                    ptr += bytes_per_pixel*8 >> s->avctx->lowres;
              }
 +            if (handle_rstn(s, 0))
 +                EOBRUN = 0;
          }
      }
      return 0;
@@@ -103,11 -95,10 +103,11 @@@ typedef struct MJpegDecodeContext 
      int16_t (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode)
      uint8_t *last_nnz[MAX_COMPONENTS];
      uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
 +    int palette_index;
      ScanTable scantable;
      BlockDSPContext bdsp;
-     DSPContext dsp;
      HpelDSPContext hdsp;
+     IDCTDSPContext idsp;
  
      int restart_interval;
      int restart_count;
Simple merge
@@@ -24,8 -24,7 +24,8 @@@
  #include <stdint.h>
  
  #include "avcodec.h"
- #include "dsputil.h"
+ #include "idctdsp.h"
 +#include "mpegvideo.h"
  #include "put_bits.h"
  
  void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
@@@ -1588,12 -1534,38 +1588,12 @@@ static void mpeg_decode_picture_coding_
      s->chroma_420_type            = get_bits1(&s->gb);
      s->progressive_frame          = get_bits1(&s->gb);
  
 -    if (s->progressive_sequence && !s->progressive_frame) {
 -        s->progressive_frame = 1;
 -        av_log(s->avctx, AV_LOG_ERROR,
 -               "interlaced frame in progressive sequence, ignoring\n");
 -    }
 -
 -    if (s->picture_structure == 0 ||
 -        (s->progressive_frame && s->picture_structure != PICT_FRAME)) {
 -        av_log(s->avctx, AV_LOG_ERROR,
 -               "picture_structure %d invalid, ignoring\n",
 -               s->picture_structure);
 -        s->picture_structure = PICT_FRAME;
 -    }
 -
 -    if (s->progressive_sequence && !s->frame_pred_frame_dct)
 -        av_log(s->avctx, AV_LOG_WARNING, "invalid frame_pred_frame_dct\n");
 -
 -    if (s->picture_structure == PICT_FRAME) {
 -        s->first_field = 0;
 -        s->v_edge_pos  = 16 * s->mb_height;
 -    } else {
 -        s->first_field ^= 1;
 -        s->v_edge_pos   = 8 * s->mb_height;
 -        memset(s->mbskip_table, 0, s->mb_stride * s->mb_height);
 -    }
 -
      if (s->alternate_scan) {
-         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan);
-         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan);
+         ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan);
+         ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan);
      } else {
-         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct);
-         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
+         ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct);
+         ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
      }
  
      /* composite display not parsed */
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
 +#define UNCHECKED_BITSTREAM_READER 1
 +
 +#include "libavutil/opt.h"
  #include "error_resilience.h"
+ #include "idctdsp.h"
  #include "internal.h"
  #include "mpegutils.h"
  #include "mpegvideo.h"
Simple merge
@@@ -34,8 -34,7 +34,8 @@@
  #include "libavutil/timer.h"
  #include "avcodec.h"
  #include "blockdsp.h"
- #include "dsputil.h"
 +#include "h264chroma.h"
+ #include "idctdsp.h"
  #include "internal.h"
  #include "mathops.h"
  #include "mpegutils.h"
@@@ -381,8 -379,8 +381,9 @@@ av_cold int ff_dct_common_init(MpegEncC
  {
      ff_blockdsp_init(&s->bdsp, s->avctx);
      ff_dsputil_init(&s->dsp, s->avctx);
 +    ff_h264chroma_init(&s->h264chroma, 8); //for lowres
      ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
+     ff_idctdsp_init(&s->idsp, s->avctx);
      ff_mpegvideodsp_init(&s->mdsp);
      ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
  
@@@ -3083,17 -2318,17 +3085,17 @@@ void MPV_decode_mb_internal(MpegEncCont
                      }else{
  
                          dct_linesize = uvlinesize << s->interlaced_dct;
 -                        dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize * 8;
 +                        dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize*block_size;
  
-                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
-                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
-                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
-                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
+                         s->idsp.idct_put(dest_cb,              dct_linesize, block[4]);
+                         s->idsp.idct_put(dest_cr,              dct_linesize, block[5]);
+                         s->idsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
+                         s->idsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                          if(!s->chroma_x_shift){//Chroma444
-                             s->dsp.idct_put(dest_cb + block_size,              dct_linesize, block[8]);
-                             s->dsp.idct_put(dest_cr + block_size,              dct_linesize, block[9]);
-                             s->dsp.idct_put(dest_cb + block_size + dct_offset, dct_linesize, block[10]);
-                             s->dsp.idct_put(dest_cr + block_size + dct_offset, dct_linesize, block[11]);
 -                            s->idsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
 -                            s->idsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
 -                            s->idsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
 -                            s->idsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
++                            s->idsp.idct_put(dest_cb + block_size,              dct_linesize, block[8]);
++                            s->idsp.idct_put(dest_cr + block_size,              dct_linesize, block[9]);
++                            s->idsp.idct_put(dest_cb + block_size + dct_offset, dct_linesize, block[10]);
++                            s->idsp.idct_put(dest_cr + block_size + dct_offset, dct_linesize, block[11]);
                          }
                      }
                  }//gray
  #include "dsputil.h"
  #include "error_resilience.h"
  #include "get_bits.h"
 +#include "h264chroma.h"
  #include "h263dsp.h"
  #include "hpeldsp.h"
+ #include "idctdsp.h"
  #include "mpegvideodsp.h"
  #include "put_bits.h"
  #include "ratecontrol.h"
@@@ -360,8 -352,8 +361,9 @@@ typedef struct MpegEncContext 
  
      BlockDSPContext bdsp;
      DSPContext dsp;             ///< pointers for accelerated dsp functions
 +    H264ChromaContext h264chroma;
      HpelDSPContext hdsp;
+     IDCTDSPContext idsp;
      MpegVideoDSPContext mdsp;
      QpelDSPContext qdsp;
      VideoDSPContext vdsp;
@@@ -3541,22 -3359,11 +3542,22 @@@ static int encode_picture(MpegEncContex
          s->qscale= 3; //reduce clipping problems
  
      if (s->out_format == FMT_MJPEG) {
 +        const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
 +        const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
 +
 +        if (s->avctx->intra_matrix) {
 +            chroma_matrix =
 +            luma_matrix = s->avctx->intra_matrix;
 +        }
 +        if (s->avctx->chroma_intra_matrix)
 +            chroma_matrix = s->avctx->chroma_intra_matrix;
 +
          /* for mjpeg, we do include qscale in the matrix */
          for(i=1;i<64;i++){
-             int j= s->dsp.idct_permutation[i];
+             int j = s->idsp.idct_permutation[i];
  
 -            s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
 +            s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
 +            s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
          }
          s->y_dc_scale_table=
          s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
          s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
          ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
                         s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
-             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
 +        ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
 +                       s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
 +        s->qscale= 8;
 +    }
 +    if(s->codec_id == AV_CODEC_ID_AMV){
 +        static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
 +        static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
 +        for(i=1;i<64;i++){
++            int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
 +
 +            s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
 +            s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
 +        }
 +        s->y_dc_scale_table= y;
 +        s->c_dc_scale_table= c;
 +        s->intra_matrix[0] = 13;
 +        s->chroma_intra_matrix[0] = 14;
 +        ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 +                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
 +        ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
 +                       s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
          s->qscale= 8;
      }
  
@@@ -3801,12 -3585,12 +3802,12 @@@ static int dct_quantize_trellis_c(MpegE
              const int alevel= FFABS(level);
              int unquant_coeff;
  
 -            assert(level);
 +            av_assert2(level);
  
 -            if(s->out_format == FMT_H263){
 +            if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                  unquant_coeff= alevel*qmul + qadd;
              }else{ //MPEG1
-                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
+                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
                  if(s->mb_intra){
                          unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                          unquant_coeff =   (unquant_coeff - 1) | 1;
Simple merge
Simple merge
Simple merge
@@@ -24,8 -25,6 +25,7 @@@ OBJS-$(CONFIG_VP8_DECODER)             
  
  ALTIVEC-OBJS-$(CONFIG_DSPUTIL)         += ppc/dsputil_altivec.o         \
                                            ppc/fdct_altivec.o            \
-                                           ppc/idct_altivec.o            \
  
  FFT-OBJS-$(HAVE_GNU_AS)                += ppc/fft_altivec_s.o
 +FFT-OBJS-$(HAVE_VSX)                   += ppc/fft_vsx.o
  ALTIVEC-OBJS-$(CONFIG_FFT)             += $(FFT-OBJS-yes)
Simple merge
Simple merge
@@@ -148,23 -153,7 +153,23 @@@ static const vec_s16 constants[5] = 
      { 19266, 26722, 25172, 22654,  19266,  22654, 25172, 26722 }
  };
  
- void ff_idct_altivec(int16_t *blk)
++static void idct_altivec(int16_t *blk)
 +{ /* In-place transform: the block is viewed as eight vec_s16 rows, the
 +   * IDCT macro is expanded, and vx0..vx7 are written back over the input.
 +   * NOTE(review): vx0..vx7 are presumably declared by the IDCT macro,
 +   * which is not visible in this hunk - confirm. */
 +    vec_s16 *block = (vec_s16 *) blk;
 +
 +    IDCT;
 +
 +    block[0] = vx0;
 +    block[1] = vx1;
 +    block[2] = vx2;
 +    block[3] = vx3;
 +    block[4] = vx4;
 +    block[5] = vx5;
 +    block[6] = vx6;
 +    block[7] = vx7;
 +}
 +
- void ff_idct_put_altivec(uint8_t *dest, int stride, int16_t *blk)
+ static void idct_put_altivec(uint8_t *dest, int stride, int16_t *blk)
  {
      vec_s16 *block = (vec_s16 *) blk;
      vec_u8 tmp;
@@@ -235,3 -224,22 +240,23 @@@ static void idct_add_altivec(uint8_t *d
      dest += stride;
      ADD(dest, vx7, perm1);
  }
 -        if (!high_bit_depth) {
+ #endif /* HAVE_ALTIVEC */
+ av_cold void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
+                                  unsigned high_bit_depth)
+ { /* Installs the AltiVec idct/idct_add/idct_put callbacks and the
+    * transposed coefficient permutation into c. Nothing is changed unless
+    * HAVE_ALTIVEC is set, the CPU flags report AltiVec, high_bit_depth is 0,
+    * avctx->lowres is 0 and idct_algo is FF_IDCT_AUTO or FF_IDCT_ALTIVEC. */
+ #if HAVE_ALTIVEC
+     if (PPC_ALTIVEC(av_get_cpu_flags())) {
++        if (!high_bit_depth && avctx->lowres == 0) {
+             if ((avctx->idct_algo == FF_IDCT_AUTO) ||
+                 (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
++                c->idct                  = idct_altivec;
+                 c->idct_add              = idct_add_altivec;
+                 c->idct_put              = idct_put_altivec;
+                 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+             }
+         }
+     }
+ #endif /* HAVE_ALTIVEC */
+ }
index fb0c484,0000000..d597114
mode 100644,000000..100644
--- /dev/null
@@@ -1,689 -1,0 +1,690 @@@
 +/*
 + * Copyright (c) 2010-2011 Maxim Poliakovski
 + * Copyright (c) 2010-2011 Elvis Presley
 + *
 + * This file is part of FFmpeg.
 + *
 + * FFmpeg is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public
 + * License as published by the Free Software Foundation; either
 + * version 2.1 of the License, or (at your option) any later version.
 + *
 + * FFmpeg is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with FFmpeg; if not, write to the Free Software
 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 + */
 +
 +/**
 + * @file
 + * Known FOURCCs: 'apch' (HQ), 'apcn' (SD), 'apcs' (LT), 'apco' (Proxy), 'ap4h' (4444)
 + */
 +
 +//#define DEBUG
 +
 +#define LONG_BITSTREAM_READER
 +
 +#include "avcodec.h"
 +#include "get_bits.h"
++#include "idctdsp.h"
 +#include "internal.h"
 +#include "simple_idct.h"
 +#include "proresdec.h"
 +#include "proresdata.h"
 +/**
 + * Fill a 64-entry table with dst[i] = permutation[src[i]].
 + * Every src value is used as an index into permutation, so it must be
 + * below 64.
 + */
 +static void permute(uint8_t *dst, const uint8_t *src, const uint8_t permutation[64])
 +{
 +    int i;
 +    for (i = 0; i < 64; i++)
 +        dst[i] = permutation[src[i]];
 +}
 +/**
 + * Decoder init callback.
 + *
 + * Sets bits_per_raw_sample to 10, initializes the dsp, bdsp and prodsp
 + * contexts held in ProresContext, and stores the progressive and interlaced
 + * scan tables after mapping them through the scantable permutation derived
 + * from prodsp.idct_permutation_type.
 + *
 + * @return always 0
 + */
 +static av_cold int decode_init(AVCodecContext *avctx)
 +{
 +    ProresContext *ctx = avctx->priv_data;
 +    uint8_t idct_permutation[64];
 +
 +    avctx->bits_per_raw_sample = 10;
 +
 +    ff_dsputil_init(&ctx->dsp, avctx);
 +    ff_blockdsp_init(&ctx->bdsp, avctx);
 +    ff_proresdsp_init(&ctx->prodsp, avctx);
 +
 +    ff_init_scantable_permutation(idct_permutation,
 +                                  ctx->prodsp.idct_permutation_type);
 +
 +    permute(ctx->progressive_scan, ff_prores_progressive_scan, idct_permutation);
 +    permute(ctx->interlaced_scan, ff_prores_interlaced_scan, idct_permutation);
 +
 +    return 0;
 +}
 +/**
 + * Parse the frame header at buf and store the result in ctx and avctx.
 + *
 + * Fields read: header size (bytes 0-1), version (bytes 2-3, values above 1
 + * are rejected), width and height (bytes 8-11, which must equal the current
 + * avctx dimensions), frame type and chroma format bits (byte 12), alpha
 + * mode (low nibble of byte 17, values above 2 are rejected) and the
 + * quantization matrix flags (byte 19). If flag bit 1 is set, a 64-byte luma
 + * matrix starts at byte 20; if flag bit 0 is set, a 64-byte chroma matrix
 + * follows. A matrix that is not present is filled with the value 4.
 + *
 + * NOTE(review): bytes up to index 19 are read without a local length check;
 + * the visible caller rejects packets shorter than 28 bytes before skipping
 + * the first 8 - confirm for any other caller.
 + *
 + * @param data_size number of bytes available at buf
 + * @return the header size read from the stream, or a negative AVERROR code
 + */
 +static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
 +                               const int data_size, AVCodecContext *avctx)
 +{
 +    int hdr_size, width, height, flags;
 +    int version;
 +    const uint8_t *ptr;
 +
 +    hdr_size = AV_RB16(buf);
 +    av_dlog(avctx, "header size %d\n", hdr_size);
 +    if (hdr_size > data_size) {
 +        av_log(avctx, AV_LOG_ERROR, "error, wrong header size\n");
 +        return AVERROR_INVALIDDATA;
 +    }
 +
 +    version = AV_RB16(buf + 2);
 +    av_dlog(avctx, "%.4s version %d\n", buf+4, version);
 +    if (version > 1) {
 +        av_log(avctx, AV_LOG_ERROR, "unsupported version: %d\n", version);
 +        return AVERROR_PATCHWELCOME;
 +    }
 +
 +    width  = AV_RB16(buf + 8);
 +    height = AV_RB16(buf + 10);
 +    if (width != avctx->width || height != avctx->height) {
 +        av_log(avctx, AV_LOG_ERROR, "picture resolution change: %dx%d -> %dx%d\n",
 +               avctx->width, avctx->height, width, height);
 +        return AVERROR_PATCHWELCOME;
 +    }
 +
 +    ctx->frame_type = (buf[12] >> 2) & 3;
 +    ctx->alpha_info = buf[17] & 0xf;
 +
 +    if (ctx->alpha_info > 2) {
 +        av_log(avctx, AV_LOG_ERROR, "Invalid alpha mode %d\n", ctx->alpha_info);
 +        return AVERROR_INVALIDDATA;
 +    }
 +    if (avctx->skip_alpha) ctx->alpha_info = 0;
 +
 +    av_dlog(avctx, "frame type %d\n", ctx->frame_type);
 +
 +    if (ctx->frame_type == 0) {
 +        ctx->scan = ctx->progressive_scan; // permuted
 +    } else {
 +        ctx->scan = ctx->interlaced_scan; // permuted
 +        ctx->frame->interlaced_frame = 1;
 +        ctx->frame->top_field_first = ctx->frame_type == 1;
 +    }
 +
 +    if (ctx->alpha_info) {
 +        avctx->pix_fmt = (buf[12] & 0xC0) == 0xC0 ? AV_PIX_FMT_YUVA444P10 : AV_PIX_FMT_YUVA422P10;
 +    } else {
 +        avctx->pix_fmt = (buf[12] & 0xC0) == 0xC0 ? AV_PIX_FMT_YUV444P10 : AV_PIX_FMT_YUV422P10;
 +    }
 +
 +    ptr   = buf + 20;
 +    flags = buf[19];
 +    av_dlog(avctx, "flags %x\n", flags);
 +
 +    if (flags & 2) {
 +        if(buf + data_size - ptr < 64) {
 +            av_log(avctx, AV_LOG_ERROR, "Header truncated\n");
 +            return AVERROR_INVALIDDATA;
 +        }
 +        permute(ctx->qmat_luma, ctx->prodsp.idct_permutation, ptr); /* qmat_luma[i] = ptr[idct_permutation[i]] */
 +        ptr += 64;
 +    } else {
 +        memset(ctx->qmat_luma, 4, 64);
 +    }
 +
 +    if (flags & 1) {
 +        if(buf + data_size - ptr < 64) {
 +            av_log(avctx, AV_LOG_ERROR, "Header truncated\n");
 +            return AVERROR_INVALIDDATA;
 +        }
 +        permute(ctx->qmat_chroma, ctx->prodsp.idct_permutation, ptr);
 +    } else {
 +        memset(ctx->qmat_chroma, 4, 64);
 +    }
 +
 +    return hdr_size;
 +}
 +/**
 + * Parse a picture header and the slice index table that follows it.
 + *
 + * Reads the header size (upper 5 bits of byte 0, must be at least 8 and
 + * no larger than buf_size), the picture data size (32-bit field at byte 1),
 + * the slice count (16-bit field at byte 5) and the log2 slice width/height
 + * in macroblocks (high/low nibble of byte 7). ctx->slices is reallocated
 + * when the slice count differs from the stored one or no array exists.
 + * Each slice then gets its data pointer, data size, macroblock position
 + * and macroblock count from the table of 16-bit slice sizes located right
 + * after the header.
 + *
 + * @return the picture data size from the header, or a negative AVERROR code
 + */
 +static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, const int buf_size)
 +{
 +    ProresContext *ctx = avctx->priv_data;
 +    int i, hdr_size, slice_count;
 +    unsigned pic_data_size;
 +    int log2_slice_mb_width, log2_slice_mb_height;
 +    int slice_mb_count, mb_x, mb_y;
 +    const uint8_t *data_ptr, *index_ptr;
 +
 +    hdr_size = buf[0] >> 3;
 +    if (hdr_size < 8 || hdr_size > buf_size) {
 +        av_log(avctx, AV_LOG_ERROR, "error, wrong picture header size\n");
 +        return AVERROR_INVALIDDATA;
 +    }
 +
 +    pic_data_size = AV_RB32(buf + 1);
 +    if (pic_data_size > buf_size) {
 +        av_log(avctx, AV_LOG_ERROR, "error, wrong picture data size\n");
 +        return AVERROR_INVALIDDATA;
 +    }
 +
 +    log2_slice_mb_width  = buf[7] >> 4;
 +    log2_slice_mb_height = buf[7] & 0xF;
 +    if (log2_slice_mb_width > 3 || log2_slice_mb_height) {
 +        av_log(avctx, AV_LOG_ERROR, "unsupported slice resolution: %dx%d\n",
 +               1 << log2_slice_mb_width, 1 << log2_slice_mb_height);
 +        return AVERROR_INVALIDDATA;
 +    }
 +
 +    ctx->mb_width  = (avctx->width  + 15) >> 4;
 +    if (ctx->frame_type) /* non-zero frame type: picture height is rounded up to 32-line units */
 +        ctx->mb_height = (avctx->height + 31) >> 5;
 +    else
 +        ctx->mb_height = (avctx->height + 15) >> 4;
 +
 +    slice_count = AV_RB16(buf + 5);
 +
 +    if (ctx->slice_count != slice_count || !ctx->slices) {
 +        av_freep(&ctx->slices);
 +        ctx->slices = av_mallocz_array(slice_count, sizeof(*ctx->slices));
 +        if (!ctx->slices)
 +            return AVERROR(ENOMEM);
 +        ctx->slice_count = slice_count;
 +    }
 +
 +    if (!slice_count)
 +        return AVERROR(EINVAL);
 +
 +    if (hdr_size + slice_count*2 > buf_size) {
 +        av_log(avctx, AV_LOG_ERROR, "error, wrong slice count\n");
 +        return AVERROR_INVALIDDATA;
 +    }
 +
 +    // parse slice information
 +    index_ptr = buf + hdr_size;
 +    data_ptr  = index_ptr + slice_count*2;
 +
 +    slice_mb_count = 1 << log2_slice_mb_width;
 +    mb_x = 0;
 +    mb_y = 0;
 +
 +    for (i = 0; i < slice_count; i++) {
 +        SliceContext *slice = &ctx->slices[i];
 +
 +        slice->data = data_ptr;
 +        data_ptr += AV_RB16(index_ptr + i*2);
 +
 +        while (ctx->mb_width - mb_x < slice_mb_count) /* halve the slice width until it fits the rest of the row */
 +            slice_mb_count >>= 1;
 +
 +        slice->mb_x = mb_x;
 +        slice->mb_y = mb_y;
 +        slice->mb_count = slice_mb_count;
 +        slice->data_size = data_ptr - slice->data;
 +
 +        if (slice->data_size < 6) {
 +            av_log(avctx, AV_LOG_ERROR, "error, wrong slice data size\n");
 +            return AVERROR_INVALIDDATA;
 +        }
 +
 +        mb_x += slice_mb_count;
 +        if (mb_x == ctx->mb_width) { /* row complete: restart with the full slice width on the next row */
 +            slice_mb_count = 1 << log2_slice_mb_width;
 +            mb_x = 0;
 +            mb_y++;
 +        }
 +        if (data_ptr > buf + buf_size) {
 +            av_log(avctx, AV_LOG_ERROR, "error, slice out of bounds\n");
 +            return AVERROR_INVALIDDATA;
 +        }
 +    }
 +
 +    if (mb_x || mb_y != ctx->mb_height) { /* the slices must tile the picture exactly */
 +        av_log(avctx, AV_LOG_ERROR, "error wrong mb count y %d h %d\n",
 +               mb_y, ctx->mb_height);
 +        return AVERROR_INVALIDDATA;
 +    }
 +
 +    return pic_data_size;
 +}
 +/**
 + * Read one variable-length code into val using the bit reader that the
 + * caller opened as (re, gb).
 + *
 + * codebook packs three fields: bits 0-1 = switch_bits, bits 2-4 = exp_order,
 + * bits 5-7 = rice_order. q is 31 - av_log2() of the cached bits (presumably
 + * the count of leading zero bits - confirm against av_log2). If q exceeds
 + * switch_bits the exp-golomb branch is taken; otherwise the code is a Rice
 + * code of order rice_order, or a plain unary code when rice_order is 0.
 + */
 +#define DECODE_CODEWORD(val, codebook)                                  \
 +    do {                                                                \
 +        unsigned int rice_order, exp_order, switch_bits;                \
 +        unsigned int q, buf, bits;                                      \
 +                                                                        \
 +        UPDATE_CACHE(re, gb);                                           \
 +        buf = GET_CACHE(re, gb);                                        \
 +                                                                        \
 +        /* number of bits to switch between rice and exp golomb */      \
 +        switch_bits =  codebook & 3;                                    \
 +        rice_order  =  codebook >> 5;                                   \
 +        exp_order   = (codebook >> 2) & 7;                              \
 +                                                                        \
 +        q = 31 - av_log2(buf);                                          \
 +                                                                        \
 +        if (q > switch_bits) { /* exp golomb */                         \
 +            bits = exp_order - switch_bits + (q<<1);                    \
 +            val = SHOW_UBITS(re, gb, bits) - (1 << exp_order) +         \
 +                ((switch_bits + 1) << rice_order);                      \
 +            SKIP_BITS(re, gb, bits);                                    \
 +        } else if (rice_order) {                                        \
 +            SKIP_BITS(re, gb, q+1);                                     \
 +            val = (q << rice_order) + SHOW_UBITS(re, gb, rice_order);   \
 +            SKIP_BITS(re, gb, rice_order);                              \
 +        } else {                                                        \
 +            val = q;                                                    \
 +            SKIP_BITS(re, gb, q+1);                                     \
 +        }                                                               \
 +    } while (0)
 +
 +#define TOSIGNED(x) (((x) >> 1) ^ (-((x) & 1)))
 +
 +#define FIRST_DC_CB 0xB8
 +
 +static const uint8_t dc_codebook[7] = { 0x04, 0x28, 0x28, 0x4D, 0x4D, 0x70, 0x70};
 +/**
 + * Decode the DC coefficient of every block in a slice.
 + *
 + * The first DC value is coded with FIRST_DC_CB and mapped to a signed value
 + * with TOSIGNED. Each following block codes a difference to the previous
 + * DC: the codebook is picked from dc_codebook by the previous code (clamped
 + * to 6), the magnitude is (code + 1) >> 1, and the sign state is flipped
 + * when the code is odd and reset to 0 when the code is 0.
 + *
 + * @param out              coefficient storage; the DC of block n is written
 + *                         to out[64 * n]
 + * @param blocks_per_slice number of blocks to decode
 + */
 +static av_always_inline void decode_dc_coeffs(GetBitContext *gb, int16_t *out,
 +                                              int blocks_per_slice)
 +{
 +    int16_t prev_dc;
 +    int code, i, sign;
 +
 +    OPEN_READER(re, gb);
 +
 +    DECODE_CODEWORD(code, FIRST_DC_CB);
 +    prev_dc = TOSIGNED(code);
 +    out[0] = prev_dc;
 +
 +    out += 64; // dc coeff for the next block
 +
 +    code = 5;
 +    sign = 0;
 +    for (i = 1; i < blocks_per_slice; i++, out += 64) {
 +        DECODE_CODEWORD(code, dc_codebook[FFMIN(code, 6U)]);
 +        if(code) sign ^= -(code & 1);
 +        else     sign  = 0;
 +        prev_dc += (((code + 1) >> 1) ^ sign) - sign; /* (x ^ sign) - sign negates x when sign is -1 */
 +        out[0] = prev_dc;
 +    }
 +    CLOSE_READER(re, gb);
 +}
 +
 +// adaptive codebook switching lut according to previous run/level values
 +static const uint8_t run_to_cb[16] = { 0x06, 0x06, 0x05, 0x05, 0x04, 0x29, 0x29, 0x29, 0x29, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x4C };
 +static const uint8_t lev_to_cb[10] = { 0x04, 0x0A, 0x05, 0x06, 0x04, 0x28, 0x28, 0x28, 0x28, 0x4C };
 +
 +static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContext *gb,
 +                                             int16_t *out, int blocks_per_slice)
 +{
 +    ProresContext *ctx = avctx->priv_data;
 +    int block_mask, sign;
 +    unsigned pos, run, level;
 +    int max_coeffs, i, bits_left;
 +    int log2_block_count = av_log2(blocks_per_slice);
 +
 +    OPEN_READER(re, gb);
 +    UPDATE_CACHE(re, gb);                                           \
 +    run   = 4;
 +    level = 2;
 +
 +    max_coeffs = 64 << log2_block_count;
 +    block_mask = blocks_per_slice - 1;
 +
 +    for (pos = block_mask;;) {
 +        bits_left = gb->size_in_bits - re_index;
 +        if (!bits_left || (bits_left < 32 && !SHOW_UBITS(re, gb, bits_left)))
 +            break;
 +
 +        DECODE_CODEWORD(run, run_to_cb[FFMIN(run,  15)]);
 +        pos += run + 1;
 +        if (pos >= max_coeffs) {
 +            av_log(avctx, AV_LOG_ERROR, "ac tex damaged %d, %d\n", pos, max_coeffs);
 +            return AVERROR_INVALIDDATA;
 +        }
 +
 +        DECODE_CODEWORD(level, lev_to_cb[FFMIN(level, 9)]);
 +        level += 1;
 +
 +        i = pos >> log2_block_count;
 +
 +        sign = SHOW_SBITS(re, gb, 1);
 +        SKIP_BITS(re, gb, 1);
 +        out[((pos & block_mask) << 6) + ctx->scan[i]] = ((level ^ sign) - sign);
 +    }
 +
 +    CLOSE_READER(re, gb);
 +    return 0;
 +}
 +/**
 + * Decode the luma plane of one slice.
 + *
 + * Clears mb_count * 4 coefficient blocks, decodes their DC and AC
 + * coefficients from buf, then calls prodsp.idct_put once per block with
 + * qmat. The four blocks of a macroblock are written at dst, dst + 8,
 + * dst + 4 * dst_stride and dst + 4 * dst_stride + 8, and dst advances by
 + * 16 samples per macroblock.
 + * NOTE(review): dst is uint16_t* while dst_stride looks like a byte stride,
 + * which would make 4 * dst_stride elements equal 8 rows - confirm.
 + *
 + * @return 0 on success, a negative AVERROR code if AC decoding fails
 + */
 +static int decode_slice_luma(AVCodecContext *avctx, SliceContext *slice,
 +                             uint16_t *dst, int dst_stride,
 +                             const uint8_t *buf, unsigned buf_size,
 +                             const int16_t *qmat)
 +{
 +    ProresContext *ctx = avctx->priv_data;
 +    LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
 +    int16_t *block;
 +    GetBitContext gb;
 +    int i, blocks_per_slice = slice->mb_count<<2;
 +    int ret;
 +
 +    for (i = 0; i < blocks_per_slice; i++)
 +        ctx->bdsp.clear_block(blocks+(i<<6));
 +
 +    init_get_bits(&gb, buf, buf_size << 3);
 +
 +    decode_dc_coeffs(&gb, blocks, blocks_per_slice);
 +    if ((ret = decode_ac_coeffs(avctx, &gb, blocks, blocks_per_slice)) < 0)
 +        return ret;
 +
 +    block = blocks;
 +    for (i = 0; i < slice->mb_count; i++) {
 +        ctx->prodsp.idct_put(dst, dst_stride, block+(0<<6), qmat);
 +        ctx->prodsp.idct_put(dst             +8, dst_stride, block+(1<<6), qmat);
 +        ctx->prodsp.idct_put(dst+4*dst_stride  , dst_stride, block+(2<<6), qmat);
 +        ctx->prodsp.idct_put(dst+4*dst_stride+8, dst_stride, block+(3<<6), qmat);
 +        block += 4*64;
 +        dst += 16;
 +    }
 +    return 0;
 +}
 +/**
 + * Decode one chroma plane of a slice.
 + *
 + * Clears mb_count << log2_blocks_per_mb coefficient blocks, decodes their
 + * DC and AC coefficients from buf, then writes the blocks with
 + * prodsp.idct_put in vertical pairs (dst and dst + 4 * dst_stride),
 + * advancing dst by 8 samples per pair.
 + * NOTE(review): the inner loop runs log2_blocks_per_mb times, i.e. the log2
 + * value is used as the number of block pairs. That matches the block count
 + * for the values 1 and 2 passed by the visible caller; it would not for
 + * other values.
 + *
 + * @return 0 on success, a negative AVERROR code if AC decoding fails
 + */
 +static int decode_slice_chroma(AVCodecContext *avctx, SliceContext *slice,
 +                               uint16_t *dst, int dst_stride,
 +                               const uint8_t *buf, unsigned buf_size,
 +                               const int16_t *qmat, int log2_blocks_per_mb)
 +{
 +    ProresContext *ctx = avctx->priv_data;
 +    LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
 +    int16_t *block;
 +    GetBitContext gb;
 +    int i, j, blocks_per_slice = slice->mb_count << log2_blocks_per_mb;
 +    int ret;
 +
 +    for (i = 0; i < blocks_per_slice; i++)
 +        ctx->bdsp.clear_block(blocks+(i<<6));
 +
 +    init_get_bits(&gb, buf, buf_size << 3);
 +
 +    decode_dc_coeffs(&gb, blocks, blocks_per_slice);
 +    if ((ret = decode_ac_coeffs(avctx, &gb, blocks, blocks_per_slice)) < 0)
 +        return ret;
 +
 +    block = blocks;
 +    for (i = 0; i < slice->mb_count; i++) {
 +        for (j = 0; j < log2_blocks_per_mb; j++) {
 +            ctx->prodsp.idct_put(dst,              dst_stride, block+(0<<6), qmat);
 +            ctx->prodsp.idct_put(dst+4*dst_stride, dst_stride, block+(1<<6), qmat);
 +            block += 2*64;
 +            dst += 8;
 +        }
 +    }
 +    return 0;
 +}
 +/**
 + * Unpack num_coeffs alpha samples from the bitstream into dst.
 + *
 + * The running alpha value starts at the all-ones mask for num_bits. Each
 + * inner iteration reads either a full num_bits value (flag bit 1) or a
 + * short signed delta (7 bits when num_bits is 16, otherwise 4), adds it to
 + * the running value modulo 2^num_bits and stores one sample. After the
 + * inner loop a repeat count is read (4 bits, or 11 bits when those are 0),
 + * clamped to the remaining space, and the current value is stored that many
 + * times.
 + *
 + * Stored samples are reduced to 10 bits: value >> 6 for 16-bit input,
 + * (value << 2) | (value >> 6) otherwise.
 + */
 +static void unpack_alpha(GetBitContext *gb, uint16_t *dst, int num_coeffs,
 +                         const int num_bits)
 +{
 +    const int mask = (1 << num_bits) - 1;
 +    int i, idx, val, alpha_val;
 +
 +    idx       = 0;
 +    alpha_val = mask;
 +    do {
 +        do {
 +            if (get_bits1(gb)) {
 +                val = get_bits(gb, num_bits);
 +            } else {
 +                int sign;
 +                val  = get_bits(gb, num_bits == 16 ? 7 : 4);
 +                sign = val & 1;
 +                val  = (val + 2) >> 1;
 +                if (sign)
 +                    val = -val;
 +            }
 +            alpha_val = (alpha_val + val) & mask;
 +            if (num_bits == 16) {
 +                dst[idx++] = alpha_val >> 6;
 +            } else {
 +                dst[idx++] = (alpha_val << 2) | (alpha_val >> 6);
 +            }
 +            if (idx >= num_coeffs)
 +                break;
 +        } while (get_bits_left(gb)>0 && get_bits1(gb));
 +        val = get_bits(gb, 4);
 +        if (!val)
 +            val = get_bits(gb, 11);
 +        if (idx + val > num_coeffs) /* never write past the end of dst */
 +            val = num_coeffs - idx;
 +        if (num_bits == 16) {
 +            for (i = 0; i < val; i++)
 +                dst[idx++] = alpha_val >> 6;
 +        } else {
 +            for (i = 0; i < val; i++)
 +                dst[idx++] = (alpha_val << 2) | (alpha_val >> 6);
 +
 +        }
 +    } while (idx < num_coeffs);
 +}
 +
 +/**
 + * Decode alpha slice plane.
 + *
 + * Unpacks blocks_per_slice * 4 * 64 alpha samples into a temporary buffer
 + * (16-bit source samples when ctx->alpha_info is 2, 8-bit otherwise) and
 + * copies them to dst as 16 rows of 16 * blocks_per_slice samples each.
 + * dst advances by dst_stride >> 1 elements per row.
 + * NOTE(review): the visible caller passes slice->mb_count for
 + * blocks_per_slice, so the parameter appears to count macroblocks rather
 + * than blocks - confirm. The int16_t buffer is also passed to unpack_alpha
 + * as uint16_t* without a cast.
 + */
 +static void decode_slice_alpha(ProresContext *ctx,
 +                               uint16_t *dst, int dst_stride,
 +                               const uint8_t *buf, int buf_size,
 +                               int blocks_per_slice)
 +{
 +    GetBitContext gb;
 +    int i;
 +    LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
 +    int16_t *block;
 +
 +    for (i = 0; i < blocks_per_slice<<2; i++)
 +        ctx->bdsp.clear_block(blocks+(i<<6));
 +
 +    init_get_bits(&gb, buf, buf_size << 3);
 +
 +    if (ctx->alpha_info == 2) {
 +        unpack_alpha(&gb, blocks, blocks_per_slice * 4 * 64, 16);
 +    } else {
 +        unpack_alpha(&gb, blocks, blocks_per_slice * 4 * 64, 8);
 +    }
 +
 +    block = blocks;
 +    for (i = 0; i < 16; i++) {
 +        memcpy(dst, block, 16 * blocks_per_slice * sizeof(*dst));
 +        dst   += dst_stride >> 1;
 +        block += 16 * blocks_per_slice;
 +    }
 +}
 +
 +static int decode_slice_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
 +{
 +    ProresContext *ctx = avctx->priv_data;
 +    SliceContext *slice = &ctx->slices[jobnr];
 +    const uint8_t *buf = slice->data;
 +    AVFrame *pic = ctx->frame;
 +    int i, hdr_size, qscale, log2_chroma_blocks_per_mb;
 +    int luma_stride, chroma_stride;
 +    int y_data_size, u_data_size, v_data_size, a_data_size;
 +    uint8_t *dest_y, *dest_u, *dest_v, *dest_a;
 +    int16_t qmat_luma_scaled[64];
 +    int16_t qmat_chroma_scaled[64];
 +    int mb_x_shift;
 +    int ret;
 +
 +    slice->ret = -1;
 +    //av_log(avctx, AV_LOG_INFO, "slice %d mb width %d mb x %d y %d\n",
 +    //       jobnr, slice->mb_count, slice->mb_x, slice->mb_y);
 +
 +    // slice header
 +    hdr_size = buf[0] >> 3;
 +    qscale = av_clip(buf[1], 1, 224);
 +    qscale = qscale > 128 ? qscale - 96 << 2: qscale;
 +    y_data_size = AV_RB16(buf + 2);
 +    u_data_size = AV_RB16(buf + 4);
 +    v_data_size = slice->data_size - y_data_size - u_data_size - hdr_size;
 +    if (hdr_size > 7) v_data_size = AV_RB16(buf + 6);
 +    a_data_size = slice->data_size - y_data_size - u_data_size -
 +                  v_data_size - hdr_size;
 +
 +    if (y_data_size < 0 || u_data_size < 0 || v_data_size < 0
 +        || hdr_size+y_data_size+u_data_size+v_data_size > slice->data_size){
 +        av_log(avctx, AV_LOG_ERROR, "invalid plane data size\n");
 +        return AVERROR_INVALIDDATA;
 +    }
 +
 +    buf += hdr_size;
 +
 +    for (i = 0; i < 64; i++) {
 +        qmat_luma_scaled  [i] = ctx->qmat_luma  [i] * qscale;
 +        qmat_chroma_scaled[i] = ctx->qmat_chroma[i] * qscale;
 +    }
 +
 +    if (ctx->frame_type == 0) {
 +        luma_stride   = pic->linesize[0];
 +        chroma_stride = pic->linesize[1];
 +    } else {
 +        luma_stride   = pic->linesize[0] << 1;
 +        chroma_stride = pic->linesize[1] << 1;
 +    }
 +
 +    if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10 || avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
 +        mb_x_shift = 5;
 +        log2_chroma_blocks_per_mb = 2;
 +    } else {
 +        mb_x_shift = 4;
 +        log2_chroma_blocks_per_mb = 1;
 +    }
 +
 +    dest_y = pic->data[0] + (slice->mb_y << 4) * luma_stride + (slice->mb_x << 5);
 +    dest_u = pic->data[1] + (slice->mb_y << 4) * chroma_stride + (slice->mb_x << mb_x_shift);
 +    dest_v = pic->data[2] + (slice->mb_y << 4) * chroma_stride + (slice->mb_x << mb_x_shift);
 +    dest_a = pic->data[3] + (slice->mb_y << 4) * luma_stride + (slice->mb_x << 5);
 +
 +    if (ctx->frame_type && ctx->first_field ^ ctx->frame->top_field_first) {
 +        dest_y += pic->linesize[0];
 +        dest_u += pic->linesize[1];
 +        dest_v += pic->linesize[2];
 +        dest_a += pic->linesize[3];
 +    }
 +
 +    ret = decode_slice_luma(avctx, slice, (uint16_t*)dest_y, luma_stride,
 +                            buf, y_data_size, qmat_luma_scaled);
 +    if (ret < 0)
 +        return ret;
 +
 +    if (!(avctx->flags & CODEC_FLAG_GRAY)) {
 +        ret = decode_slice_chroma(avctx, slice, (uint16_t*)dest_u, chroma_stride,
 +                                  buf + y_data_size, u_data_size,
 +                                  qmat_chroma_scaled, log2_chroma_blocks_per_mb);
 +        if (ret < 0)
 +            return ret;
 +
 +        ret = decode_slice_chroma(avctx, slice, (uint16_t*)dest_v, chroma_stride,
 +                                  buf + y_data_size + u_data_size, v_data_size,
 +                                  qmat_chroma_scaled, log2_chroma_blocks_per_mb);
 +        if (ret < 0)
 +            return ret;
 +    }
 +    /* decode alpha plane if available */
 +    if (ctx->alpha_info && pic->data[3] && a_data_size)
 +        decode_slice_alpha(ctx, (uint16_t*)dest_a, luma_stride,
 +                           buf + y_data_size + u_data_size + v_data_size,
 +                           a_data_size, slice->mb_count);
 +
 +    slice->ret = 0;
 +    return 0;
 +}
 +/**
 + * Decode all slices of the current picture through avctx->execute2() and
 + * report the first negative per-slice result found in ctx->slices.
 + * The return value of execute2() itself is not inspected.
 + *
 + * @return 0 if every slice succeeded, otherwise the first failing slice's ret
 + */
 +static int decode_picture(AVCodecContext *avctx)
 +{
 +    ProresContext *ctx = avctx->priv_data;
 +    int i;
 +
 +    avctx->execute2(avctx, decode_slice_thread, NULL, NULL, ctx->slice_count);
 +
 +    for (i = 0; i < ctx->slice_count; i++)
 +        if (ctx->slices[i].ret < 0)
 +            return ctx->slices[i].ret;
 +
 +    return 0;
 +}
 +/**
 + * Decode callback: decode one packet into the frame passed as data.
 + *
 + * The packet must be at least 28 bytes long and carry the tag "icpf" at
 + * byte offset 4. After the 8 leading bytes the frame header is parsed, a
 + * buffer is requested with ff_get_buffer(), and one picture is decoded.
 + * When the frame type is non-zero and data remains after the first picture,
 + * a second picture is decoded with ctx->first_field cleared.
 + *
 + * @return avpkt->size on success, a negative AVERROR code on failure
 + */
 +static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 +                        AVPacket *avpkt)
 +{
 +    ProresContext *ctx = avctx->priv_data;
 +    AVFrame *frame = data;
 +    const uint8_t *buf = avpkt->data;
 +    int buf_size = avpkt->size;
 +    int frame_hdr_size, pic_size, ret;
 +
 +    if (buf_size < 28 || AV_RL32(buf + 4) != AV_RL32("icpf")) {
 +        av_log(avctx, AV_LOG_ERROR, "invalid frame header\n");
 +        return AVERROR_INVALIDDATA;
 +    }
 +
 +    ctx->frame = frame;
 +    ctx->frame->pict_type = AV_PICTURE_TYPE_I;
 +    ctx->frame->key_frame = 1;
 +    ctx->first_field = 1;
 +
 +    buf += 8;
 +    buf_size -= 8;
 +
 +    frame_hdr_size = decode_frame_header(ctx, buf, buf_size, avctx);
 +    if (frame_hdr_size < 0)
 +        return frame_hdr_size;
 +
 +    buf += frame_hdr_size;
 +    buf_size -= frame_hdr_size;
 +
 +    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
 +        return ret;
 +
 + decode_picture:
 +    pic_size = decode_picture_header(avctx, buf, buf_size);
 +    if (pic_size < 0) {
 +        av_log(avctx, AV_LOG_ERROR, "error decoding picture header\n");
 +        return pic_size;
 +    }
 +
 +    if ((ret = decode_picture(avctx)) < 0) {
 +        av_log(avctx, AV_LOG_ERROR, "error decoding picture\n");
 +        return ret;
 +    }
 +
 +    buf += pic_size;
 +    buf_size -= pic_size;
 +
 +    if (ctx->frame_type && buf_size > 0 && ctx->first_field) { /* second picture of a two-picture frame */
 +        ctx->first_field = 0;
 +        goto decode_picture;
 +    }
 +
 +    *got_frame      = 1;
 +
 +    return avpkt->size;
 +}
 +/**
 + * Decoder close callback: release the slice array.
 + *
 + * @return always 0
 + */
 +static av_cold int decode_close(AVCodecContext *avctx)
 +{
 +    ProresContext *ctx = avctx->priv_data;
 +
 +    av_freep(&ctx->slices);
 +
 +    return 0;
 +}
 +/**
 + * Codec descriptor for the "prores" video decoder: wires up the init, close
 + * and decode callbacks defined in this file and sets the CODEC_CAP_DR1 and
 + * CODEC_CAP_SLICE_THREADS capability flags.
 + */
 +AVCodec ff_prores_decoder = {
 +    .name           = "prores",
 +    .long_name      = NULL_IF_CONFIG_SMALL("ProRes"),
 +    .type           = AVMEDIA_TYPE_VIDEO,
 +    .id             = AV_CODEC_ID_PRORES,
 +    .priv_data_size = sizeof(ProresContext),
 +    .init           = decode_init,
 +    .close          = decode_close,
 +    .decode         = decode_frame,
 +    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
 +};
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -90,7 -90,7 +90,7 @@@ static int vaapi_mpeg2_start_frame(AVCo
      iq_matrix->load_chroma_non_intra_quantiser_matrix   = 1;
  
      for (i = 0; i < 64; i++) {
--        int n = s->dsp.idct_permutation[ff_zigzag_direct[i]];
++        int n = s->idsp.idct_permutation[ff_zigzag_direct[i]];
          iq_matrix->intra_quantiser_matrix[i]            = s->intra_matrix[n];
          iq_matrix->non_intra_quantiser_matrix[i]        = s->inter_matrix[n];
          iq_matrix->chroma_intra_quantiser_matrix[i]     = s->chroma_intra_matrix[n];
@@@ -109,7 -109,7 +109,7 @@@ static int vaapi_mpeg4_start_frame(AVCo
          iq_matrix->load_non_intra_quant_mat     = 1;
  
          for (i = 0; i < 64; i++) {
--            int n = s->dsp.idct_permutation[ff_zigzag_direct[i]];
++            int n = s->idsp.idct_permutation[ff_zigzag_direct[i]];
              iq_matrix->intra_quant_mat[i]       = s->intra_matrix[n];
              iq_matrix->non_intra_quant_mat[i]   = s->inter_matrix[n];
          }
Simple merge
Simple merge
Simple merge
@@@ -18,12 -15,10 +18,13 @@@ OBJS-$(CONFIG_H264CHROMA)              
  OBJS-$(CONFIG_H264DSP)                 += x86/h264dsp_init.o
  OBJS-$(CONFIG_H264PRED)                += x86/h264_intrapred_init.o
  OBJS-$(CONFIG_H264QPEL)                += x86/h264_qpel.o
 +OBJS-$(CONFIG_HEVC_DECODER)            += x86/hevcdsp_init.o
  OBJS-$(CONFIG_HPELDSP)                 += x86/hpeldsp_init.o
 +OBJS-$(CONFIG_LLAUDDSP)                += x86/lossless_audiodsp_init.o
 +OBJS-$(CONFIG_LLVIDDSP)                += x86/lossless_videodsp_init.o
  OBJS-$(CONFIG_HUFFYUVDSP)              += x86/huffyuvdsp_init.o
  OBJS-$(CONFIG_HUFFYUVENCDSP)           += x86/huffyuvencdsp_mmx.o
+ OBJS-$(CONFIG_IDCTDSP)                 += x86/idctdsp_init.o
  OBJS-$(CONFIG_LPC)                     += x86/lpc.o
  OBJS-$(CONFIG_MPEGAUDIODSP)            += x86/mpegaudiodsp.o
  OBJS-$(CONFIG_MPEGVIDEO)               += x86/mpegvideo.o              \
@@@ -55,17 -47,20 +56,18 @@@ OBJS-$(CONFIG_VP6_DECODER)             
  OBJS-$(CONFIG_VP7_DECODER)             += x86/vp8dsp_init.o
  OBJS-$(CONFIG_VP8_DECODER)             += x86/vp8dsp_init.o
  OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o
 +OBJS-$(CONFIG_WEBP_DECODER)            += x86/vp8dsp_init.o
  
- MMX-OBJS-$(CONFIG_DSPUTIL)             += x86/dsputil_mmx.o             \
 -MMX-OBJS-$(CONFIG_AUDIODSP)            += x86/audiodsp_mmx.o
 -MMX-OBJS-$(CONFIG_BLOCKDSP)            += x86/blockdsp_mmx.o
+ MMX-OBJS-$(CONFIG_DSPUTIL)             += x86/dsputil_mmx.o
 -MMX-OBJS-$(CONFIG_HPELDSP)             += x86/fpel_mmx.o                \
 -                                          x86/hpeldsp_mmx.o
++MMX-OBJS-$(CONFIG_DIRAC_DECODER)       += x86/dirac_dwt.o
+ MMX-OBJS-$(CONFIG_HUFFYUVDSP)          += x86/huffyuvdsp_mmx.o
+ MMX-OBJS-$(CONFIG_IDCTDSP)             += x86/idctdsp_mmx.o             \
                                            x86/idct_mmx_xvid.o           \
                                            x86/idct_sse2_xvid.o          \
                                            x86/simple_idct.o
- MMX-OBJS-$(CONFIG_DIRAC_DECODER)       += x86/dirac_dwt.o
- MMX-OBJS-$(CONFIG_HUFFYUVDSP)          += x86/huffyuvdsp_mmx.o
 -MMX-OBJS-$(CONFIG_QPELDSP)             += x86/fpel_mmx.o
  
 -MMX-OBJS-$(CONFIG_SVQ1_ENCODER)        += x86/svq1enc_mmx.o
 +MMX-OBJS-$(CONFIG_SNOW_DECODER)        += x86/snowdsp.o
 +MMX-OBJS-$(CONFIG_SNOW_ENCODER)        += x86/snowdsp.o
  MMX-OBJS-$(CONFIG_VC1_DECODER)         += x86/vc1dsp_mmx.o
  
  YASM-OBJS                              += x86/deinterlace.o             \
Simple merge
@@@ -69,62 -31,7 +34,8 @@@ static av_cold void dsputil_init_mmx(DS
      if (!high_bit_depth) {
          c->draw_edges   = ff_draw_edges_mmx;
      }
-     if (avctx->lowres == 0 && !high_bit_depth) {
-         switch (avctx->idct_algo) {
-         case FF_IDCT_AUTO:
-         case FF_IDCT_SIMPLEAUTO:
-         case FF_IDCT_SIMPLEMMX:
-             c->idct_put              = ff_simple_idct_put_mmx;
-             c->idct_add              = ff_simple_idct_add_mmx;
-             c->idct                  = ff_simple_idct_mmx;
-             c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
-             break;
-         case FF_IDCT_XVIDMMX:
-             c->idct_put              = ff_idct_xvid_mmx_put;
-             c->idct_add              = ff_idct_xvid_mmx_add;
-             c->idct                  = ff_idct_xvid_mmx;
-             break;
-         }
-     }
 +
  #endif /* HAVE_MMX_INLINE */
- #if HAVE_MMX_EXTERNAL
-     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
- #endif /* HAVE_MMX_EXTERNAL */
- }
- static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
-                                         int cpu_flags, unsigned high_bit_depth)
- {
- #if HAVE_MMXEXT_INLINE
-     if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX && avctx->lowres == 0) {
-         c->idct_put = ff_idct_xvid_mmxext_put;
-         c->idct_add = ff_idct_xvid_mmxext_add;
-         c->idct     = ff_idct_xvid_mmxext;
-     }
- #endif /* HAVE_MMXEXT_INLINE */
- }
- static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
-                                       int cpu_flags, unsigned high_bit_depth)
- {
- #if HAVE_SSE2_INLINE
-     if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX && avctx->lowres == 0) {
-         c->idct_put              = ff_idct_xvid_sse2_put;
-         c->idct_add              = ff_idct_xvid_sse2_add;
-         c->idct                  = ff_idct_xvid_sse2;
-         c->idct_permutation_type = FF_SSE2_IDCT_PERM;
-     }
- #endif /* HAVE_SSE2_INLINE */
- #if HAVE_SSE2_EXTERNAL
-     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
- #endif /* HAVE_SSE2_EXTERNAL */
  }
  
  av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
Simple merge
Simple merge
Simple merge
Simple merge
index 0000000,22df3dd..9b7177a
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,31 +1,33 @@@
 - * This file is part of Libav.
+ /*
 - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ #ifndef AVCODEC_X86_IDCTDSP_H
+ #define AVCODEC_X86_IDCTDSP_H
+ #include <stdint.h>
+ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
+                                int line_size);
+ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
+                                int line_size);
+ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
+                                       int line_size);
++void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
++                                       int line_size);
+ #endif /* AVCODEC_X86_IDCTDSP_H */
index 0000000,9b68497..b2332b2
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,106 +1,112 @@@
 - * This file is part of Libav.
+ /*
 - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
 -        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ #include "config.h"
+ #include "libavutil/attributes.h"
+ #include "libavutil/cpu.h"
+ #include "libavutil/x86/cpu.h"
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/idctdsp.h"
+ #include "libavcodec/simple_idct.h"
+ #include "idct_xvid.h"
+ #include "idctdsp.h"
+ /* Input permutation for the simple_idct_mmx */
+ static const uint8_t simple_mmx_permutation[64] = {
+     0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
+     0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
+     0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
+     0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
+     0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
+     0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
+     0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
+     0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
+ };
+ static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
+ av_cold int ff_init_scantable_permutation_x86(uint8_t *idct_permutation,
+                                               int idct_permutation_type)
+ {
+     int i;
+     switch (idct_permutation_type) {
+     case FF_SIMPLE_IDCT_PERM:
+         for (i = 0; i < 64; i++)
+             idct_permutation[i] = simple_mmx_permutation[i];
+         return 1;
+     case FF_SSE2_IDCT_PERM:
+         for (i = 0; i < 64; i++)
+             idct_permutation[i] = (i & 0x38) | idct_sse2_row_perm[i & 7];
+         return 1;
+     }
+     return 0;
+ }
+ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
+                                  unsigned high_bit_depth)
+ {
+     int cpu_flags = av_get_cpu_flags();
+     if (INLINE_MMX(cpu_flags)) {
+         c->put_pixels_clamped        = ff_put_pixels_clamped_mmx;
 -        if (!high_bit_depth) {
+         c->add_pixels_clamped        = ff_add_pixels_clamped_mmx;
 -        if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
++        if (avctx->lowres == 0 && !high_bit_depth) {
+             switch (avctx->idct_algo) {
+             case FF_IDCT_AUTO:
++            case FF_IDCT_SIMPLEAUTO:
+             case FF_IDCT_SIMPLEMMX:
+                 c->idct_put              = ff_simple_idct_put_mmx;
+                 c->idct_add              = ff_simple_idct_add_mmx;
+                 c->idct                  = ff_simple_idct_mmx;
+                 c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
+                 break;
+             case FF_IDCT_XVIDMMX:
+                 c->idct_put              = ff_idct_xvid_mmx_put;
+                 c->idct_add              = ff_idct_xvid_mmx_add;
+                 c->idct                  = ff_idct_xvid_mmx;
+                 break;
+             }
+         }
+     }
++    if (EXTERNAL_MMX(cpu_flags)) {
++        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
++    }
+     if (INLINE_MMXEXT(cpu_flags)) {
 -        if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
++        if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX && avctx->lowres == 0) {
+             c->idct_put = ff_idct_xvid_mmxext_put;
+             c->idct_add = ff_idct_xvid_mmxext_add;
+             c->idct     = ff_idct_xvid_mmxext;
+         }
+     }
+     if (INLINE_SSE2(cpu_flags)) {
++        if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX && avctx->lowres == 0) {
+             c->idct_put              = ff_idct_xvid_sse2_put;
+             c->idct_add              = ff_idct_xvid_sse2_add;
+             c->idct                  = ff_idct_xvid_sse2;
+             c->idct_permutation_type = FF_SSE2_IDCT_PERM;
+         }
+     }
++    if (EXTERNAL_SSE2(cpu_flags)) {
++        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
++    }
+ }
index 0000000,7285b1d..a72b941
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,168 +1,133 @@@
 - * This file is part of Libav.
+ /*
+  * SIMD-optimized IDCT-related routines
+  * Copyright (c) 2000, 2001 Fabrice Bellard
+  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+  *
+  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
+  *
 - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
 -#define put_signed_pixels_clamped_mmx_half(off)             \
 -    "movq          "#off"(%2), %%mm1        \n\t"           \
 -    "movq     16 + "#off"(%2), %%mm2        \n\t"           \
 -    "movq     32 + "#off"(%2), %%mm3        \n\t"           \
 -    "movq     48 + "#off"(%2), %%mm4        \n\t"           \
 -    "packsswb  8 + "#off"(%2), %%mm1        \n\t"           \
 -    "packsswb 24 + "#off"(%2), %%mm2        \n\t"           \
 -    "packsswb 40 + "#off"(%2), %%mm3        \n\t"           \
 -    "packsswb 56 + "#off"(%2), %%mm4        \n\t"           \
 -    "paddb              %%mm0, %%mm1        \n\t"           \
 -    "paddb              %%mm0, %%mm2        \n\t"           \
 -    "paddb              %%mm0, %%mm3        \n\t"           \
 -    "paddb              %%mm0, %%mm4        \n\t"           \
 -    "movq               %%mm1, (%0)         \n\t"           \
 -    "movq               %%mm2, (%0, %3)     \n\t"           \
 -    "movq               %%mm3, (%0, %3, 2)  \n\t"           \
 -    "movq               %%mm4, (%0, %1)     \n\t"
 -
 -void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
 -                                      int line_size)
 -{
 -    x86_reg line_skip = line_size;
 -    x86_reg line_skip3;
 -
 -    __asm__ volatile (
 -        "movq "MANGLE(ff_pb_80)", %%mm0     \n\t"
 -        "lea         (%3, %3, 2), %1        \n\t"
 -        put_signed_pixels_clamped_mmx_half(0)
 -        "lea         (%0, %3, 4), %0        \n\t"
 -        put_signed_pixels_clamped_mmx_half(64)
 -        : "+&r" (pixels), "=&r" (line_skip3)
 -        : "r" (block), "r" (line_skip)
 -        : "memory");
 -}
 -
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ #include "config.h"
+ #include "libavutil/cpu.h"
+ #include "libavutil/x86/asm.h"
+ #include "idctdsp.h"
+ #include "inline_asm.h"
+ #if HAVE_INLINE_ASM
+ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
+                                int line_size)
+ {
+     const int16_t *p;
+     uint8_t *pix;
+     /* read the pixels */
+     p   = block;
+     pix = pixels;
+     /* unrolled loop */
+     __asm__ volatile (
+         "movq      (%3), %%mm0          \n\t"
+         "movq     8(%3), %%mm1          \n\t"
+         "movq    16(%3), %%mm2          \n\t"
+         "movq    24(%3), %%mm3          \n\t"
+         "movq    32(%3), %%mm4          \n\t"
+         "movq    40(%3), %%mm5          \n\t"
+         "movq    48(%3), %%mm6          \n\t"
+         "movq    56(%3), %%mm7          \n\t"
+         "packuswb %%mm1, %%mm0          \n\t"
+         "packuswb %%mm3, %%mm2          \n\t"
+         "packuswb %%mm5, %%mm4          \n\t"
+         "packuswb %%mm7, %%mm6          \n\t"
+         "movq     %%mm0, (%0)           \n\t"
+         "movq     %%mm2, (%0, %1)       \n\t"
+         "movq     %%mm4, (%0, %1, 2)    \n\t"
+         "movq     %%mm6, (%0, %2)       \n\t"
+         :: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3),
+            "r" (p)
+         : "memory");
+     pix += line_size * 4;
+     p   += 32;
+     // if here would be an exact copy of the code above
+     // compiler would generate some very strange code
+     // thus using "r"
+     __asm__ volatile (
+         "movq       (%3), %%mm0         \n\t"
+         "movq      8(%3), %%mm1         \n\t"
+         "movq     16(%3), %%mm2         \n\t"
+         "movq     24(%3), %%mm3         \n\t"
+         "movq     32(%3), %%mm4         \n\t"
+         "movq     40(%3), %%mm5         \n\t"
+         "movq     48(%3), %%mm6         \n\t"
+         "movq     56(%3), %%mm7         \n\t"
+         "packuswb  %%mm1, %%mm0         \n\t"
+         "packuswb  %%mm3, %%mm2         \n\t"
+         "packuswb  %%mm5, %%mm4         \n\t"
+         "packuswb  %%mm7, %%mm6         \n\t"
+         "movq      %%mm0, (%0)          \n\t"
+         "movq      %%mm2, (%0, %1)      \n\t"
+         "movq      %%mm4, (%0, %1, 2)   \n\t"
+         "movq      %%mm6, (%0, %2)      \n\t"
+         :: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3),
+            "r" (p)
+         : "memory");
+ }
+ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
+                                int line_size)
+ {
+     const int16_t *p;
+     uint8_t *pix;
+     int i;
+     /* read the pixels */
+     p   = block;
+     pix = pixels;
+     MOVQ_ZERO(mm7);
+     i = 4;
+     do {
+         __asm__ volatile (
+             "movq        (%2), %%mm0    \n\t"
+             "movq       8(%2), %%mm1    \n\t"
+             "movq      16(%2), %%mm2    \n\t"
+             "movq      24(%2), %%mm3    \n\t"
+             "movq          %0, %%mm4    \n\t"
+             "movq          %1, %%mm6    \n\t"
+             "movq       %%mm4, %%mm5    \n\t"
+             "punpcklbw  %%mm7, %%mm4    \n\t"
+             "punpckhbw  %%mm7, %%mm5    \n\t"
+             "paddsw     %%mm4, %%mm0    \n\t"
+             "paddsw     %%mm5, %%mm1    \n\t"
+             "movq       %%mm6, %%mm5    \n\t"
+             "punpcklbw  %%mm7, %%mm6    \n\t"
+             "punpckhbw  %%mm7, %%mm5    \n\t"
+             "paddsw     %%mm6, %%mm2    \n\t"
+             "paddsw     %%mm5, %%mm3    \n\t"
+             "packuswb   %%mm1, %%mm0    \n\t"
+             "packuswb   %%mm3, %%mm2    \n\t"
+             "movq       %%mm0, %0       \n\t"
+             "movq       %%mm2, %1       \n\t"
+             : "+m" (*pix), "+m" (*(pix + line_size))
+             : "r" (p)
+             : "memory");
+         pix += line_size * 2;
+         p   += 16;
+     } while (--i);
+ }
+ #endif /* HAVE_INLINE_ASM */
@@@ -276,50 -273,6 +276,50 @@@ static int RENAME(dct_quantize)(MpegEnc
          block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
          block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
          block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
-     }else if(s->dsp.idct_permutation_type == FF_LIBMPEG2_IDCT_PERM){
++    }else if(s->idsp.idct_permutation_type == FF_LIBMPEG2_IDCT_PERM){
 +        if(last_non_zero_p1 <= 1) goto end;
 +        block[0x04] = temp_block[0x01];
 +        block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
 +        if(last_non_zero_p1 <= 4) goto end;
 +        block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
 +        block[0x05] = temp_block[0x03];
 +        if(last_non_zero_p1 <= 7) goto end;
 +        block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
 +        block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
 +        if(last_non_zero_p1 <= 11) goto end;
 +        block[0x1C] = temp_block[0x19];
 +        block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
 +        block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
 +        if(last_non_zero_p1 <= 16) goto end;
 +        block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
 +        block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
 +        block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
 +        block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
 +        if(last_non_zero_p1 <= 24) goto end;
 +        block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
 +        block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
 +        block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
 +        block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
 +        if(last_non_zero_p1 <= 32) goto end;
 +        block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
 +        block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
 +        block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
 +        block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
 +        if(last_non_zero_p1 <= 40) goto end;
 +        block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
 +        block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
 +        block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
 +        block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
 +        if(last_non_zero_p1 <= 48) goto end;
 +        block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
 +        block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
 +            block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
 +        block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
 +        if(last_non_zero_p1 <= 56) goto end;
 +        block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
 +        block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
 +        block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
 +        block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
      }else{
          if(last_non_zero_p1 <= 1) goto end;
          block[0x01] = temp_block[0x01];
Simple merge
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  #include "libavcodec/simple_idct.h"
 -#include "libavutil/internal.h"
  #include "libavutil/mem.h"
  #include "libavutil/x86/asm.h"
- #include "dsputil_x86.h"
+ #include "idctdsp.h"
  
  #if HAVE_INLINE_ASM
  
index ba0104b,0000000..5c4ebdc
mode 100644,000000..100644
--- /dev/null
@@@ -1,437 -1,0 +1,438 @@@
-                 p->requantize(block2, block, qp, p->dsp.idct_permutation);
-                 p->dsp.idct(block2);
 +/*
 + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 + * Copyright (c) 2013 Clément Bœsch <u pkh me>
 + *
 + * This file is part of FFmpeg.
 + *
 + * FFmpeg is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * FFmpeg is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License along
 + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 + */
 +
 +/**
 + * @file
 + * Simple post processing filter
 + *
 + * This implementation is based on an algorithm described in
 + * "Aria Nosratinia Embedded Post-Processing for
 + * Enhancement of Compressed Images (1999)"
 + *
 + * Originally written by Michael Niedermayer for the MPlayer project, and
 + * ported by Clément Bœsch for FFmpeg.
 + */
 +
 +#include "libavcodec/dsputil.h"
 +#include "libavutil/avassert.h"
 +#include "libavutil/imgutils.h"
 +#include "libavutil/opt.h"
 +#include "libavutil/pixdesc.h"
 +#include "internal.h"
 +#include "vf_spp.h"
 +
 +enum mode {
 +    MODE_HARD,
 +    MODE_SOFT,
 +    NB_MODES
 +};
 +
 +#define OFFSET(x) offsetof(SPPContext, x)
 +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 +static const AVOption spp_options[] = {
 +    { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 3}, 0, MAX_LEVEL, FLAGS },
 +    { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 63, FLAGS },
 +    { "mode", "set thresholding mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_HARD}, 0, NB_MODES - 1, FLAGS, "mode" },
 +        { "hard", "hard thresholding", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_HARD}, INT_MIN, INT_MAX, FLAGS, "mode" },
 +        { "soft", "soft thresholding", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_SOFT}, INT_MIN, INT_MAX, FLAGS, "mode" },
 +    { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS },
 +    { NULL }
 +};
 +
 +AVFILTER_DEFINE_CLASS(spp);
 +
 +// XXX: share between filters?
 +DECLARE_ALIGNED(8, static const uint8_t, ldither)[8][8] = {
 +    {  0,  48,  12,  60,   3,  51,  15,  63 },
 +    { 32,  16,  44,  28,  35,  19,  47,  31 },
 +    {  8,  56,   4,  52,  11,  59,   7,  55 },
 +    { 40,  24,  36,  20,  43,  27,  39,  23 },
 +    {  2,  50,  14,  62,   1,  49,  13,  61 },
 +    { 34,  18,  46,  30,  33,  17,  45,  29 },
 +    { 10,  58,   6,  54,   9,  57,   5,  53 },
 +    { 42,  26,  38,  22,  41,  25,  37,  21 },
 +};
 +
 +static const uint8_t offset[127][2] = {
 +    {0,0},
 +    {0,0}, {4,4},                                           // quality = 1
 +    {0,0}, {2,2}, {6,4}, {4,6},                             // quality = 2
 +    {0,0}, {5,1}, {2,2}, {7,3}, {4,4}, {1,5}, {6,6}, {3,7}, // quality = 3
 +
 +    {0,0}, {4,0}, {1,1}, {5,1}, {3,2}, {7,2}, {2,3}, {6,3}, // quality = 4
 +    {0,4}, {4,4}, {1,5}, {5,5}, {3,6}, {7,6}, {2,7}, {6,7},
 +
 +    {0,0}, {0,2}, {0,4}, {0,6}, {1,1}, {1,3}, {1,5}, {1,7}, // quality = 5
 +    {2,0}, {2,2}, {2,4}, {2,6}, {3,1}, {3,3}, {3,5}, {3,7},
 +    {4,0}, {4,2}, {4,4}, {4,6}, {5,1}, {5,3}, {5,5}, {5,7},
 +    {6,0}, {6,2}, {6,4}, {6,6}, {7,1}, {7,3}, {7,5}, {7,7},
 +
 +    {0,0}, {4,4}, {0,4}, {4,0}, {2,2}, {6,6}, {2,6}, {6,2}, // quality = 6
 +    {0,2}, {4,6}, {0,6}, {4,2}, {2,0}, {6,4}, {2,4}, {6,0},
 +    {1,1}, {5,5}, {1,5}, {5,1}, {3,3}, {7,7}, {3,7}, {7,3},
 +    {1,3}, {5,7}, {1,7}, {5,3}, {3,1}, {7,5}, {3,5}, {7,1},
 +    {0,1}, {4,5}, {0,5}, {4,1}, {2,3}, {6,7}, {2,7}, {6,3},
 +    {0,3}, {4,7}, {0,7}, {4,3}, {2,1}, {6,5}, {2,5}, {6,1},
 +    {1,0}, {5,4}, {1,4}, {5,0}, {3,2}, {7,6}, {3,6}, {7,2},
 +    {1,2}, {5,6}, {1,6}, {5,2}, {3,0}, {7,4}, {3,4}, {7,0},
 +};
 +
 +static void hardthresh_c(int16_t dst[64], const int16_t src[64],
 +                         int qp, const uint8_t *permutation)
 +{
 +    int i;
 +    int bias = 0; // FIXME
 +
 +    unsigned threshold1 = qp * ((1<<4) - bias) - 1;
 +    unsigned threshold2 = threshold1 << 1;
 +
 +    memset(dst, 0, 64 * sizeof(dst[0]));
 +    dst[0] = (src[0] + 4) >> 3;
 +
 +    for (i = 1; i < 64; i++) {
 +        int level = src[i];
 +        if (((unsigned)(level + threshold1)) > threshold2) {
 +            const int j = permutation[i];
 +            dst[j] = (level + 4) >> 3;
 +        }
 +    }
 +}
 +
 +static void softthresh_c(int16_t dst[64], const int16_t src[64],
 +                         int qp, const uint8_t *permutation)
 +{
 +    int i;
 +    int bias = 0; //FIXME
 +
 +    unsigned threshold1 = qp * ((1<<4) - bias) - 1;
 +    unsigned threshold2 = threshold1 << 1;
 +
 +    memset(dst, 0, 64 * sizeof(dst[0]));
 +    dst[0] = (src[0] + 4) >> 3;
 +
 +    for (i = 1; i < 64; i++) {
 +        int level = src[i];
 +        if (((unsigned)(level + threshold1)) > threshold2) {
 +            const int j = permutation[i];
 +            if (level > 0) dst[j] = (level - threshold1 + 4) >> 3;
 +            else           dst[j] = (level + threshold1 + 4) >> 3;
 +        }
 +    }
 +}
 +
 +static void store_slice_c(uint8_t *dst, const int16_t *src,
 +                          int dst_linesize, int src_linesize,
 +                          int width, int height, int log2_scale,
 +                          const uint8_t dither[8][8])
 +{
 +    int y, x;
 +
 +#define STORE(pos) do {                                                     \
 +    temp = ((src[x + y*src_linesize + pos] << log2_scale) + d[pos]) >> 6;   \
 +    if (temp & 0x100)                                                       \
 +        temp = ~(temp >> 31);                                               \
 +    dst[x + y*dst_linesize + pos] = temp;                                   \
 +} while (0)
 +
 +    for (y = 0; y < height; y++) {
 +        const uint8_t *d = dither[y];
 +        for (x = 0; x < width; x += 8) {
 +            int temp;
 +            STORE(0);
 +            STORE(1);
 +            STORE(2);
 +            STORE(3);
 +            STORE(4);
 +            STORE(5);
 +            STORE(6);
 +            STORE(7);
 +        }
 +    }
 +}
 +
 +static inline void add_block(int16_t *dst, int linesize, const int16_t block[64])
 +{
 +    int y;
 +
 +    for (y = 0; y < 8; y++) {
 +        *(uint32_t *)&dst[0 + y*linesize] += *(uint32_t *)&block[0 + y*8];
 +        *(uint32_t *)&dst[2 + y*linesize] += *(uint32_t *)&block[2 + y*8];
 +        *(uint32_t *)&dst[4 + y*linesize] += *(uint32_t *)&block[4 + y*8];
 +        *(uint32_t *)&dst[6 + y*linesize] += *(uint32_t *)&block[6 + y*8];
 +    }
 +}
 +
 +// XXX: export the function?
 +static inline int norm_qscale(int qscale, int type)
 +{
 +    switch (type) {
 +    case FF_QSCALE_TYPE_MPEG1: return qscale;
 +    case FF_QSCALE_TYPE_MPEG2: return qscale >> 1;
 +    case FF_QSCALE_TYPE_H264:  return qscale >> 2;
 +    case FF_QSCALE_TYPE_VP56:  return (63 - qscale + 2) >> 2;
 +    }
 +    return qscale;
 +}
 +
 +static void filter(SPPContext *p, uint8_t *dst, uint8_t *src,
 +                   int dst_linesize, int src_linesize, int width, int height,
 +                   const uint8_t *qp_table, int qp_stride, int is_luma)
 +{
 +    int x, y, i;
 +    const int count = 1 << p->log2_count;
 +    const int linesize = is_luma ? p->temp_linesize : FFALIGN(width+16, 16);
 +    DECLARE_ALIGNED(16, uint64_t, block_align)[32];
 +    int16_t *block  = (int16_t *)block_align;
 +    int16_t *block2 = (int16_t *)(block_align + 16);
 +
 +    for (y = 0; y < height; y++) {
 +        int index = 8 + 8*linesize + y*linesize;
 +        memcpy(p->src + index, src + y*src_linesize, width);
 +        for (x = 0; x < 8; x++) {
 +            p->src[index         - x - 1] = p->src[index +         x    ];
 +            p->src[index + width + x    ] = p->src[index + width - x - 1];
 +        }
 +    }
 +    for (y = 0; y < 8; y++) {
 +        memcpy(p->src + (       7-y)*linesize, p->src + (       y+8)*linesize, linesize);
 +        memcpy(p->src + (height+8+y)*linesize, p->src + (height-y+7)*linesize, linesize);
 +    }
 +
 +    for (y = 0; y < height + 8; y += 8) {
 +        memset(p->temp + (8 + y) * linesize, 0, 8 * linesize * sizeof(*p->temp));
 +        for (x = 0; x < width + 8; x += 8) {
 +            int qp;
 +
 +            if (p->qp) {
 +                qp = p->qp;
 +            } else{
 +                const int qps = 3 + is_luma;
 +                qp = qp_table[(FFMIN(x, width - 1) >> qps) + (FFMIN(y, height - 1) >> qps) * qp_stride];
 +                qp = FFMAX(1, norm_qscale(qp, p->qscale_type));
 +            }
 +            for (i = 0; i < count; i++) {
 +                const int x1 = x + offset[i + count - 1][0];
 +                const int y1 = y + offset[i + count - 1][1];
 +                const int index = x1 + y1*linesize;
 +                p->dsp.get_pixels(block, p->src + index, linesize);
 +                p->dsp.fdct(block);
++                p->requantize(block2, block, qp, p->idsp.idct_permutation);
++                p->idsp.idct(block2);
 +                add_block(p->temp + index, linesize, block2);
 +            }
 +        }
 +        if (y)
 +            p->store_slice(dst + (y - 8) * dst_linesize, p->temp + 8 + y*linesize,
 +                           dst_linesize, linesize, width,
 +                           FFMIN(8, height + 8 - y), MAX_LEVEL - p->log2_count,
 +                           ldither);
 +    }
 +}
 +
 +static int query_formats(AVFilterContext *ctx)
 +{
 +    static const enum PixelFormat pix_fmts[] = {
 +        AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV422P,
 +        AV_PIX_FMT_YUV420P,  AV_PIX_FMT_YUV411P,
 +        AV_PIX_FMT_YUV410P,  AV_PIX_FMT_YUV440P,
 +        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
 +        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
 +        AV_PIX_FMT_NONE
 +    };
 +    ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
 +    return 0;
 +}
 +
 +static int config_input(AVFilterLink *inlink)
 +{
 +    SPPContext *spp = inlink->dst->priv;
 +    const int h = FFALIGN(inlink->h + 16, 16);
 +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 +
 +    spp->hsub = desc->log2_chroma_w;
 +    spp->vsub = desc->log2_chroma_h;
 +    spp->temp_linesize = FFALIGN(inlink->w + 16, 16);
 +    spp->temp = av_malloc_array(spp->temp_linesize, h * sizeof(*spp->temp));
 +    spp->src  = av_malloc_array(spp->temp_linesize, h * sizeof(*spp->src));
 +    if (!spp->use_bframe_qp) {
 +        /* we are assuming here the qp blocks will not be smaller that 16x16 */
 +        spp->non_b_qp_alloc_size = FF_CEIL_RSHIFT(inlink->w, 4) * FF_CEIL_RSHIFT(inlink->h, 4);
 +        spp->non_b_qp_table = av_calloc(spp->non_b_qp_alloc_size, sizeof(*spp->non_b_qp_table));
 +        if (!spp->non_b_qp_table)
 +            return AVERROR(ENOMEM);
 +    }
 +    if (!spp->temp || !spp->src)
 +        return AVERROR(ENOMEM);
 +    return 0;
 +}
 +
 +static int filter_frame(AVFilterLink *inlink, AVFrame *in) /* filter_frame callback of the input pad: filters `in` and passes the result to outputs[0] */
 +{
 +    AVFilterContext *ctx = inlink->dst;
 +    SPPContext *spp = ctx->priv;
 +    AVFilterLink *outlink = ctx->outputs[0];
 +    AVFrame *out = in; /* in-place by default; replaced below when a separate buffer is required */
 +    int qp_stride = 0;
 +    const int8_t *qp_table = NULL;
 +
 +    /* if we are not in a constant user quantizer mode and we don't want to use
 +     * the quantizers from the B-frames (B-frames often have a higher QP), we
 +     * need to save the qp table from the last non B-frame; this is what the
 +     * following code block does */
 +    if (!spp->qp) {
 +        qp_table = av_frame_get_qp_table(in, &qp_stride, &spp->qscale_type);
 +
 +        if (qp_table && !spp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
 +            int w, h; /* dimensions of the region copied into non_b_qp_table */
 +
 +            /* if the qp stride is not set, it means the QP are only defined on
 +             * a line basis */
 +            if (!qp_stride) {
 +                w = FF_CEIL_RSHIFT(inlink->w, 4);
 +                h = 1;
 +            } else {
 +                w = FF_CEIL_RSHIFT(qp_stride, 4);
 +                h = FF_CEIL_RSHIFT(inlink->h, 4);
 +            }
 +            av_assert0(w * h <= spp->non_b_qp_alloc_size); /* the table was sized in config_input() */
 +            memcpy(spp->non_b_qp_table, qp_table, w * h);
 +        }
 +    }
 +
 +    if (spp->log2_count && !ctx->is_disabled) { /* level 0 or a disabled filter forwards the frame untouched */
 +        if (!spp->use_bframe_qp && spp->non_b_qp_table)
 +            qp_table = spp->non_b_qp_table; /* use the saved non-B table instead of this frame's own */
 +
 +        if (qp_table || spp->qp) { /* without any QP source nothing is filtered */
 +            const int cw = FF_CEIL_RSHIFT(inlink->w, spp->hsub); /* width and height used for planes 1 and 2 */
 +            const int ch = FF_CEIL_RSHIFT(inlink->h, spp->vsub);
 +
 +            /* get a new frame if in-place is not possible or if the dimensions
 +             * are not multiples of 8 */
 +            if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
 +                const int aligned_w = FFALIGN(inlink->w, 8);
 +                const int aligned_h = FFALIGN(inlink->h, 8);
 +
 +                out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
 +                if (!out) {
 +                    av_frame_free(&in); /* the input frame is released on this error path */
 +                    return AVERROR(ENOMEM);
 +                }
 +                av_frame_copy_props(out, in);
 +                out->width  = in->width; /* buffer was requested with 8-aligned dimensions; report the input's size */
 +                out->height = in->height;
 +            }
 +
 +            filter(spp, out->data[0], in->data[0], out->linesize[0], in->linesize[0], inlink->w, inlink->h, qp_table, qp_stride, 1);
 +            filter(spp, out->data[1], in->data[1], out->linesize[1], in->linesize[1], cw,        ch,        qp_table, qp_stride, 0);
 +            filter(spp, out->data[2], in->data[2], out->linesize[2], in->linesize[2], cw,        ch,        qp_table, qp_stride, 0);
 +            emms_c();
 +        }
 +    }
 +
 +    if (in != out) { /* only true when a separate output buffer was allocated above */
 +        if (in->data[3]) /* planes 0-2 were written by filter(); plane 3, if present, is copied unchanged */
 +            av_image_copy_plane(out->data[3], out->linesize[3],
 +                                in ->data[3], in ->linesize[3],
 +                                inlink->w, inlink->h);
 +        av_frame_free(&in);
 +    }
 +    return ff_filter_frame(outlink, out); /* returns whatever the downstream call returns */
 +}
 +
 +static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
 +                           char *res, int res_len, int flags) /* res, res_len and flags are not used here */
 +{ /* handles the "level" command, which updates spp->log2_count */
 +    SPPContext *spp = ctx->priv;
 +
 +    if (!strcmp(cmd, "level")) {
 +        if (!strcmp(args, "max")) /* the literal "max" selects MAX_LEVEL */
 +            spp->log2_count = MAX_LEVEL;
 +        else
 +            spp->log2_count = av_clip(strtol(args, NULL, 10), 0, MAX_LEVEL); /* decimal value, passed through av_clip with bounds 0 and MAX_LEVEL */
 +        return 0;
 +    }
 +    return AVERROR(ENOSYS); /* every other command is rejected */
 +}
 +
 +static av_cold int init(AVFilterContext *ctx) /* filter init callback: sets up the DSP contexts and function pointers */
 +{
 +    SPPContext *spp = ctx->priv;
 +
 +    spp->avctx = avcodec_alloc_context3(NULL); /* codec context handed to the two DSP init calls below */
 +    if (!spp->avctx)
 +        return AVERROR(ENOMEM);
 +    avpriv_dsputil_init(&spp->dsp, spp->avctx);
++    ff_idctdsp_init(&spp->idsp, spp->avctx);
 +    spp->store_slice = store_slice_c; /* C implementations are installed first */
 +    switch (spp->mode) { /* no default case: requantize is not assigned for any other mode value */
 +    case MODE_HARD: spp->requantize = hardthresh_c; break;
 +    case MODE_SOFT: spp->requantize = softthresh_c; break;
 +    }
 +    if (ARCH_X86)
 +        ff_spp_init_x86(spp); /* presumably replaces the pointers above with x86 versions; defined elsewhere */
 +    return 0;
 +}
 +
 +static av_cold void uninit(AVFilterContext *ctx) /* filter uninit callback: frees what init() and config_input() allocated */
 +{
 +    SPPContext *spp = ctx->priv;
 +
 +    av_freep(&spp->temp); /* work buffers from config_input() */
 +    av_freep(&spp->src);
 +    if (spp->avctx) { /* avctx is NULL if init() failed to allocate it */
 +        avcodec_close(spp->avctx);
 +        av_freep(&spp->avctx);
 +    }
 +    av_freep(&spp->non_b_qp_table); /* saved QP table from config_input() */
 +}
 +
 +static const AVFilterPad spp_inputs[] = { /* input pads of the spp filter: a single video pad */
 +    {
 +        .name         = "default",
 +        .type         = AVMEDIA_TYPE_VIDEO,
 +        .config_props = config_input, /* buffer allocation for the input link */
 +        .filter_frame = filter_frame, /* per-frame processing */
 +    },
 +    { NULL } /* zero-initialised last entry, presumably the list terminator */
 +};
 +
 +static const AVFilterPad spp_outputs[] = { /* output pads of the spp filter: a single video pad with no callbacks */
 +    {
 +        .name = "default",
 +        .type = AVMEDIA_TYPE_VIDEO,
 +    },
 +    { NULL } /* zero-initialised last entry, presumably the list terminator */
 +};
 +
 +AVFilter ff_vf_spp = { /* definition of the "spp" filter: ties together its callbacks, pads and options class */
 +    .name            = "spp",
 +    .description     = NULL_IF_CONFIG_SMALL("Apply a simple post processing filter."),
 +    .priv_size       = sizeof(SPPContext), /* size of the private context reached through ctx->priv */
 +    .init            = init,
 +    .uninit          = uninit,
 +    .query_formats   = query_formats,
 +    .inputs          = spp_inputs,
 +    .outputs         = spp_outputs,
 +    .process_command = process_command, /* runtime "level" command */
 +    .priv_class      = &spp_class,
 +    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, /* filter_frame() checks ctx->is_disabled itself */
 +};
index c4293b6,0000000..1884745
mode 100644,000000..100644
--- /dev/null
@@@ -1,59 -1,0 +1,61 @@@
 +/*
 + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 + * Copyright (c) 2013 Clément Bœsch
 + *
 + * This file is part of FFmpeg.
 + *
 + * FFmpeg is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * FFmpeg is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License along
 + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 + */
 +
 +#ifndef AVFILTER_SPP_H
 +#define AVFILTER_SPP_H
 +
 +#include "libavcodec/avcodec.h"
 +#include "libavcodec/dsputil.h"
++#include "libavcodec/idctdsp.h"
 +#include "avfilter.h"
 +
 +#define MAX_LEVEL 6 /* quality levels */
 +
 +typedef struct { /* private state of the spp filter, shared with ff_spp_init_x86() */
 +    const AVClass *av_class;
 +
 +    int log2_count; /* quality level, 0..MAX_LEVEL; 0 makes filter_frame() pass frames through */
 +    int qp; /* non-zero: constant user quantizer; zero: QP tables are read from the frames */
 +    int mode; /* selects the requantize function in init() (MODE_HARD / MODE_SOFT) */
 +    int qscale_type; /* filled in by av_frame_get_qp_table() in filter_frame() */
 +    int temp_linesize; /* row length used to size temp and src: input width + 16, aligned to 16 */
 +    uint8_t *src; /* work buffer allocated in config_input() */
 +    int16_t *temp; /* work buffer allocated in config_input() */
 +    AVCodecContext *avctx; /* allocated in init() and passed to the DSP init functions */
 +    DSPContext dsp;
++    IDCTDSPContext idsp;
 +    int8_t *non_b_qp_table; /* copy of the QP table of the last non-B frame */
 +    int non_b_qp_alloc_size; /* number of entries allocated for non_b_qp_table */
 +    int use_bframe_qp; /* non-zero: QP tables from B-frames are used as well */
 +    int hsub, vsub; /* log2 chroma subsampling, taken from the pixel format descriptor */
 +
 +    void (*store_slice)(uint8_t *dst, const int16_t *src, /* set to store_slice_c in init() */
 +                        int dst_stride, int src_stride,
 +                        int width, int height, int log2_scale,
 +                        const uint8_t dither[8][8]);
 +
 +    void (*requantize)(int16_t dst[64], const int16_t src[64], /* set to hardthresh_c or softthresh_c in init() */
 +                       int qp, const uint8_t *permutation);
 +} SPPContext;
 +
 +void ff_spp_init_x86(SPPContext *s); /* called from init() in vf_spp.c when ARCH_X86 is set */
 +
 +#endif /* AVFILTER_SPP_H */