Merge commit 'b519298a1578e0c895d53d4b4ed8867b1c031a56'
author Michael Niedermayer <michaelni@gmx.at>
Thu, 6 Dec 2012 14:43:34 +0000 (15:43 +0100)
committer Michael Niedermayer <michaelni@gmx.at>
Thu, 6 Dec 2012 14:55:47 +0000 (15:55 +0100)
* commit 'b519298a1578e0c895d53d4b4ed8867b1c031a56':
  pixdesc: fix yuva 10bit bit depth
  avconv: deprecate the -vol option
  x86: af_volume: add SSE2/SSSE3/AVX-optimized s32 volume scaling
  x86: af_volume: add SSE2-optimized s16 volume scaling

Conflicts:
ffmpeg.c
tests/ref/lavfi/pixdesc
tests/ref/lavfi/pixfmts_copy
tests/ref/lavfi/pixfmts_null
tests/ref/lavfi/pixfmts_scale
tests/ref/lavfi/pixfmts_vflip

Merged-by: Michael Niedermayer <michaelni@gmx.at>
1  2 
ffmpeg_filter.c
libavfilter/af_volume.h
libavfilter/af_volume_justin.c
libavfilter/x86/af_volume.asm
libavfilter/x86/af_volume_init.c
libavutil/x86/x86inc.asm

diff --cc ffmpeg_filter.c
@@@ -641,54 -424,56 +641,57 @@@ static int configure_input_audio_filter
                                              fg->graph)) < 0)
          return ret;
  
 -    if (audio_sync_method > 0) {
 -        AVFilterContext *async;
 -        int  len = 0;
 +#define AUTO_INSERT_FILTER_INPUT(opt_name, filter_name, arg) do {                 \
 +    AVFilterContext *filt_ctx;                                              \
 +                                                                            \
 +    av_log(NULL, AV_LOG_INFO, opt_name " is forwarded to lavfi "            \
 +           "similarly to -af " filter_name "=%s.\n", arg);                  \
 +                                                                            \
 +    snprintf(name, sizeof(name), "graph %d %s for input stream %d:%d",      \
 +                fg->index, filter_name, ist->file_index, ist->st->index);   \
 +    ret = avfilter_graph_create_filter(&filt_ctx,                           \
 +                                       avfilter_get_by_name(filter_name),   \
 +                                       name, arg, NULL, fg->graph);         \
 +    if (ret < 0)                                                            \
 +        return ret;                                                         \
 +                                                                            \
 +    ret = avfilter_link(filt_ctx, 0, first_filter, pad_idx);                \
 +    if (ret < 0)                                                            \
 +        return ret;                                                         \
 +                                                                            \
 +    first_filter = filt_ctx;                                                  \
 +} while (0)
  
 -        av_log(NULL, AV_LOG_WARNING, "-async has been deprecated. Used the "
 -               "asyncts audio filter instead.\n");
 +    if (audio_sync_method > 0) {
 +        char args[256] = {0};
  
 +        av_strlcatf(args, sizeof(args), "min_comp=0.001:min_hard_comp=%f", audio_drift_threshold);
          if (audio_sync_method > 1)
 -            len += snprintf(args + len, sizeof(args) - len, "compensate=1:"
 -                            "max_comp=%d:", audio_sync_method);
 -        snprintf(args + len, sizeof(args) - len, "min_delta=%f",
 -                 audio_drift_threshold);
 -
 -        snprintf(name, sizeof(name), "graph %d audio sync for input stream %d:%d",
 -                 fg->index, ist->file_index, ist->st->index);
 -        ret = avfilter_graph_create_filter(&async,
 -                                           avfilter_get_by_name("asyncts"),
 -                                           name, args, NULL, fg->graph);
 -        if (ret < 0)
 -            return ret;
 +            av_strlcatf(args, sizeof(args), ":max_soft_comp=%f", audio_sync_method/(double)ist->st->codec->sample_rate);
 +        AUTO_INSERT_FILTER_INPUT("-async", "aresample", args);
 +    }
  
 -        ret = avfilter_link(async, 0, first_filter, pad_idx);
 -        if (ret < 0)
 -            return ret;
 +//     if (ost->audio_channels_mapped) {
 +//         int i;
 +//         AVBPrint pan_buf;
 +//         av_bprint_init(&pan_buf, 256, 8192);
 +//         av_bprintf(&pan_buf, "0x%"PRIx64,
 +//                    av_get_default_channel_layout(ost->audio_channels_mapped));
 +//         for (i = 0; i < ost->audio_channels_mapped; i++)
 +//             if (ost->audio_channels_map[i] != -1)
 +//                 av_bprintf(&pan_buf, ":c%d=c%d", i, ost->audio_channels_map[i]);
 +//         AUTO_INSERT_FILTER_INPUT("-map_channel", "pan", pan_buf.str);
 +//         av_bprint_finalize(&pan_buf, NULL);
 +//     }
  
 -        first_filter = async;
 -        pad_idx = 0;
 -    }
      if (audio_volume != 256) {
 -        AVFilterContext *volume;
 +        char args[256];
  
 -        snprintf(args, sizeof(args), "volume=%f", audio_volume / 256.0);
 -
 -        snprintf(name, sizeof(name), "graph %d volume for input stream %d:%d",
 -                 fg->index, ist->file_index, ist->st->index);
 -        ret = avfilter_graph_create_filter(&volume,
 -                                           avfilter_get_by_name("volume"),
 -                                           name, args, NULL, fg->graph);
 -        if (ret < 0)
 -            return ret;
 -
 -        ret = avfilter_link(volume, 0, first_filter, pad_idx);
 -        if (ret < 0)
 -            return ret;
 -
 -        first_filter = volume;
 -        pad_idx = 0;
+         av_log(NULL, AV_LOG_WARNING, "-vol has been deprecated. Use the volume "
+                "audio filter instead.\n");
 +        snprintf(args, sizeof(args), "%f", audio_volume / 256.);
 +        AUTO_INSERT_FILTER_INPUT("-vol", "volume", args);
      }
      if ((ret = avfilter_link(ifilter->filter, 0, first_filter, pad_idx)) < 0)
          return ret;
Simple merge
Simple merge
index 0000000,4e5ad22..dc54f6e
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,138 +1,138 @@@
 -;* This file is part of Libav.
+ ;*****************************************************************************
+ ;* x86-optimized functions for volume filter
+ ;* Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
+ ;*
 -;* Libav is free software; you can redistribute it and/or
++;* This file is part of FFmpeg.
+ ;*
 -;* Libav is distributed in the hope that it will be useful,
++;* FFmpeg is free software; you can redistribute it and/or
+ ;* modify it under the terms of the GNU Lesser General Public
+ ;* License as published by the Free Software Foundation; either
+ ;* version 2.1 of the License, or (at your option) any later version.
+ ;*
 -;* License along with Libav; if not, write to the Free Software
++;* FFmpeg is distributed in the hope that it will be useful,
+ ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ ;* Lesser General Public License for more details.
+ ;*
+ ;* You should have received a copy of the GNU Lesser General Public
++;* License along with FFmpeg; if not, write to the Free Software
+ ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ;******************************************************************************
+ %include "libavutil/x86/x86util.asm"
+ SECTION_RODATA 32
+ pd_1_256:     times 4 dq 0x3F70000000000000
+ pd_int32_max: times 4 dq 0x41DFFFFFFFC00000
+ pw_1:         times 8 dw 1
+ pw_128:       times 8 dw 128
+ pq_128:       times 2 dq 128
+ SECTION_TEXT
+ ;------------------------------------------------------------------------------
+ ; void ff_scale_samples_s16(uint8_t *dst, const uint8_t *src, int len,
+ ;                           int volume)
+ ;------------------------------------------------------------------------------
+ INIT_XMM sse2
+ cglobal scale_samples_s16, 4,4,4, dst, src, len, volume ; scale len int16 samples by volume/256, rounded and saturated
+     movd        m0, volumem
+     pshuflw     m0, m0, 0           ; copy volume into the four low words
+     punpcklwd   m0, [pw_1]          ; m0 = (volume, 1) word pairs, one pair per dword
+     mova        m1, [pw_128]        ; rounding term fed through the "* 1" half of each pair
+     lea       lenq, [lend*2-mmsize] ; byte offset of the last vector; the loop walks backwards
+ .loop:
+     ; dst[i] = av_clip_int16((src[i] * volume + 128) >> 8);
+     mova        m2, [srcq+lenq]     ; aligned load of 8 samples
+     punpcklwd   m3, m2, m1          ; low 4 samples as (src, 128) word pairs
+     punpckhwd   m2, m1              ; high 4 samples as (src, 128) word pairs
+     pmaddwd     m3, m0              ; src*volume + 128*1 as signed 32-bit
+     pmaddwd     m2, m0
+     psrad       m3, 8
+     psrad       m2, 8
+     packssdw    m3, m2              ; signed saturation back to int16 does the clipping
+     mova  [dstq+lenq], m3
+     sub       lenq, mmsize
+     jge .loop                       ; continue while the byte offset is still >= 0
+     REP_RET
+ ;------------------------------------------------------------------------------
+ ; void ff_scale_samples_s32(uint8_t *dst, const uint8_t *src, int len,
+ ;                           int volume)
+ ;------------------------------------------------------------------------------
+ %macro SCALE_SAMPLES_S32 0
+ cglobal scale_samples_s32, 4,4,4, dst, src, len, volume ; scale len int32 samples by volume/256 using double-precision math
+ %if ARCH_X86_32 && cpuflag(avx)
+     vbroadcastss   xmm2, volumem    ; broadcast the 32-bit volume straight from memory
+ %else
+     movd           xmm2, volumed
+     pshufd         xmm2, xmm2, 0    ; broadcast volume to all four dwords
+ %endif
+     CVTDQ2PD         m2, xmm2       ; integer volume -> packed doubles
+     mulpd            m2, m2, [pd_1_256] ; pd_1_256 holds the bit pattern of 2^-8, so m2 = volume / 256.0
+     mova             m3, [pd_int32_max] ; 2147483647.0, the upper clamp
+     lea            lenq, [lend*4-mmsize] ; byte offset of the last vector; the loop walks backwards
+ .loop:
+     CVTDQ2PD         m0, [srcq+lenq         ] ; first half of the samples in this vector
+     CVTDQ2PD         m1, [srcq+lenq+mmsize/2] ; second half
+     mulpd            m0, m0, m2
+     mulpd            m1, m1, m2
+     minpd            m0, m0, m3     ; only the upper bound is clamped explicitly
+     minpd            m1, m1, m3
+     cvtpd2dq       xmm0, m0         ; doubles -> int32
+     cvtpd2dq       xmm1, m1
+ %if cpuflag(avx)
+     vmovdqa [dstq+lenq         ], xmm0 ; 4 results (16 bytes) per store
+     vmovdqa [dstq+lenq+mmsize/2], xmm1
+ %else
+     movq    [dstq+lenq         ], xmm0 ; 2 results (8 bytes) per store
+     movq    [dstq+lenq+mmsize/2], xmm1
+ %endif
+     sub            lenq, mmsize
+     jge .loop                       ; continue while the byte offset is still >= 0
+     REP_RET
+ %endmacro
+ INIT_XMM sse2
+ %define CVTDQ2PD cvtdq2pd
+ SCALE_SAMPLES_S32
+ %define CVTDQ2PD vcvtdq2pd
+ INIT_YMM avx
+ SCALE_SAMPLES_S32
+ %undef CVTDQ2PD
+ ; NOTE: This is not bit-identical with the C version because it clips to
+ ;       [-INT_MAX, INT_MAX] instead of [INT_MIN, INT_MAX]
+ INIT_XMM ssse3, atom
+ cglobal scale_samples_s32, 4,4,8, dst, src, len, volume ; integer-only variant: scales magnitudes, then restores the sign
+     movd        m4, volumem
+     pshufd      m4, m4, 0           ; broadcast volume to all four dwords
+     mova        m5, [pq_128]        ; 64-bit rounding constant
+     pxor        m6, m6              ; zero, used for the overflow compare
+     lea       lenq, [lend*4-mmsize] ; byte offset of the last vector; the loop walks backwards
+ .loop:
+     ; dst[i] = av_clipl_int32((src[i] * volume + 128) >> 8);
+     mova        m7, [srcq+lenq]     ; keep the signed input for psignd below
+     pabsd       m3, m7              ; work on magnitudes
+     pshufd      m0, m3, q0100       ; samples 0 and 1 into the even dwords read by pmuludq
+     pshufd      m1, m3, q0302       ; samples 2 and 3 into the even dwords
+     pmuludq     m0, m4              ; 32x32 -> 64-bit unsigned products
+     pmuludq     m1, m4
+     paddq       m0, m5
+     paddq       m1, m5
+     psrlq       m0, 7               ; shift by 7 only; the remaining bit is shifted out by psrld below
+     psrlq       m1, 7
+     shufps      m2, m0, m1, q3131   ; gather the high dwords of the four results
+     shufps      m0, m0, m1, q2020   ; gather the low dwords of the four results
+     pcmpgtd     m2, m6              ; all-ones mask where the high dword is positive, i.e. overflow
+     por         m0, m2              ; overflowed lanes become 0xFFFFFFFF
+     psrld       m0, 1               ; finishes the >> 8; overflowed lanes become 0x7FFFFFFF
+     psignd      m0, m7              ; reapply the sign of the source sample
+     mova  [dstq+lenq], m0
+     sub       lenq, mmsize
+     jge .loop                       ; continue while the byte offset is still >= 0
+     REP_RET
index 0000000,02bedd2..beee8ca
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,59 +1,59 @@@
 - * This file is part of Libav.
+ /*
 - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ #include "config.h"
+ #include "libavutil/cpu.h"
+ #include "libavutil/samplefmt.h"
+ #include "libavutil/x86/cpu.h"
+ #include "libavfilter/af_volume.h"
+ void ff_scale_samples_s16_sse2(uint8_t *dst, const uint8_t *src, int len,
+                                int volume);
+ void ff_scale_samples_s32_sse2(uint8_t *dst, const uint8_t *src, int len,
+                                int volume);
+ void ff_scale_samples_s32_ssse3_atom(uint8_t *dst, const uint8_t *src, int len,
+                                      int volume);
+ void ff_scale_samples_s32_avx(uint8_t *dst, const uint8_t *src, int len,
+                               int volume);
+ void ff_volume_init_x86(VolumeContext *vol) /* install x86 SIMD sample scalers into vol when the CPU supports them */
+ {
+     int mm_flags = av_get_cpu_flags();
+     enum AVSampleFormat sample_fmt = av_get_packed_sample_fmt(vol->sample_fmt); /* presumably lets planar and packed variants share one check -- confirm */
+     if (sample_fmt == AV_SAMPLE_FMT_S16) {
+         if (EXTERNAL_SSE2(mm_flags) && vol->volume_i < 32768) { /* the s16 kernel feeds volume to pmaddwd as a signed 16-bit word */
+             vol->scale_samples = ff_scale_samples_s16_sse2;
+             vol->samples_align = 8; /* kernel handles 8 int16 samples (16 bytes) per iteration */
+         }
+     } else if (sample_fmt == AV_SAMPLE_FMT_S32) {
+         /* The checks below run in order, so a later match overrides an earlier one. */
+         if (EXTERNAL_SSE2(mm_flags)) {
+             vol->scale_samples = ff_scale_samples_s32_sse2;
+             vol->samples_align = 4; /* 4 int32 samples (16 bytes) per iteration */
+         }
+         if (EXTERNAL_SSSE3(mm_flags) && mm_flags & AV_CPU_FLAG_ATOM) { /* integer-only variant, chosen only when the Atom flag is set */
+             vol->scale_samples = ff_scale_samples_s32_ssse3_atom;
+             vol->samples_align = 4; /* 4 int32 samples (16 bytes) per iteration */
+         }
+         if (EXTERNAL_AVX(mm_flags)) {
+             vol->scale_samples = ff_scale_samples_s32_avx;
+             vol->samples_align = 8; /* 8 int32 samples (32 bytes) per iteration */
+         }
+     }
+ }
Simple merge