vp9: remove one optimization branch in iadst16 which causes overflows.
author Ronald S. Bultje <rsbultje@gmail.com>
Wed, 22 Apr 2015 18:53:01 +0000 (14:53 -0400)
committer Michael Niedermayer <michaelni@gmx.at>
Fri, 15 May 2015 08:04:50 +0000 (10:04 +0200)
See sample vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm from the vp9 test
vector set which reproduces the issue. This probably costs a few cycles,
but I don't think there's an easy way to work around that.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit d02d04a18f300ebe97319ca6e91fc943cb14f58b)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
libavcodec/x86/vp9itxfm.asm

index bfe427f..a9d45a4 100644 (file)
@@ -1699,7 +1699,9 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
     SUMSUB_BA                w,  5,  7,  4
     PSIGNW                  m5, [pw_m1]                     ; m12=out15[w], m8=t3[w]
 
-%if cpuflag(ssse3)
+    ; unfortunately, the code below overflows in some cases, e.g.
+    ; http://downloads.webmproject.org/test_data/libvpx/vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm
+%if 0 ; cpuflag(ssse3)
     SUMSUB_BA               w,   7,  6,  4
     pmulhrsw                m7, [pw_m11585x2]               ; m8=out7[w]
     pmulhrsw                m6, [pw_11585x2]                ; m1=out8[w]