arm64: fix inverted register order in transpose_4x4H
author: Janne Grunau <janne-libav@jannau.net>
Fri, 18 Dec 2015 10:23:22 +0000 (11:23 +0100)
committer: Janne Grunau <janne-libav@jannau.net>
Mon, 21 Dec 2015 12:44:20 +0000 (13:44 +0100)
Fix related register order issue in ff_h264_idct_add_neon.

Found-by: zjh8890 <243186085@qq.com>
libavcodec/aarch64/h264idct_neon.S
libavcodec/aarch64/neon.S

index 99c2cb5030a57ef01650505af27834a63436e32a..78f780a6322fcd50533ee7407b9f87cefdaf8e19 100644 (file)
@@ -37,8 +37,8 @@ function ff_h264_idct_add_neon, export=1
         sub             v7.4H,  v16.4H, v3.4H
         add             v0.4H,  v4.4H,  v6.4H
         add             v1.4H,  v5.4H,  v7.4H
-        sub             v2.4H,  v4.4H,  v6.4H
-        sub             v3.4H,  v5.4H,  v7.4H
+        sub             v3.4H,  v4.4H,  v6.4H
+        sub             v2.4H,  v5.4H,  v7.4H
 
         transpose_4x4H  v0, v1, v2, v3, v4, v5, v6, v7
 
index f1072b73e55ece9f8546b4bbedda5c49540e4aa3..767bc9d45576f0f0f1036664d3283badbfee6963 100644 (file)
 .macro  transpose_4x4H  r0, r1, r2, r3, r4, r5, r6, r7
         trn1            \r4\().4H,  \r0\().4H,  \r1\().4H
         trn2            \r5\().4H,  \r0\().4H,  \r1\().4H
-        trn1            \r7\().4H,  \r3\().4H,  \r2\().4H
-        trn2            \r6\().4H,  \r3\().4H,  \r2\().4H
+        trn1            \r7\().4H,  \r2\().4H,  \r3\().4H
+        trn2            \r6\().4H,  \r2\().4H,  \r3\().4H
         trn1            \r0\().2S,  \r4\().2S,  \r7\().2S
         trn2            \r3\().2S,  \r4\().2S,  \r7\().2S
         trn1            \r1\().2S,  \r5\().2S,  \r6\().2S