H.264: optimize CABAC x86 asm for Atom
author Jason Garrett-Glaser <jason@x264.com>
Wed, 27 Jul 2011 02:08:05 +0000 (19:08 -0700)
committer Jason Garrett-Glaser <jason@x264.com>
Thu, 28 Jul 2011 20:06:13 +0000 (13:06 -0700)
libavcodec/h264_cabac.c
libavcodec/x86/cabac.h
libavcodec/x86/h264_i386.h

index f7cec5d..065b6e8 100644 (file)
@@ -1649,7 +1649,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
-                                                 last_coeff_ctx_base-significant_coeff_ctx_base, sig_off);
+                                                 last_coeff_ctx_base, sig_off);
     } else {
         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
                                              last_coeff_ctx_base-significant_coeff_ctx_base);
     } else {
         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
                                              last_coeff_ctx_base-significant_coeff_ctx_base);
index 52bea9c..1ad74ff 100644 (file)
@@ -34,8 +34,8 @@
         "cmova  %%ecx       , "range"   \n\t"\
         "sbb    %%ecx       , %%ecx     \n\t"\
         "and    %%ecx       , "tmp"     \n\t"\
         "cmova  %%ecx       , "range"   \n\t"\
         "sbb    %%ecx       , %%ecx     \n\t"\
         "and    %%ecx       , "tmp"     \n\t"\
-        "sub    "tmp"       , "low"     \n\t"\
-        "xor    %%ecx       , "ret"     \n\t"
+        "xor    %%ecx       , "ret"     \n\t"\
+        "sub    "tmp"       , "low"     \n\t"
 #else /* HAVE_FAST_CMOV */
 #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
         "mov    "tmp"       , %%ecx     \n\t"\
 #else /* HAVE_FAST_CMOV */
 #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
         "mov    "tmp"       , %%ecx     \n\t"\
         "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
         "shl    %%cl        , "range"                                   \n\t"\
         "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
         "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
         "shl    %%cl        , "range"                                   \n\t"\
         "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
-        "mov    "tmpbyte"   , "statep"                                  \n\t"\
         "shl    %%cl        , "low"                                     \n\t"\
         "shl    %%cl        , "low"                                     \n\t"\
+        "mov    "tmpbyte"   , "statep"                                  \n\t"\
         "test   "lowword"   , "lowword"                                 \n\t"\
         " jnz   1f                                                      \n\t"\
         "mov "byte"("cabac"), %%"REG_c"                                 \n\t"\
         "test   "lowword"   , "lowword"                                 \n\t"\
         " jnz   1f                                                      \n\t"\
         "mov "byte"("cabac"), %%"REG_c"                                 \n\t"\
+        "add    $2          , "byte    "("cabac")                       \n\t"\
         "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
         "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
-        "bswap  "tmp"                                                   \n\t"\
-        "shr    $15         , "tmp"                                     \n\t"\
-        "sub    $0xFFFF     , "tmp"                                     \n\t"\
-        "add    $2          , %%"REG_c"                                 \n\t"\
-        "mov    %%"REG_c"   , "byte    "("cabac")                       \n\t"\
         "lea    -1("low")   , %%ecx                                     \n\t"\
         "xor    "low"       , %%ecx                                     \n\t"\
         "shr    $15         , %%ecx                                     \n\t"\
         "lea    -1("low")   , %%ecx                                     \n\t"\
         "xor    "low"       , %%ecx                                     \n\t"\
         "shr    $15         , %%ecx                                     \n\t"\
+        "bswap  "tmp"                                                   \n\t"\
+        "shr    $15         , "tmp"                                     \n\t"\
         "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx            \n\t"\
         "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx            \n\t"\
+        "sub    $0xFFFF     , "tmp"                                     \n\t"\
         "neg    %%ecx                                                   \n\t"\
         "add    $7          , %%ecx                                     \n\t"\
         "shl    %%cl        , "tmp"                                     \n\t"\
         "neg    %%ecx                                                   \n\t"\
         "add    $7          , %%ecx                                     \n\t"\
         "shl    %%cl        , "tmp"                                     \n\t"\
index 9c86210..0151cd5 100644 (file)
@@ -72,8 +72,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
         "test $1, %4                            \n\t"
         " jnz 4f                                \n\t"
 
         "test $1, %4                            \n\t"
         " jnz 4f                                \n\t"
 
-        "add  $4, %0                            \n\t"
-        "mov  %0, %2                            \n\t"
+        "add  $4, %2                            \n\t"
 
         "3:                                     \n\t"
         "add  $1, %1                            \n\t"
 
         "3:                                     \n\t"
         "add  $1, %1                            \n\t"
@@ -101,7 +100,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
 static int decode_significance_8x8_x86(CABACContext *c,
                                        uint8_t *significant_coeff_ctx_base,
 
 static int decode_significance_8x8_x86(CABACContext *c,
                                        uint8_t *significant_coeff_ctx_base,
-                                       int *index, x86_reg last_off, const uint8_t *sig_off){
+                                       int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){
     int minusindex= 4-(intptr_t)index;
     int bit;
     x86_reg coeff_count;
     int minusindex= 4-(intptr_t)index;
     int bit;
     x86_reg coeff_count;
@@ -128,7 +127,6 @@ static int decode_significance_8x8_x86(CABACContext *c,
         " jz 3f                                 \n\t"
 
         "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t"
         " jz 3f                                 \n\t"
 
         "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t"
-        "add %9, %6                             \n\t"
         "add %11, %6                            \n\t"
 
         BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
         "add %11, %6                            \n\t"
 
         BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
@@ -141,8 +139,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "test $1, %4                            \n\t"
         " jnz 4f                                \n\t"
 
         "test $1, %4                            \n\t"
         " jnz 4f                                \n\t"
 
-        "add $4, %0                             \n\t"
-        "mov %0, %2                             \n\t"
+        "add  $4, %2                            \n\t"
 
         "3:                                     \n\t"
         "addl $1, %k6                           \n\t"
 
         "3:                                     \n\t"
         "addl $1, %k6                           \n\t"
@@ -159,7 +156,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "movl %3, %a13(%7)                      \n\t"
         :"=&q"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit),
          "=&r"(range), "=&r"(state)
         "movl %3, %a13(%7)                      \n\t"
         :"=&q"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit),
          "=&r"(range), "=&r"(state)
-        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off),
+        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base),
          "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
          "i"(offsetof(CABACContext, bytestream))
         : "%"REG_c, "memory"
          "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
          "i"(offsetof(CABACContext, bytestream))
         : "%"REG_c, "memory"