ARM: replace some #if with if()
[ffmpeg.git] / libavcodec / arm / simple_idct_neon.S
index e05c5fc..4c329d8 100644 (file)
@@ -45,8 +45,6 @@
 #define w7 d1[2]
 #define w4c d1[3]
 
 #define w7 d1[2]
 #define w4c d1[3]
 
-        .fpu neon
-
         .macro idct_col4_top
         vmull.s16       q7,  d6,  w2    /* q9   = W2 * col[2] */
         vmull.s16       q8,  d6,  w6    /* q10  = W6 * col[2] */
         .macro idct_col4_top
         vmull.s16       q7,  d6,  w2    /* q9   = W2 * col[2] */
         vmull.s16       q8,  d6,  w6    /* q10  = W6 * col[2] */
         .text
         .align 6
 
         .text
         .align 6
 
+function idct_row4_pld_neon             /* PLD hints for the 8 addresses r0 + k*r1, k = 0..7; overwrites r3 */
+        pld             [r0]            /* k = 0; presumably r0 = pixel pointer, r1 = line size -- TODO confirm with callers */
+        add             r3,  r0,  r1,  lsl #2   /* r3 = r0 + 4*r1 */
+        pld             [r0, r1]        /* k = 1 */
+        pld             [r0, r1, lsl #1]        /* k = 2 */
+        pld             [r3, -r1]       /* k = 3 */
+        pld             [r3]            /* k = 4 */
+        pld             [r3, r1]        /* k = 5 */
+        add             r3,  r3,  r1,  lsl #1   /* r3 = r0 + 6*r1 */
+        pld             [r3]            /* k = 6 */
+        pld             [r3, r1]        /* k = 7 */
+        .endfunc                        /* no return instruction above: execution continues into the code that follows (idct_row4_neon in this view) -- NOTE(review): confirm the function macro emits nothing in between */
+
 function idct_row4_neon
         vmov.i32        q15, #(1<<(ROW_SHIFT-1))
         vld1.64         {d2-d5},  [r2,:128]!
 function idct_row4_neon
         vmov.i32        q15, #(1<<(ROW_SHIFT-1))
         vld1.64         {d2-d5},  [r2,:128]!
@@ -239,8 +250,7 @@ idct_coeff_neon:
         pld             [\data]
         pld             [\data, #64]
         vpush           {d8-d15}
         pld             [\data]
         pld             [\data, #64]
         vpush           {d8-d15}
-        movw            r3, #:lower16:idct_coeff_neon
-        movt            r3, #:upper16:idct_coeff_neon
+        movrel          r3,  idct_coeff_neon
         vld1.64         {d0,d1}, [r3,:128]
         .endm
 
         vld1.64         {d0,d1}, [r3,:128]
         .endm
 
@@ -253,7 +263,7 @@ idct_coeff_neon:
 function ff_simple_idct_put_neon, export=1
         idct_start      r2
 
 function ff_simple_idct_put_neon, export=1
         idct_start      r2
 
-        bl              idct_row4_neon
+        bl              idct_row4_pld_neon
         bl              idct_row4_neon
         add             r2,  r2,  #-128
         bl              idct_col4_neon
         bl              idct_row4_neon
         add             r2,  r2,  #-128
         bl              idct_col4_neon
@@ -308,7 +318,7 @@ function idct_col4_add8_neon
 function ff_simple_idct_add_neon, export=1
         idct_start      r2
 
 function ff_simple_idct_add_neon, export=1
         idct_start      r2
 
-        bl              idct_row4_neon
+        bl              idct_row4_pld_neon
         bl              idct_row4_neon
         add             r2,  r2,  #-128
         bl              idct_col4_neon
         bl              idct_row4_neon
         add             r2,  r2,  #-128
         bl              idct_col4_neon