ARM: move add_pixels_clamped_ARM() to dsputil_arm_s.S
[ffmpeg.git] / libavcodec / armv4l / dsputil_arm_s.S
index 0119251..ba06f37 100644 (file)
@@ -708,3 +708,92 @@ function put_no_rnd_pixels8_xy2_arm, export=1
         .word 0xFCFCFCFC >> 2
         .word 0x0F0F0F0F
         .endfunc
+
+@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
+function ff_add_pixels_clamped_ARM, export=1
+        push            {r4-r10}
+        mov             r10, #8
+1:
+        ldr             r4,  [r1]               /* load dest */
+        /* block[0] and block[1]*/
+        ldrsh           r5,  [r0]
+        ldrsh           r7,  [r0, #2]
+        and             r6,  r4,  #0xFF
+        and             r8,  r4,  #0xFF00
+        add             r6,  r5,  r6
+        add             r8,  r7,  r8,  lsr #8
+        mvn             r5,  r5
+        mvn             r7,  r7
+        tst             r6,  #0x100
+        movne           r6,  r5,  lsr #24
+        tst             r8,  #0x100
+        movne           r8,  r7,  lsr #24
+        mov             r9,  r6
+        ldrsh           r5,  [r0, #4]           /* moved form [A] */
+        orr             r9,  r9,  r8, lsl #8
+        /* block[2] and block[3] */
+        /* [A] */
+        ldrsh           r7,  [r0, #6]
+        and             r6,  r4,  #0xFF0000
+        and             r8,  r4,  #0xFF000000
+        add             r6,  r5,  r6, lsr #16
+        add             r8,  r7,  r8, lsr #24
+        mvn             r5,  r5
+        mvn             r7,  r7
+        tst             r6,  #0x100
+        movne           r6,  r5,  lsr #24
+        tst             r8,  #0x100
+        movne           r8,  r7,  lsr #24
+        orr             r9,  r9,  r6, lsl #16
+        ldr             r4,  [r1, #4]           /* moved form [B] */
+        orr             r9,  r9,  r8, lsl #24
+        /* store dest */
+        ldrsh           r5,  [r0, #8]           /* moved form [C] */
+        str             r9,  [r1]
+
+        /* load dest */
+        /* [B] */
+        /* block[4] and block[5] */
+        /* [C] */
+        ldrsh           r7,  [r0, #10]
+        and             r6,  r4,  #0xFF
+        and             r8,  r4,  #0xFF00
+        add             r6,  r5,  r6
+        add             r8,  r7,  r8, lsr #8
+        mvn             r5,  r5
+        mvn             r7,  r7
+        tst             r6,  #0x100
+        movne           r6,  r5,  lsr #24
+        tst             r8,  #0x100
+        movne           r8,  r7,  lsr #24
+        mov             r9,  r6
+        ldrsh           r5,  [r0, #12]          /* moved from [D] */
+        orr             r9,  r9,  r8, lsl #8
+        /* block[6] and block[7] */
+        /* [D] */
+        ldrsh           r7,  [r0, #14]
+        and             r6,  r4,  #0xFF0000
+        and             r8,  r4,  #0xFF000000
+        add             r6,  r5,  r6, lsr #16
+        add             r8,  r7,  r8, lsr #24
+        mvn             r5,  r5
+        mvn             r7,  r7
+        tst             r6,  #0x100
+        movne           r6,  r5,  lsr #24
+        tst             r8,  #0x100
+        movne           r8,  r7,  lsr #24
+        orr             r9,  r9,  r6, lsl #16
+        add             r0,  r0,  #16           /* moved from [E] */
+        orr             r9,  r9,  r8, lsl #24
+        subs            r10, r10, #1            /* moved from [F] */
+        /* store dest */
+        str             r9,  [r1, #4]
+
+        /* [E] */
+        /* [F] */
+        add             r1,  r1,  r2
+        bne             1b
+
+        pop             {r4-r10}
+        bx              lr
+        .endfunc