Merge commit '21732063a346475eb22c332b27e8216b79f9ad4a'
[ffmpeg.git] / libavcodec / bfin / hpel_pixels_bfin.S
1 /*
2  * Blackfin Pixel Operations
3  * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 #include "config_bfin.h"
22
23 /*
24   motion compensation
25   primitives
26
27      * Halfpel motion compensation with rounding (a+b+1)>>1.
28      * This is an array[4][4] of motion compensation functions for 4
29      * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
30      * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
31      * @param block destination where the result is stored
32      * @param pixels source
33      * @param line_size number of bytes in a horizontal line of block
34      * @param h height
35
36 */
37
38 DEFUN(put_pixels8uc,mL1,
39         (uint8_t *block, const uint8_t *s0, const uint8_t *s1,
40                  int dest_size, int line_size, int h)):
41         i3=r0;        // dest
42         i0=r1;        // src0
43         i1=r2;        // src1
44         r0=[sp+12];   // dest_size
45         r2=[sp+16];   // line_size
46         p0=[sp+20];   // h
47         [--sp] = (r7:6);
48         r0+=-4;
49         m3=r0;
50         r2+=-8;
51         m0=r2;
52         LSETUP(pp8$0,pp8$1) LC0=P0;
53         DISALGNEXCPT                || R0 = [I0++]  || R2  =[I1++];
54
55 pp8$0:  DISALGNEXCPT                || R1 = [I0++]  || R3  =[I1++];
56         R6 = BYTEOP1P(R1:0,R3:2)    || R0 = [I0++M0]|| R2  =[I1++M0];
57         R7 = BYTEOP1P(R1:0,R3:2)(R) || R0 = [I0++]  || [I3++] = R6 ;
58 pp8$1:  DISALGNEXCPT                || R2 = [I1++]  || [I3++M3] = R7;
59
60         (r7:6) = [sp++];
61         RTS;
62 DEFUN_END(put_pixels8uc)
63
/*
 * put_pixels16uc: write h rows of 16 bytes to block; each output word is
 * produced by BYTEOP1P from the matching bytes of s0 and s1 (per the file
 * header, presumably the rounded average (a+b+1)>>1 -- confirm against the
 * Blackfin BYTEOP1P documentation).
 *
 * Arguments: r0 = block, r1 = s0, r2 = s1; dest_size, line_size and h are
 * fetched relative to the frame pointer set up by "link 0" (fp+20, fp+24,
 * fp+28).
 *
 * Per row the destination pointer advances by dest_size bytes and both
 * source pointers by line_size bytes. r6/r7 are saved on entry and restored
 * before returning.
 *
 * The loop is software pipelined: the tail of each iteration already loads
 * the first word of the next row, so the final iteration reads one word at
 * s0/s1 + h*line_size whose value is never used.
 */
64 DEFUN(put_pixels16uc,mL1,
65         (uint8_t *block, const uint8_t *s0, const uint8_t *s1,
66                  int dest_size, int line_size, int h)):
67         link 0;
68         [--sp] = (r7:6);
69         i3=r0;        // dest
70         i0=r1;        // src0
71         i1=r2;        // src1
72         r0=[fp+20];   // dest_size
73         r2=[fp+24];   // line_size
74         p0=[fp+28];   // h
75
76
77         r0+=-12;
78         m3=r0;        // dest_size-12: three stores step I3 by 4 each, the fourth by M3 => dest_size per row
79         r2+=-16;
80         m0=r2;        // line_size-16: loads step I0/I1 by 4+4+4+M0+4 => line_size per row
81
82         LSETUP(pp16$0,pp16$1) LC0=P0;   // zero-overhead loop, h iterations
         // prologue: first source word of row 0
83          DISALGNEXCPT                || R0 = [I0++]   || R2  =[I1++];
84
85 pp16$0:  DISALGNEXCPT                || R1 = [I0++]   || R3  =[I1++];
         // output words 0 and 1 ((R) presumably reverses the pair order)
86          R6 = BYTEOP1P(R1:0,R3:2)    || R0 = [I0++]   || R2  =[I1++];
87          R7 = BYTEOP1P(R1:0,R3:2)(R) || R1 = [I0++]   || R3  =[I1++];
88          [I3++] = R6;
         // output words 2 and 3; the ++M0 loads step both sources to the next row
89          R6 = BYTEOP1P(R1:0,R3:2)    || R0 = [I0++M0] || R2  =[I1++M0];
90          R7 = BYTEOP1P(R1:0,R3:2)(R) || R0 = [I0++]   || [I3++] = R7 ;
91          [I3++] = R6;
         // pre-load next row from s1; store last word and step dest to the next row
92 pp16$1:  DISALGNEXCPT                || R2 = [I1++]   || [I3++M3] = R7;
93
94         (r7:6) = [sp++];
95         unlink;
96         RTS;
97 DEFUN_END(put_pixels16uc)
98
99
100
101
102
103
/*
 * put_pixels8uc_nornd: write h rows of 8 bytes to block; each output word
 * is produced by BYTEOP1P with the (T) option from the matching bytes of s0
 * and s1 (presumably the truncating, i.e. non-rounding, average -- confirm
 * against the Blackfin BYTEOP1P documentation).
 *
 * Arguments: r0 = block, r1 = s0, r2 = s1; line_size and h are fetched from
 * the stack at sp+12 and sp+16 before r7:6 are pushed.
 *
 * There is no separate destination stride here: destination and both
 * sources all advance by line_size bytes per row. r6/r7 are saved on entry
 * and restored before returning.
 *
 * The loop is software pipelined: the tail of each iteration already loads
 * the first word of the next row, so the final iteration reads one word at
 * s0/s1 + h*line_size whose value is never used.
 */
104 DEFUN(put_pixels8uc_nornd,mL1,
105         (uint8_t *block, const uint8_t *s0, const uint8_t *s1,
106                  int line_size, int h)):
107         i3=r0;        // dest
108         i0=r1;        // src0
109         i1=r2;        // src1
110         r2=[sp+12];   // line_size
111         p0=[sp+16];   // h
112         [--sp] = (r7:6);
113         r2+=-4;
114         m3=r2;        // line_size-4: stores step I3 by 4 + M3 => line_size per row
115         r2+=-4;
116         m0=r2;        // line_size-8: loads step I0/I1 by 4 + M0 + 4 => line_size per row
117         LSETUP(pp8$2,pp8$3) LC0=P0;   // zero-overhead loop, h iterations
        // prologue: first source word of row 0
118         DISALGNEXCPT                || R0 = [I0++]  || R2  =[I1++];
119
120 pp8$2:  DISALGNEXCPT                || R1 = [I0++]  || R3  =[I1++];
        // first output word; the ++M0 loads step both sources to the next row
121         R6 = BYTEOP1P(R1:0,R3:2)(T)  || R0 = [I0++M0]|| R2  =[I1++M0];
        // second output word ((R) presumably reverses the pair order); pre-load next row from s0
122         R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R0 = [I0++]  || [I3++] = R6 ;
        // pre-load next row from s1; store second word and step dest to the next row
123 pp8$3:  DISALGNEXCPT                || R2 = [I1++]  || [I3++M3] = R7;
124
125         (r7:6) = [sp++];
126         RTS;
127 DEFUN_END(put_pixels8uc_nornd)
128
/*
 * put_pixels16uc_nornd: write h rows of 16 bytes to block; each output word
 * is produced by BYTEOP1P with the (T) option from the matching bytes of s0
 * and s1 (presumably the truncating, i.e. non-rounding, average -- confirm
 * against the Blackfin BYTEOP1P documentation).
 *
 * Arguments: r0 = block, r1 = s0, r2 = s1; line_size and h are fetched from
 * the stack at sp+12 and sp+16 before r7:6 are pushed.
 *
 * There is no separate destination stride here: destination and both
 * sources all advance by line_size bytes per row. r6/r7 are saved on entry
 * and restored before returning.
 *
 * The loop is software pipelined: the tail of each iteration already loads
 * the first word of the next row, so the final iteration reads one word at
 * s0/s1 + h*line_size whose value is never used.
 */
129 DEFUN(put_pixels16uc_nornd,mL1,
130         (uint8_t *block, const uint8_t *s0, const uint8_t *s1,
131                  int line_size, int h)):
132         i3=r0;        // dest
133         i0=r1;        // src0
134         i1=r2;        // src1
135         r2=[sp+12];   // line_size
136         p0=[sp+16];   // h
137
138         [--sp] = (r7:6);
139         r2+=-12;
140         m3=r2;        // line_size-12: three stores step I3 by 4 each, the fourth by M3 => line_size per row
141         r2+=-4;
142         m0=r2;        // line_size-16: loads step I0/I1 by 4+4+4+M0+4 => line_size per row
143
144         LSETUP(pp16$2,pp16$3) LC0=P0;   // zero-overhead loop, h iterations
        // prologue: first source word of row 0
145         DISALGNEXCPT                || R0 = [I0++]   || R2  =[I1++];
146
147 pp16$2:
148         DISALGNEXCPT                || R1 = [I0++]   || R3  =[I1++];
        // output words 0 and 1 ((R) presumably reverses the pair order)
149         R6 = BYTEOP1P(R1:0,R3:2)(T)    || R0 = [I0++]   || R2  =[I1++];
150         R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R1 = [I0++]   || R3  =[I1++];
151         [I3++] = R6;
152
        // output words 2 and 3; the ++M0 loads step both sources to the next row
153         R6 = BYTEOP1P(R1:0,R3:2)(T)    || R0 = [I0++M0] || R2  =[I1++M0];
154         R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R0 = [I0++]   || [I3++] = R7 ;
155         [I3++] = R6;
        // pre-load next row from s1; store last word and step dest to the next row
156 pp16$3: DISALGNEXCPT                || R2 = [I1++]   || [I3++M3] = R7;
157
158         (r7:6) = [sp++];
159
160         RTS;
161 DEFUN_END(put_pixels16uc_nornd)
162
/*
 * z_put_pixels16_xy2: write h rows of 16 bytes to block, each derived by
 * BYTEOP2P from two vertically adjacent source rows (s0 and s0 + line_size).
 *
 * Arguments: r0 = block, r1 = s0, r2 = dest_size; line_size and h are
 * fetched relative to the frame pointer set up by "link 0" (fp+20, fp+24).
 *
 * The work is done in two passes over the same h rows:
 *   pass 1 (LS$16E..LE$16E) runs BYTEOP2P in RNDL mode and stores the
 *          result words to block;
 *   pass 2 (LS$16O..LE$16O) restarts with both source pointers moved one
 *          byte forward, runs BYTEOP2P in RNDH mode, reads back the words
 *          stored by pass 1 and writes the +|+ sum of the two.
 * NOTE(review): presumably RNDL/RNDH place the rounded 2x2 averages in the
 * low/high byte of each half-word, so pass 1 yields the even output bytes
 * and pass 2 the odd ones -- confirm against the Blackfin BYTEOP2P docs.
 *
 * Per row the destination advances by dest_size bytes and both source
 * pointers by line_size bytes. B0/B1/B3 are used as scratch to remember the
 * starting pointers between the passes. r4-r7 are saved on entry and
 * restored before returning.
 *
 * Both loops are software pipelined and pre-load the first word of the
 * following row, so the last iteration reads one word beyond the final
 * source row pair; that value is never used.
 */
163 DEFUN(z_put_pixels16_xy2,mL1,
164         (uint8_t *block, const uint8_t *s0,
165                  int dest_size, int line_size, int h)):
166         link 0;
167         [--sp] = (r7:4);
168         i3=r0;        // dest
169         i0=r1;        // src0--> pixels
170         i1=r1;        // src1--> pixels + line_size (offset added below)
171         r2+=-12;
172         m2=r2;        // m2=dest_size-12: three 4-byte steps plus M2 => dest_size per row
173         r2=[fp+20];
174         m3=r2;        // line_size
175         p0=[fp+24];   // h
176         r2+=-16;
177         i1+=m3;       /* src1 + line_size */
178         m0=r2;        /* line_size - 16: 4+4+4+M0+4 => line_size per row */
179
        // remember the start pointers for the second pass
180         B0 = I0;
181         B1 = I1;
182         B3 = I3;
183
        // pass 1 prologue: first word of both source rows
184         DISALGNEXCPT                       || R0 = [I0++] || R2  =[I1++];
185
186         LSETUP(LS$16E,LE$16E) LC0=P0;
187 LS$16E: DISALGNEXCPT                       || R1 = [I0++] || R3  =[I1++];
188         R4 = BYTEOP2P (R3:2,R1:0) (RNDL)   || R0 = [I0++] || R2  =[I1++];
189         R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R1 = [I0++] || [I3++] = R4 ;
190         DISALGNEXCPT                       || R3 = [I1++] || [I3++] = R5;
        // the ++M0 loads step both sources to the next row
191         R4 = BYTEOP2P (R3:2,R1:0) (RNDL)   || R0 = [I0++M0]|| R2  = [I1++M0];
192         R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R0 = [I0++] || [I3++] = R4 ;
193 LE$16E: DISALGNEXCPT                       || R2 = [I1++] || [I3++M2] = R5;
194
        // pass 2: rewind all pointers, then start the sources one byte later
195         M1 = 1;
196         I3 = B3;
197         I1 = B1;
198         I0 = B0;
199
200         I0 += M1;
201         I1 += M1;
202
203         DISALGNEXCPT                       || R0 = [I0++] || R2  =[I1++];
204         LSETUP(LS$16O,LE$16O) LC0=P0;
205 LS$16O: DISALGNEXCPT                       || R1 = [I0++] || R3  =[I1++];
206         R4 = BYTEOP2P (R3:2,R1:0) (RNDH)   || R0 = [I0++] || R2  =[I1++];
        // R6/R7 = the two words pass 1 stored here; I3++ then I3-- leaves I3 on the first of them
207         R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R1 = [I0++] || R6  =[I3++];
208         R4 = R4 +|+ R6                       || R7 = [I3--];
209         R5 = R5 +|+ R7                       || [I3++] = R4;
210         DISALGNEXCPT                       || R3  =[I1++] || [I3++] = R5;
211         R4 = BYTEOP2P (R3:2,R1:0) (RNDH)   || R0 = [I0++M0]|| R2  = [I1++M0];
212         R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R0 = [I0++] || R6 = [I3++];
213         R4 = R4 +|+ R6                       || R7 = [I3--];
214         R5 = R5 +|+ R7                       || [I3++] = R4;
215 LE$16O: DISALGNEXCPT                       || R2 = [I1++] || [I3++M2] = R5;
216
217         (r7:4) = [sp++];
218         unlink;
219         rts;
220 DEFUN_END(z_put_pixels16_xy2)
221
/*
 * put_pixels16_xy2_nornd: write h rows of 16 bytes to block, each derived
 * by BYTEOP2P from two vertically adjacent source rows (s0 and
 * s0 + line_size), using the TL/TH modes instead of RNDL/RNDH.
 *
 * Arguments: r0 = block, r1 = s0, r2 = line_size; h is fetched relative to
 * the frame pointer set up by "link 0" (fp+20).
 *
 * The work is done in two passes over the same h rows:
 *   pass 1 (LS$16ET..LE$16ET) runs BYTEOP2P in TL mode and stores the
 *          result words to block;
 *   pass 2 (LS$16OT..LE$16OT) restarts with both source pointers moved one
 *          byte forward, runs BYTEOP2P in TH mode, reads back the words
 *          stored by pass 1 and writes the +|+ sum of the two.
 * NOTE(review): presumably TL/TH place the truncated 2x2 averages in the
 * low/high byte of each half-word, so pass 1 yields the even output bytes
 * and pass 2 the odd ones -- confirm against the Blackfin BYTEOP2P docs.
 *
 * There is no separate destination stride: destination and sources all
 * advance by line_size bytes per row. B0/B1/B3 are used as scratch to
 * remember the starting pointers between the passes. r4-r7 are saved on
 * entry and restored before returning.
 *
 * Both loops are software pipelined and pre-load the first word of the
 * following row, so the last iteration reads one word beyond the final
 * source row pair; that value is never used.
 */
222 DEFUN(put_pixels16_xy2_nornd,mL1,
223         (uint8_t *block, const uint8_t *s0,
224                  int line_size, int h)):
225         link 0;
226         [--sp] = (r7:4);
227         i3=r0;        // dest
228         i0=r1;        // src0--> pixels
229         i1=r1;        // src1--> pixels + line_size (offset added below)
230         m3=r2;        // line_size
231         r2+=-12;
232         m2=r2;        // line_size-12: three 4-byte steps plus M2 => line_size per dest row
233         r2+=-4;
234         i1+=m3;       /* src1 + line_size */
235         m0=r2;        /* line_size - 16: 4+4+4+M0+4 => line_size per row */
236         p0=[fp+20];   // h
237
        // remember the start pointers for the second pass
238         B0=I0;
239         B1=I1;
240         B3=I3;
241
        // pass 1 prologue: first word of both source rows
242         DISALGNEXCPT                       || R0 = [I0++] || R2  =[I1++];
243
244         LSETUP(LS$16ET,LE$16ET) LC0=P0;
245 LS$16ET:DISALGNEXCPT                       || R1 = [I0++] || R3  =[I1++];
246         R4 = BYTEOP2P (R3:2,R1:0) (TL)     || R0 = [I0++] || R2  =[I1++];
247         R5 = BYTEOP2P (R3:2,R1:0) (TL,R)   || R1 = [I0++] || [I3++] = R4 ;
248         DISALGNEXCPT                       || R3 = [I1++] || [I3++] = R5;
        // the ++M0 loads step both sources to the next row
249         R4 = BYTEOP2P (R3:2,R1:0) (TL)     || R0 = [I0++M0]|| R2  = [I1++M0];
250         R5 = BYTEOP2P (R3:2,R1:0) (TL,R)   || R0 = [I0++] || [I3++] = R4 ;
251 LE$16ET:DISALGNEXCPT                       || R2 = [I1++] || [I3++M2] = R5;
252
        // pass 2: rewind all pointers, then start the sources one byte later
253         M1 = 1;
254         I3=B3;
255         I1=B1;
256         I0=B0;
257
258         I0 += M1;
259         I1 += M1;
260
261         DISALGNEXCPT                       || R0 = [I0++] || R2  =[I1++];
262         LSETUP(LS$16OT,LE$16OT) LC0=P0;
263 LS$16OT:DISALGNEXCPT                       || R1 = [I0++] || R3  =[I1++];
264         R4 = BYTEOP2P (R3:2,R1:0) (TH)     || R0 = [I0++] || R2  =[I1++];
        // R6/R7 = the two words pass 1 stored here; I3++ then I3-- leaves I3 on the first of them
265         R5 = BYTEOP2P (R3:2,R1:0) (TH,R)   || R1 = [I0++] || R6  =[I3++];
266         R4 = R4 +|+ R6                                    || R7 = [I3--];
267         R5 = R5 +|+ R7                                    || [I3++] = R4;
268         DISALGNEXCPT                       || R3  =[I1++] || [I3++] = R5;
269         R4 = BYTEOP2P (R3:2,R1:0) (TH)     || R0 = [I0++M0]|| R2  = [I1++M0];
270         R5 = BYTEOP2P (R3:2,R1:0) (TH,R)   || R0 = [I0++] || R6 = [I3++];
271         R4 = R4 +|+ R6                                    || R7 = [I3--];
272         R5 = R5 +|+ R7                                    || [I3++] = R4;
273 LE$16OT:DISALGNEXCPT                       || R2 = [I1++] || [I3++M2] = R5;
274
275         (r7:4) = [sp++];
276         unlink;
277         rts;
278 DEFUN_END(put_pixels16_xy2_nornd)
279
/*
 * z_put_pixels8_xy2: write h rows of 8 bytes to block, each derived by
 * BYTEOP2P from two vertically adjacent source rows (s0 and s0 + line_size).
 *
 * Arguments: r0 = block, r1 = s0, r2 = dest_size; line_size and h are
 * fetched relative to the frame pointer set up by "link 0" (fp+20, fp+24).
 *
 * The work is done in two passes over the same h rows:
 *   pass 1 (LS$8E..LE$8E) runs BYTEOP2P in RNDL mode and stores the result
 *          words to block;
 *   pass 2 (LS$8O..LE$8O) restarts with both source pointers moved one byte
 *          forward, runs BYTEOP2P in RNDH mode, reads back the words stored
 *          by pass 1 and writes the +|+ sum of the two.
 * NOTE(review): presumably RNDL/RNDH place the rounded 2x2 averages in the
 * low/high byte of each half-word, so pass 1 yields the even output bytes
 * and pass 2 the odd ones -- confirm against the Blackfin BYTEOP2P docs.
 *
 * Per row the destination advances by dest_size bytes and both source
 * pointers by line_size bytes. B0/B1/B3 are used as scratch to remember the
 * starting pointers between the passes. r4-r7 are saved on entry and
 * restored before returning.
 *
 * Both loops are software pipelined and pre-load the first word of the
 * following row, so the last iteration reads one word beyond the final
 * source row pair; that value is never used.
 */
280 DEFUN(z_put_pixels8_xy2,mL1,
281         (uint8_t *block, const uint8_t *s0,
282                  int dest_size, int line_size, int h)):
283         link 0;
284         [--sp] = (r7:4);
285         i3=r0;        // dest
286         i0=r1;        // src0--> pixels
287         i1=r1;        // src1--> pixels + line_size (offset added below)
288         r2+=-4;
289         m2=r2;        // m2=dest_size-4: one 4-byte step plus M2 => dest_size per row
290         r2=[fp+20];
291         m3=r2;        // line_size
292         p0=[fp+24];   // h
293         r2+=-8;
294         i1+=m3;       /* src1 + line_size */
295         m0=r2;        /* line_size - 8: 4+M0+4 => line_size per row */
296
        // remember the start pointers for the second pass
297         b0 = I0;
298         b1 = I1;
299         b3 = I3;
300
301         LSETUP(LS$8E,LE$8E) LC0=P0;
        // pass 1 prologue (outside the loop body): first word of both source rows
302         DISALGNEXCPT                       || R0 = [I0++]   || R2  =[I1++];
303 LS$8E:  DISALGNEXCPT                       || R1 = [I0++]   || R3  =[I1++];
        // the ++M0 loads step both sources to the next row
304         R4 = BYTEOP2P (R3:2,R1:0) (RNDL)   || R0 = [I0++M0] || R2  =[I1++M0];
305         R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R0 = [I0++]   || [I3++] = R4 ;
306 LE$8E:  DISALGNEXCPT                       || R2 = [I1++]   || [I3++M2] = R5;
307
        // pass 2: rewind all pointers, then start the sources one byte later
308         M1 = 1;
309         I3 = b3;
310         I1 = b1;
311         I0 = b0;
312
313         I0 += M1;
314         I1 += M1;
315
316         LSETUP(LS$8O,LE$8O) LC0=P0;
317         DISALGNEXCPT                       || R0 = [I0++]   || R2  =[I1++];
318 LS$8O:  DISALGNEXCPT                       || R1 = [I0++]   || R3  =[I1++];
319         R4 = BYTEOP2P (R3:2,R1:0) (RNDH)   || R0 = [I0++M0] || R2  =[I1++M0];
        // R6/R7 = the two words pass 1 stored here; I3++ then I3-- leaves I3 on the first of them
320         R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R0 = [I0++]   || R6  =[I3++];
321         R4 = R4 +|+ R6                                      || R7 = [I3--];
322         R5 = R5 +|+ R7                                      || [I3++] = R4;
323 LE$8O:  DISALGNEXCPT                       || R2  =[I1++]   || [I3++M2] = R5;
324
325         (r7:4) = [sp++];
326         unlink;
327         rts;
328 DEFUN_END(z_put_pixels8_xy2)
329
/*
 * put_pixels8_xy2_nornd: write h rows of 8 bytes to block, each derived by
 * BYTEOP2P from two vertically adjacent source rows (s0 and
 * s0 + line_size), using the TL/TH modes instead of RNDL/RNDH.
 *
 * Arguments: r0 = block, r1 = s0, r2 = line_size; h is fetched relative to
 * the frame pointer set up by "link 0" (fp+20).
 *
 * The work is done in two passes over the same h rows:
 *   pass 1 (LS$8ET..LE$8ET) runs BYTEOP2P in TL mode and stores the result
 *          words to block;
 *   pass 2 (LS$8OT..LE$8OT) restarts with both source pointers moved one
 *          byte forward, runs BYTEOP2P in TH mode, reads back the words
 *          stored by pass 1 and writes the +|+ sum of the two.
 * NOTE(review): presumably TL/TH place the truncated 2x2 averages in the
 * low/high byte of each half-word, so pass 1 yields the even output bytes
 * and pass 2 the odd ones -- confirm against the Blackfin BYTEOP2P docs.
 *
 * There is no separate destination stride: destination and sources all
 * advance by line_size bytes per row. B0/B1/B3 are used as scratch to
 * remember the starting pointers between the passes. r4-r7 are saved on
 * entry and restored before returning.
 *
 * Both loops are software pipelined and pre-load the first word of the
 * following row, so the last iteration reads one word beyond the final
 * source row pair; that value is never used.
 */
330 DEFUN(put_pixels8_xy2_nornd,mL1,
331         (uint8_t *block, const uint8_t *s0, int line_size, int h)):
332         link 0;
333         [--sp] = (r7:4);
334         i3=r0;        // dest
335         i0=r1;        // src0--> pixels
336         i1=r1;        // src1--> pixels + line_size (offset added below)
337         m3=r2;        // line_size
338         r2+=-4;
339         m2=r2;        // line_size-4: one 4-byte step plus M2 => line_size per dest row
340         r2+=-4;
341         i1+=m3;       /* src1 + line_size */
342         m0=r2;        /* line_size - 8: 4+M0+4 => line_size per row */
343         p0=[fp+20];   // h
344
345
        // remember the start pointers for the second pass
346         b0 = I0;
347         b1 = I1;
348         b3 = I3;
349
350         LSETUP(LS$8ET,LE$8ET) LC0=P0;
        // pass 1 prologue (outside the loop body): first word of both source rows
351         DISALGNEXCPT                       || R0 = [I0++]   || R2  =[I1++];
352
353 LS$8ET: DISALGNEXCPT                       || R1 = [I0++]   || R3 = [I1++];
        // the ++M0 loads step both sources to the next row
354         R4 = BYTEOP2P (R3:2,R1:0) (TL)     || R0 = [I0++M0] || R2 = [I1++M0];
355         R5 = BYTEOP2P (R3:2,R1:0) (TL,R)   || R0 = [I0++]   || [I3++] = R4 ;
356 LE$8ET: DISALGNEXCPT                       || R2 = [I1++]   || [I3++M2] = R5;
357
        // pass 2: rewind all pointers, then start the sources one byte later
358         M1 = 1;
359         I3 = b3;
360         I1 = b1;
361         I0 = b0;
362
363         I0 += M1;
364         I1 += M1;
365
366         LSETUP(LS$8OT,LE$8OT) LC0=P0;
367         DISALGNEXCPT                       || R0 = [I0++]   || R2 = [I1++];
368
369 LS$8OT: DISALGNEXCPT                       || R1 = [I0++]   || R3 = [I1++];
370         R4 = BYTEOP2P (R3:2,R1:0) (TH)     || R0 = [I0++M0] || R2 = [I1++M0];
        // R6/R7 = the two words pass 1 stored here; I3++ then I3-- leaves I3 on the first of them
371         R5 = BYTEOP2P (R3:2,R1:0) (TH,R)   || R0 = [I0++]   || R6 = [I3++];
372         R4 = R4 +|+ R6                                      || R7 = [I3--];
373         R5 = R5 +|+ R7                                      || [I3++] = R4;
374 LE$8OT: DISALGNEXCPT                       || R2  =[I1++]   || [I3++M2] = R5;
375
376         (r7:4) = [sp++];
377         unlink;
378         rts;
379 DEFUN_END(put_pixels8_xy2_nornd)