avcodec/vc1: shuffle calculation of MV predictor candidates
[ffmpeg.git] / libavcodec / vc1_loopfilter.c
index aceb1f7..0f990cc 100644 (file)
 #include "vc1.h"
 #include "vc1dsp.h"
 
-void ff_vc1_loop_filter_iblk(VC1Context *v, int pq)
-{
-    MpegEncContext *s = &v->s;
-    int j;
-    if (!s->first_slice_line) {
-        v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq);
-        if (s->mb_x)
-            v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
-        v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
-        if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
-        for (j = 0; j < 2; j++) {
-            v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq);
-            if (s->mb_x)
-                v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
-        }
-    }
-    v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8 * s->linesize, s->linesize, pq);
-
-    if (s->mb_y == s->end_mb_y - 1) {
-        if (s->mb_x) {
-            v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
-            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
-            v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
-            v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
-            }
-        }
-        v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq);
-    }
-}
-
 static av_always_inline void vc1_h_overlap_filter(VC1Context *v, int16_t (*left_block)[64],
-                                                  int16_t (*right_block)[64], int block_num)
+                                                  int16_t (*right_block)[64], int left_fieldtx,
+                                                  int right_fieldtx, int block_num) /* callers in this file pass 0 or 1 for the *_fieldtx flags; block_num 0..5 is handled */
 {
-    if (block_num > 3)
-        v->vc1dsp.vc1_h_s_overlap(left_block[block_num], right_block[block_num]);
-    else if (block_num & 1)
-        v->vc1dsp.vc1_h_s_overlap(right_block[block_num - 1], right_block[block_num]);
-    else
-        v->vc1dsp.vc1_h_s_overlap(left_block[block_num + 1], right_block[block_num]);
+    switch (block_num) { /* each case hands one pair of int16_t[64] blocks to vc1dsp.vc1_h_s_overlap; other block_num values do nothing */
+    case 0: /* left_block[2] against right_block[0] */
+        v->vc1dsp.vc1_h_s_overlap(left_block[2],
+                                  right_block[0],
+                                  left_fieldtx ^ right_fieldtx ? 16 - 8 * left_fieldtx : 8, /* 8 unless the flags differ; then 16 if this side's flag is 0, else 8 (presumably a row stride - confirm in vc1dsp) */
+                                  left_fieldtx ^ right_fieldtx ? 16 - 8 * right_fieldtx : 8, /* same rule, evaluated for the right block */
+                                  left_fieldtx || right_fieldtx ? 0 : 1); /* 0 if either flag is set, otherwise 1; meaning is defined by vc1_h_s_overlap, not visible here */
+        break;
+
+    case 1: /* both operands come from right_block: [0] against [2] */
+        v->vc1dsp.vc1_h_s_overlap(right_block[0],
+                                  right_block[2],
+                                  8,
+                                  8,
+                                  right_fieldtx ? 0 : 1); /* only the right flag matters because both blocks belong to right_block */
+        break;
+
+    case 2: /* operand choice depends on which of the two flags is set */
+        v->vc1dsp.vc1_h_s_overlap(!left_fieldtx && right_fieldtx ? left_block[2] + 8 : left_block[3], /* left_block[2] + 8 only when left flag is clear and right flag is set */
+                                  left_fieldtx && !right_fieldtx ? right_block[0] + 8 : right_block[1], /* right_block[0] + 8 only when left flag is set and right flag is clear */
+                                  left_fieldtx ^ right_fieldtx ? 16 - 8 * left_fieldtx : 8, /* same value rule as in case 0 */
+                                  left_fieldtx ^ right_fieldtx ? 16 - 8 * right_fieldtx : 8,
+                                  left_fieldtx || right_fieldtx ? 2 : 1); /* 2 if either flag is set, otherwise 1 */
+        break;
+
+    case 3: /* both operands come from right_block: [1] against [3] */
+        v->vc1dsp.vc1_h_s_overlap(right_block[1],
+                                  right_block[3],
+                                  8,
+                                  8,
+                                  right_fieldtx ? 2 : 1);
+        break;
+
+    case 4:
+    case 5: /* blocks 4 and 5: same index on both sides, fixed arguments 8, 8, 1 regardless of the fieldtx flags (presumably the chroma blocks - confirm) */
+        v->vc1dsp.vc1_h_s_overlap(left_block[block_num], right_block[block_num], 8, 8, 1);
+        break;
+    }
 }
 
 static av_always_inline void vc1_v_overlap_filter(VC1Context *v, int16_t (*top_block)[64],
                                                   int16_t (*bottom_block)[64], int block_num)
 {
-    if (block_num > 3)
+    switch (block_num) { /* picks the block pair handed to vc1dsp.vc1_v_s_overlap; block_num values other than 0..5 do nothing */
+    case 0: /* top_block[1] against bottom_block[0] */
+        v->vc1dsp.vc1_v_s_overlap(top_block[1], bottom_block[0]);
+        break;
+
+    case 1: /* top_block[3] against bottom_block[2] */
+        v->vc1dsp.vc1_v_s_overlap(top_block[3], bottom_block[2]);
+        break;
+
+    case 2: /* both operands come from bottom_block: [0] against [1] */
+        v->vc1dsp.vc1_v_s_overlap(bottom_block[0], bottom_block[1]);
+        break;
+
+    case 3: /* both operands come from bottom_block: [2] against [3] */
+        v->vc1dsp.vc1_v_s_overlap(bottom_block[2], bottom_block[3]);
+        break;
+
+    case 4:
+    case 5: /* blocks 4 and 5: same index in top_block and bottom_block */
         v->vc1dsp.vc1_v_s_overlap(top_block[block_num], bottom_block[block_num]);
-    else if (block_num & 2)
-        v->vc1dsp.vc1_v_s_overlap(bottom_block[block_num - 2], bottom_block[block_num]);
-    else
-        v->vc1dsp.vc1_v_s_overlap(top_block[block_num + 2], bottom_block[block_num]);
+        break;
+    }
 }
 
 void ff_vc1_i_overlap_filter(VC1Context *v)
@@ -108,9 +127,15 @@ void ff_vc1_i_overlap_filter(VC1Context *v)
         if (s->mb_x == 0 && (i & 5) != 1)
             continue;
 
-        if (v->pq >= 9 || v->condover == CONDOVER_ALL ||
-            (v->over_flags_plane[mb_pos] && ((i & 5) == 1 || v->over_flags_plane[mb_pos - 1])))
-            vc1_h_overlap_filter(v, s->mb_x ? left_blk : cur_blk, cur_blk, i);
+        if (v->pq >= 9 || (v->profile == PROFILE_ADVANCED &&
+                           (v->condover == CONDOVER_ALL ||
+                            (v->over_flags_plane[mb_pos] &&
+                             ((i & 5) == 1 || v->over_flags_plane[mb_pos - 1])))))
+            vc1_h_overlap_filter(v,
+                                 s->mb_x ? left_blk : cur_blk, cur_blk,
+                                 v->fcm == ILACE_FRAME && s->mb_x && v->fieldtx_plane[mb_pos - 1],
+                                 v->fcm == ILACE_FRAME && v->fieldtx_plane[mb_pos],
+                                 i);
     }
 
     if (v->fcm != ILACE_FRAME)
@@ -118,15 +143,18 @@ void ff_vc1_i_overlap_filter(VC1Context *v)
             if (s->first_slice_line && !(i & 2))
                 continue;
 
-            if (s->mb_x && (v->pq >= 9 || v->condover == CONDOVER_ALL ||
-                (v->over_flags_plane[mb_pos - 1] &&
-                 ((i & 2) || v->over_flags_plane[mb_pos - 1 - s->mb_stride]))))
+            if (s->mb_x &&
+                (v->pq >= 9 || (v->profile == PROFILE_ADVANCED &&
+                                (v->condover == CONDOVER_ALL ||
+                                 (v->over_flags_plane[mb_pos - 1] &&
+                                  ((i & 2) || v->over_flags_plane[mb_pos - 1 - s->mb_stride]))))))
                 vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
-            if (s->mb_x == s->mb_width - 1)
-                if (v->pq >= 9 || v->condover == CONDOVER_ALL ||
-                    (v->over_flags_plane[mb_pos] &&
-                     ((i & 2) || v->over_flags_plane[mb_pos - s->mb_stride])))
-                    vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
+            if (s->mb_x == s->mb_width - 1 &&
+                (v->pq >= 9 || (v->profile == PROFILE_ADVANCED &&
+                                (v->condover == CONDOVER_ALL ||
+                                 (v->over_flags_plane[mb_pos] &&
+                                  ((i & 2) || v->over_flags_plane[mb_pos - s->mb_stride]))))))
+                vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
         }
 }
 
@@ -135,6 +163,7 @@ void ff_vc1_p_overlap_filter(VC1Context *v)
     MpegEncContext *s = &v->s;
     int16_t (*topleft_blk)[64], (*top_blk)[64], (*left_blk)[64], (*cur_blk)[64];
     int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
     int i;
 
     topleft_blk = v->block[v->topleft_blk_idx];
@@ -147,7 +176,11 @@ void ff_vc1_p_overlap_filter(VC1Context *v)
             continue;
 
         if (v->mb_type[0][s->block_index[i]] && v->mb_type[0][s->block_index[i] - 1])
-            vc1_h_overlap_filter(v, s->mb_x ? left_blk : cur_blk, cur_blk, i);
+            vc1_h_overlap_filter(v,
+                                 s->mb_x ? left_blk : cur_blk, cur_blk,
+                                 v->fcm == ILACE_FRAME && s->mb_x && v->fieldtx_plane[mb_pos - 1],
+                                 v->fcm == ILACE_FRAME && v->fieldtx_plane[mb_pos],
+                                 i);
     }
 
     if (v->fcm != ILACE_FRAME)
@@ -251,7 +284,7 @@ void ff_vc1_i_loop_filter(VC1Context *v)
      * bottom edge of this MB, before moving over and running the H loop
      * filter on the left and internal vertical borders. Therefore, the loop
      * filter trails by one row and one column relative to the overlap filter
-     * and two rows and two colums relative to the decoding loop. */
+     * and two rows and two columns relative to the decoding loop. */
     if (!s->first_slice_line) {
         dest = s->dest[0] - 16 * s->linesize - 16;
         flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
@@ -260,7 +293,7 @@ void ff_vc1_i_loop_filter(VC1Context *v)
             for (i = 0; i < block_count; i++)
                 vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, fieldtx, i);
         }
-        if (s->mb_x == s->mb_width - 1) {
+        if (s->mb_x == v->end_mb_x - 1) {
             dest += 16;
             fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
             for (i = 0; i < block_count; i++)
@@ -275,7 +308,7 @@ void ff_vc1_i_loop_filter(VC1Context *v)
             for (i = 0; i < block_count; i++)
                 vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, fieldtx, i);
         }
-        if (s->mb_x == s->mb_width - 1) {
+        if (s->mb_x == v->end_mb_x - 1) {
             dest += 16;
             fieldtx = v->fieldtx_plane[mb_pos];
             for (i = 0; i < block_count; i++)
@@ -290,7 +323,7 @@ void ff_vc1_i_loop_filter(VC1Context *v)
             for (i = 0; i < block_count; i++)
                 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest, flags, i);
         }
-        if (s->mb_x == s->mb_width - 1) {
+        if (s->mb_x == v->end_mb_x - 1) {
             dest += 16;
             flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
             for (i = 0; i < block_count; i++)
@@ -305,7 +338,7 @@ void ff_vc1_i_loop_filter(VC1Context *v)
                 for (i = 0; i < block_count; i++)
                     vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, i);
             }
-            if (s->mb_x == s->mb_width - 1) {
+            if (s->mb_x == v->end_mb_x - 1) {
                 flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
                 dest += 16;
                 for (i = 0; i < block_count; i++)
@@ -318,7 +351,7 @@ void ff_vc1_i_loop_filter(VC1Context *v)
             for (i = 0; i < block_count; i++)
                 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, i);
         }
-        if (s->mb_x == s->mb_width - 1) {
+        if (s->mb_x == v->end_mb_x - 1) {
             dest += 16;
             flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
             for (i = 0; i < block_count; i++)
@@ -452,7 +485,7 @@ void ff_vc1_p_loop_filter(VC1Context *v)
      * we wait for the next loop filter iteration to do H loop filter on all
      * applicable vertical borders of this MB. Therefore, the loop filter
      * trails by one row and one column relative to the overlap filter and two
-     * rows and two colums relative to the decoding loop. */
+     * rows and two columns relative to the decoding loop. */
     if (s->mb_y >= s->start_mb_y + 2) {
         if (s->mb_x) {
             dest = s->dest[0] - 32 * s->linesize - 16;
@@ -890,7 +923,7 @@ void ff_vc1_p_intfr_loop_filter(VC1Context *v)
      * we wait for the loop filter iteration on the next row and next column to
      * do H loop filter on all applicable vertical borders of this MB.
      * Therefore, the loop filter trails by two rows and one column relative to
-     * the overlap filter and two rows and two colums relative to the decoding
+     * the overlap filter and two rows and two columns relative to the decoding
      * loop. */
     if (s->mb_x) {
         if (s->mb_y >= s->start_mb_y + 1) {