postproc fix
[mplayer.git] / libmpeg2 / slice.c
index 1d85cc5..a58487f 100644 (file)
@@ -1,8 +1,11 @@
 /*
  * slice.c
- * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 2003      Peter Gubanov <peter@elecard.net.ru>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
  *
  * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
  *
  * mpeg2dec is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
 
 #include "config.h"
 
-#include <string.h>
 #include <inttypes.h>
 
-#include "video_out.h"
-#include "mpeg2_internal.h"
+#include "mpeg2.h"
 #include "attributes.h"
+#include "mpeg2_internal.h"
 
-extern mc_functions_t mc_functions;
-extern void (* idct_block_copy) (int16_t * block, uint8_t * dest, int stride);
-extern void (* idct_block_add) (int16_t * block, uint8_t * dest, int stride);
+extern mpeg2_mc_t mpeg2_mc;
+extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
+extern void (* mpeg2_idct_add) (int last, int16_t * block,
+                               uint8_t * dest, int stride);
+extern void (* mpeg2_cpu_state_save) (cpu_state_t * state);
+extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state);
 
 #include "vlc.h"
 
-static int non_linear_quantizer_scale [] = {
-     0,  1,  2,  3,  4,  5,   6,   7,
-     8, 10, 12, 14, 16, 18,  20,  22,
-    24, 28, 32, 36, 40, 44,  48,  52,
-    56, 64, 72, 80, 88, 96, 104, 112
-};
-
-static inline int get_macroblock_modes (picture_t * picture)
+static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
     int macroblock_modes;
-    MBtab * tab;
+    const MBtab * tab;
 
-    switch (picture->picture_coding_type) {
+    switch (decoder->coding_type) {
     case I_TYPE:
 
        tab = MB_I + UBITS (bit_buf, 1);
        DUMPBITS (bit_buf, bits, tab->len);
        macroblock_modes = tab->modes;
 
-       if ((! (picture->frame_pred_frame_dct)) &&
-           (picture->picture_structure == FRAME_PICTURE)) {
+       if ((! (decoder->frame_pred_frame_dct)) &&
+           (decoder->picture_structure == FRAME_PICTURE)) {
            macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
            DUMPBITS (bit_buf, bits, 1);
        }
@@ -70,26 +68,26 @@ static inline int get_macroblock_modes (picture_t * picture)
        DUMPBITS (bit_buf, bits, tab->len);
        macroblock_modes = tab->modes;
 
-       if (picture->picture_structure != FRAME_PICTURE) {
+       if (decoder->picture_structure != FRAME_PICTURE) {
            if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
-               macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE;
+               macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
                DUMPBITS (bit_buf, bits, 2);
            }
-           return macroblock_modes;
-       } else if (picture->frame_pred_frame_dct) {
+           return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
+       } else if (decoder->frame_pred_frame_dct) {
            if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
-               macroblock_modes |= MC_FRAME;
-           return macroblock_modes;
+               macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT;
+           return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
        } else {
            if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
-               macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE;
+               macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
                DUMPBITS (bit_buf, bits, 2);
            }
            if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
                macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
                DUMPBITS (bit_buf, bits, 1);
            }
-           return macroblock_modes;
+           return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
        }
 
     case B_TYPE:
@@ -98,20 +96,20 @@ static inline int get_macroblock_modes (picture_t * picture)
        DUMPBITS (bit_buf, bits, tab->len);
        macroblock_modes = tab->modes;
 
-       if (picture->picture_structure != FRAME_PICTURE) {
+       if (decoder->picture_structure != FRAME_PICTURE) {
            if (! (macroblock_modes & MACROBLOCK_INTRA)) {
-               macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE;
+               macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
                DUMPBITS (bit_buf, bits, 2);
            }
            return macroblock_modes;
-       } else if (picture->frame_pred_frame_dct) {
+       } else if (decoder->frame_pred_frame_dct) {
            /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
-           macroblock_modes |= MC_FRAME;
+           macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT;
            return macroblock_modes;
        } else {
            if (macroblock_modes & MACROBLOCK_INTRA)
                goto intra;
-           macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE;
+           macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
            DUMPBITS (bit_buf, bits, 2);
            if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
            intra:
@@ -134,35 +132,41 @@ static inline int get_macroblock_modes (picture_t * picture)
 #undef bit_ptr
 }
 
-static inline int get_quantizer_scale (picture_t * picture)
+static inline void get_quantizer_scale (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
 
     int quantizer_scale_code;
 
     quantizer_scale_code = UBITS (bit_buf, 5);
     DUMPBITS (bit_buf, bits, 5);
-
-    if (picture->q_scale_type)
-       return non_linear_quantizer_scale [quantizer_scale_code];
-    else
-       return quantizer_scale_code << 1;
+    decoder->quantizer_scale = decoder->quantizer_scales[quantizer_scale_code];
+
+    decoder->quantizer_matrix[0] =
+       decoder->quantizer_prescale[0][quantizer_scale_code];
+    decoder->quantizer_matrix[1] =
+       decoder->quantizer_prescale[1][quantizer_scale_code];
+    decoder->quantizer_matrix[2] =
+       decoder->chroma_quantizer[0][quantizer_scale_code];
+    decoder->quantizer_matrix[3] =
+       decoder->chroma_quantizer[1][quantizer_scale_code];
 #undef bit_buf
 #undef bits
 #undef bit_ptr
 }
 
-static inline int get_motion_delta (picture_t * picture, int f_code)
+static inline int get_motion_delta (mpeg2_decoder_t * const decoder,
+                                   const int f_code)
 {
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
 
     int delta;
     int sign;
-    MVtab * tab;
+    const MVtab * tab;
 
     if (bit_buf & 0x80000000) {
        DUMPBITS (bit_buf, bits, 1);
@@ -207,30 +211,18 @@ static inline int get_motion_delta (picture_t * picture, int f_code)
 #undef bit_ptr
 }
 
-static inline int bound_motion_vector (int vector, int f_code)
+static inline int bound_motion_vector (const int vector, const int f_code)
 {
-#if 1
-    int limit;
-
-    limit = 16 << f_code;
-
-    if (vector >= limit)
-       return vector - 2*limit;
-    else if (vector < -limit)
-       return vector + 2*limit;
-    else return vector;
-#else
-    return (vector << (27 - f_code)) >> (27 - f_code);
-#endif
+    return ((int32_t)vector << (27 - f_code)) >> (27 - f_code);
 }
 
-static inline int get_dmv (picture_t * picture)
+static inline int get_dmv (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
 
-    DMVtab * tab;
+    const DMVtab * tab;
 
     tab = DMV_2 + UBITS (bit_buf, 2);
     DUMPBITS (bit_buf, bits, tab->len);
@@ -240,19 +232,19 @@ static inline int get_dmv (picture_t * picture)
 #undef bit_ptr
 }
 
-static inline int get_coded_block_pattern (picture_t * picture)
+static inline int get_coded_block_pattern (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
 
-    CBPtab * tab;
+    const CBPtab * tab;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
 
     if (bit_buf >= 0x20000000) {
 
-       tab = CBP_7 - 16 + UBITS (bit_buf, 7);
+       tab = CBP_7 + (UBITS (bit_buf, 7) - 16);
        DUMPBITS (bit_buf, bits, tab->len);
        return tab->cbp;
 
@@ -268,12 +260,12 @@ static inline int get_coded_block_pattern (picture_t * picture)
 #undef bit_ptr
 }
 
-static inline int get_luma_dc_dct_diff (picture_t * picture)
+static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
-    DCtab * tab;
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    const DCtab * tab;
     int size;
     int dc_diff;
 
@@ -286,31 +278,31 @@ static inline int get_luma_dc_dct_diff (picture_t * picture)
            dc_diff =
                UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
            bit_buf <<= size;
-           return dc_diff;
+           return dc_diff << decoder->intra_dc_precision;
        } else {
            DUMPBITS (bit_buf, bits, 3);
            return 0;
        }
     } else {
-       tab = DC_long - 0x1e0 + UBITS (bit_buf, 9);
+       tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0);
        size = tab->size;
        DUMPBITS (bit_buf, bits, tab->len);
        NEEDBITS (bit_buf, bits, bit_ptr);
        dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
        DUMPBITS (bit_buf, bits, size);
-       return dc_diff;
+       return dc_diff << decoder->intra_dc_precision;
     }
 #undef bit_buf
 #undef bits
 #undef bit_ptr
 }
 
-static inline int get_chroma_dc_dct_diff (picture_t * picture)
+static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
-    DCtab * tab;
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    const DCtab * tab;
     int size;
     int dc_diff;
 
@@ -323,60 +315,59 @@ static inline int get_chroma_dc_dct_diff (picture_t * picture)
            dc_diff =
                UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
            bit_buf <<= size;
-           return dc_diff;
+           return dc_diff << decoder->intra_dc_precision;
        } else {
            DUMPBITS (bit_buf, bits, 2);
            return 0;
        }
     } else {
-       tab = DC_long - 0x3e0 + UBITS (bit_buf, 10);
+       tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0);
        size = tab->size;
        DUMPBITS (bit_buf, bits, tab->len + 1);
        NEEDBITS (bit_buf, bits, bit_ptr);
        dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
        DUMPBITS (bit_buf, bits, size);
-       return dc_diff;
+       return dc_diff << decoder->intra_dc_precision;
     }
 #undef bit_buf
 #undef bits
 #undef bit_ptr
 }
 
-#define SATURATE(val)                  \
-do {                                   \
-    if ((uint32_t)(val + 2048) > 4095) \
-       val = (val > 0) ? 2047 : -2048; \
+#define SATURATE(val)                          \
+do {                                           \
+    val <<= 4;                                 \
+    if (unlikely (val != (int16_t) val))       \
+       val = (SBITS (val, 1) ^ 2047) << 4;     \
 } while (0)
 
-static void get_intra_block_B14 (picture_t * picture)
+static void get_intra_block_B14 (mpeg2_decoder_t * const decoder,
+                                const uint16_t * const quant_matrix)
 {
     int i;
     int j;
     int val;
-    uint8_t * scan = picture->scan;
-    uint8_t * quant_matrix = picture->intra_quantizer_matrix;
-    int quantizer_scale = picture->quantizer_scale;
+    const uint8_t * const scan = decoder->scan;
     int mismatch;
-    DCTtab * tab;
+    const DCTtab * tab;
     uint32_t bit_buf;
     int bits;
-    uint8_t * bit_ptr;
-    int16_t * dest;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
 
-    dest = picture->DCTblock;
     i = 0;
     mismatch = ~dest[0];
 
-    bit_buf = picture->bitstream_buf;
-    bits = picture->bitstream_bits;
-    bit_ptr = picture->bitstream_ptr;
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
 
     while (1) {
        if (bit_buf >= 0x28000000) {
 
-           tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5);
+           tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
 
            i += tab->run;
            if (i >= 64)
@@ -386,7 +377,7 @@ static void get_intra_block_B14 (picture_t * picture)
            j = scan[i];
            bit_buf <<= tab->len;
            bits += tab->len + 1;
-           val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+           val = (tab->level * quant_matrix[j]) >> 4;
 
            /* if (bitstream_get (1)) val = -val; */
            val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
@@ -402,7 +393,7 @@ static void get_intra_block_B14 (picture_t * picture)
 
        } else if (bit_buf >= 0x04000000) {
 
-           tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8);
+           tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
 
            i += tab->run;
            if (i < 64)
@@ -418,8 +409,7 @@ static void get_intra_block_B14 (picture_t * picture)
 
            DUMPBITS (bit_buf, bits, 12);
            NEEDBITS (bit_buf, bits, bit_ptr);
-           val = (SBITS (bit_buf, 12) *
-                  quantizer_scale * quant_matrix[j]) / 16;
+           val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16;
 
            SATURATE (val);
            dest[j] = val;
@@ -431,17 +421,17 @@ static void get_intra_block_B14 (picture_t * picture)
            continue;
 
        } else if (bit_buf >= 0x02000000) {
-           tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10);
+           tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00800000) {
-           tab = DCT_13 - 16 + UBITS (bit_buf, 13);
+           tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00200000) {
-           tab = DCT_15 - 16 + UBITS (bit_buf, 15);
+           tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
@@ -455,42 +445,40 @@ static void get_intra_block_B14 (picture_t * picture)
        }
        break;  /* illegal, check needed to avoid buffer overflow */
     }
-    dest[63] ^= mismatch & 1;
+    dest[63] ^= mismatch & 16;
     DUMPBITS (bit_buf, bits, 2);       /* dump end of block code */
-    picture->bitstream_buf = bit_buf;
-    picture->bitstream_bits = bits;
-    picture->bitstream_ptr = bit_ptr;
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
 }
 
-static void get_intra_block_B15 (picture_t * picture)
+static void get_intra_block_B15 (mpeg2_decoder_t * const decoder,
+                                const uint16_t * const quant_matrix)
 {
     int i;
     int j;
     int val;
-    uint8_t * scan = picture->scan;
-    uint8_t * quant_matrix = picture->intra_quantizer_matrix;
-    int quantizer_scale = picture->quantizer_scale;
+    const uint8_t * const scan = decoder->scan;
     int mismatch;
-    DCTtab * tab;
+    const DCTtab * tab;
     uint32_t bit_buf;
     int bits;
-    uint8_t * bit_ptr;
-    int16_t * dest;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
 
-    dest = picture->DCTblock;
     i = 0;
     mismatch = ~dest[0];
 
-    bit_buf = picture->bitstream_buf;
-    bits = picture->bitstream_bits;
-    bit_ptr = picture->bitstream_ptr;
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
 
     while (1) {
        if (bit_buf >= 0x04000000) {
 
-           tab = DCT_B15_8 - 4 + UBITS (bit_buf, 8);
+           tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4);
 
            i += tab->run;
            if (i < 64) {
@@ -499,7 +487,7 @@ static void get_intra_block_B15 (picture_t * picture)
                j = scan[i];
                bit_buf <<= tab->len;
                bits += tab->len + 1;
-               val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+               val = (tab->level * quant_matrix[j]) >> 4;
 
                /* if (bitstream_get (1)) val = -val; */
                val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
@@ -530,8 +518,7 @@ static void get_intra_block_B15 (picture_t * picture)
 
                DUMPBITS (bit_buf, bits, 12);
                NEEDBITS (bit_buf, bits, bit_ptr);
-               val = (SBITS (bit_buf, 12) *
-                      quantizer_scale * quant_matrix[j]) / 16;
+               val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16;
 
                SATURATE (val);
                dest[j] = val;
@@ -544,17 +531,17 @@ static void get_intra_block_B15 (picture_t * picture)
 
            }
        } else if (bit_buf >= 0x02000000) {
-           tab = DCT_B15_10 - 8 + UBITS (bit_buf, 10);
+           tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00800000) {
-           tab = DCT_13 - 16 + UBITS (bit_buf, 13);
+           tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00200000) {
-           tab = DCT_15 - 16 + UBITS (bit_buf, 15);
+           tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
@@ -568,39 +555,37 @@ static void get_intra_block_B15 (picture_t * picture)
        }
        break;  /* illegal, check needed to avoid buffer overflow */
     }
-    dest[63] ^= mismatch & 1;
+    dest[63] ^= mismatch & 16;
     DUMPBITS (bit_buf, bits, 4);       /* dump end of block code */
-    picture->bitstream_buf = bit_buf;
-    picture->bitstream_bits = bits;
-    picture->bitstream_ptr = bit_ptr;
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
 }
 
-static void get_non_intra_block (picture_t * picture)
+static int get_non_intra_block (mpeg2_decoder_t * const decoder,
+                               const uint16_t * const quant_matrix)
 {
     int i;
     int j;
     int val;
-    uint8_t * scan = picture->scan;
-    uint8_t * quant_matrix = picture->non_intra_quantizer_matrix;
-    int quantizer_scale = picture->quantizer_scale;
+    const uint8_t * const scan = decoder->scan;
     int mismatch;
-    DCTtab * tab;
+    const DCTtab * tab;
     uint32_t bit_buf;
     int bits;
-    uint8_t * bit_ptr;
-    int16_t * dest;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
 
     i = -1;
-    mismatch = 1;
-    dest = picture->DCTblock;
+    mismatch = -1;
 
-    bit_buf = picture->bitstream_buf;
-    bits = picture->bitstream_bits;
-    bit_ptr = picture->bitstream_ptr;
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
     if (bit_buf >= 0x28000000) {
-       tab = DCT_B14DC_5 - 5 + UBITS (bit_buf, 5);
+       tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
        goto entry_1;
     } else
        goto entry_2;
@@ -608,7 +593,7 @@ static void get_non_intra_block (picture_t * picture)
     while (1) {
        if (bit_buf >= 0x28000000) {
 
-           tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5);
+           tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
 
        entry_1:
            i += tab->run;
@@ -619,7 +604,7 @@ static void get_non_intra_block (picture_t * picture)
            j = scan[i];
            bit_buf <<= tab->len;
            bits += tab->len + 1;
-           val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;
+           val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5;
 
            /* if (bitstream_get (1)) val = -val; */
            val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
@@ -638,7 +623,7 @@ static void get_non_intra_block (picture_t * picture)
     entry_2:
        if (bit_buf >= 0x04000000) {
 
-           tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8);
+           tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
 
            i += tab->run;
            if (i < 64)
@@ -655,7 +640,7 @@ static void get_non_intra_block (picture_t * picture)
            DUMPBITS (bit_buf, bits, 12);
            NEEDBITS (bit_buf, bits, bit_ptr);
            val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1;
-           val = (val * quantizer_scale * quant_matrix[j]) / 32;
+           val = (val * quant_matrix[j]) / 32;
 
            SATURATE (val);
            dest[j] = val;
@@ -667,17 +652,17 @@ static void get_non_intra_block (picture_t * picture)
            continue;
 
        } else if (bit_buf >= 0x02000000) {
-           tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10);
+           tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00800000) {
-           tab = DCT_13 - 16 + UBITS (bit_buf, 13);
+           tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00200000) {
-           tab = DCT_15 - 16 + UBITS (bit_buf, 15);
+           tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
@@ -691,40 +676,39 @@ static void get_non_intra_block (picture_t * picture)
        }
        break;  /* illegal, check needed to avoid buffer overflow */
     }
-    dest[63] ^= mismatch & 1;
+    dest[63] ^= mismatch & 16;
     DUMPBITS (bit_buf, bits, 2);       /* dump end of block code */
-    picture->bitstream_buf = bit_buf;
-    picture->bitstream_bits = bits;
-    picture->bitstream_ptr = bit_ptr;
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+    return i;
 }
 
-static void get_mpeg1_intra_block (picture_t * picture)
+static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder)
 {
     int i;
     int j;
     int val;
-    uint8_t * scan = picture->scan;
-    uint8_t * quant_matrix = picture->intra_quantizer_matrix;
-    int quantizer_scale = picture->quantizer_scale;
-    DCTtab * tab;
+    const uint8_t * const scan = decoder->scan;
+    const uint16_t * const quant_matrix = decoder->quantizer_matrix[0];
+    const DCTtab * tab;
     uint32_t bit_buf;
     int bits;
-    uint8_t * bit_ptr;
-    int16_t * dest;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
 
     i = 0;
-    dest = picture->DCTblock;
 
-    bit_buf = picture->bitstream_buf;
-    bits = picture->bitstream_bits;
-    bit_ptr = picture->bitstream_ptr;
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
 
     while (1) {
        if (bit_buf >= 0x28000000) {
 
-           tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5);
+           tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
 
            i += tab->run;
            if (i >= 64)
@@ -734,7 +718,7 @@ static void get_mpeg1_intra_block (picture_t * picture)
            j = scan[i];
            bit_buf <<= tab->len;
            bits += tab->len + 1;
-           val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+           val = (tab->level * quant_matrix[j]) >> 4;
 
            /* oddification */
            val = (val - 1) | 1;
@@ -752,7 +736,7 @@ static void get_mpeg1_intra_block (picture_t * picture)
 
        } else if (bit_buf >= 0x04000000) {
 
-           tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8);
+           tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
 
            i += tab->run;
            if (i < 64)
@@ -773,7 +757,7 @@ static void get_mpeg1_intra_block (picture_t * picture)
                DUMPBITS (bit_buf, bits, 8);
                val = UBITS (bit_buf, 8) + 2 * val;
            }
-           val = (val * quantizer_scale * quant_matrix[j]) / 16;
+           val = (val * quant_matrix[j]) / 16;
 
            /* oddification */
            val = (val + ~SBITS (val, 1)) | 1;
@@ -787,17 +771,17 @@ static void get_mpeg1_intra_block (picture_t * picture)
            continue;
 
        } else if (bit_buf >= 0x02000000) {
-           tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10);
+           tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00800000) {
-           tab = DCT_13 - 16 + UBITS (bit_buf, 13);
+           tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00200000) {
-           tab = DCT_15 - 16 + UBITS (bit_buf, 15);
+           tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
@@ -812,35 +796,33 @@ static void get_mpeg1_intra_block (picture_t * picture)
        break;  /* illegal, check needed to avoid buffer overflow */
     }
     DUMPBITS (bit_buf, bits, 2);       /* dump end of block code */
-    picture->bitstream_buf = bit_buf;
-    picture->bitstream_bits = bits;
-    picture->bitstream_ptr = bit_ptr;
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
 }
 
-static void get_mpeg1_non_intra_block (picture_t * picture)
+static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder)
 {
     int i;
     int j;
     int val;
-    uint8_t * scan = picture->scan;
-    uint8_t * quant_matrix = picture->non_intra_quantizer_matrix;
-    int quantizer_scale = picture->quantizer_scale;
-    DCTtab * tab;
+    const uint8_t * const scan = decoder->scan;
+    const uint16_t * const quant_matrix = decoder->quantizer_matrix[1];
+    const DCTtab * tab;
     uint32_t bit_buf;
     int bits;
-    uint8_t * bit_ptr;
-    int16_t * dest;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
 
     i = -1;
-    dest = picture->DCTblock;
 
-    bit_buf = picture->bitstream_buf;
-    bits = picture->bitstream_bits;
-    bit_ptr = picture->bitstream_ptr;
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
     if (bit_buf >= 0x28000000) {
-       tab = DCT_B14DC_5 - 5 + UBITS (bit_buf, 5);
+       tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
        goto entry_1;
     } else
        goto entry_2;
@@ -848,7 +830,7 @@ static void get_mpeg1_non_intra_block (picture_t * picture)
     while (1) {
        if (bit_buf >= 0x28000000) {
 
-           tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5);
+           tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
 
        entry_1:
            i += tab->run;
@@ -859,7 +841,7 @@ static void get_mpeg1_non_intra_block (picture_t * picture)
            j = scan[i];
            bit_buf <<= tab->len;
            bits += tab->len + 1;
-           val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;
+           val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5;
 
            /* oddification */
            val = (val - 1) | 1;
@@ -880,7 +862,7 @@ static void get_mpeg1_non_intra_block (picture_t * picture)
     entry_2:
        if (bit_buf >= 0x04000000) {
 
-           tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8);
+           tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
 
            i += tab->run;
            if (i < 64)
@@ -902,7 +884,7 @@ static void get_mpeg1_non_intra_block (picture_t * picture)
                val = UBITS (bit_buf, 8) + 2 * val;
            }
            val = 2 * (val + SBITS (val, 1)) + 1;
-           val = (val * quantizer_scale * quant_matrix[j]) / 32;
+           val = (val * quant_matrix[j]) / 32;
 
            /* oddification */
            val = (val + ~SBITS (val, 1)) | 1;
@@ -916,17 +898,17 @@ static void get_mpeg1_non_intra_block (picture_t * picture)
            continue;
 
        } else if (bit_buf >= 0x02000000) {
-           tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10);
+           tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00800000) {
-           tab = DCT_13 - 16 + UBITS (bit_buf, 13);
+           tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00200000) {
-           tab = DCT_15 - 16 + UBITS (bit_buf, 15);
+           tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
@@ -941,870 +923,1139 @@ static void get_mpeg1_non_intra_block (picture_t * picture)
        break;  /* illegal, check needed to avoid buffer overflow */
     }
     DUMPBITS (bit_buf, bits, 2);       /* dump end of block code */
-    picture->bitstream_buf = bit_buf;
-    picture->bitstream_bits = bits;
-    picture->bitstream_ptr = bit_ptr;
-}
-
-static inline int get_macroblock_address_increment (picture_t * picture)
-{
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
-
-    MBAtab * tab;
-    int mba;
-
-    mba = 0;
-
-    while (1) {
-       if (bit_buf >= 0x10000000) {
-           tab = MBA_5 - 2 + UBITS (bit_buf, 5);
-           DUMPBITS (bit_buf, bits, tab->len);
-           return mba + tab->mba;
-       } else if (bit_buf >= 0x03000000) {
-           tab = MBA_11 - 24 + UBITS (bit_buf, 11);
-           DUMPBITS (bit_buf, bits, tab->len);
-           return mba + tab->mba;
-       } else switch (UBITS (bit_buf, 11)) {
-       case 8:         /* macroblock_escape */
-           mba += 33;
-           /* no break here on purpose */
-       case 15:        /* macroblock_stuffing (MPEG1 only) */
-           DUMPBITS (bit_buf, bits, 11);
-           NEEDBITS (bit_buf, bits, bit_ptr);
-           break;
-       default:        /* end of slice, or error */
-//         printf("MB error: %d  \n",(UBITS (bit_buf, 11))); // FIXME!
-           return 0;
-       }
-    }
-
-#undef bit_buf
-#undef bits
-#undef bit_ptr
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+    return i;
 }
 
-static inline void slice_intra_DCT (picture_t * picture, int cc,
-                                   uint8_t * dest, int stride)
+static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder,
+                                   const int cc,
+                                   uint8_t * const dest, const int stride) /* decode one intra block (cc == 0 takes the luma DC path, any other cc the chroma path) and hand it to mpeg2_idct_copy */
 {
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)  
-#define bit_ptr (picture->bitstream_ptr)
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
     NEEDBITS (bit_buf, bits, bit_ptr);
     /* Get the intra DC coefficient and inverse quantize it */
     if (cc == 0)
-       picture->dc_dct_pred[0] += get_luma_dc_dct_diff (picture);
+       decoder->DCTblock[0] =
+           decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder); /* the updated DC predictor is also stored as coefficient 0 */
     else
-       picture->dc_dct_pred[cc] += get_chroma_dc_dct_diff (picture);
-    picture->DCTblock[0] =
-       picture->dc_dct_pred[cc] << (3 - picture->intra_dc_precision);
-    memset (picture->DCTblock + 1, 0, 63 * sizeof (int16_t));
-
-    if (picture->mpeg1) {
-       if (picture->picture_coding_type != D_TYPE)
-           get_mpeg1_intra_block (picture);
-    } else if (picture->intra_vlc_format)
-       get_intra_block_B15 (picture);
+       decoder->DCTblock[0] =
+           decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder); /* chroma path: predictor slot is indexed by cc */
+
+    if (decoder->mpeg1) {
+       if (decoder->coding_type != D_TYPE) /* for D_TYPE pictures no further coefficients are read here */
+           get_mpeg1_intra_block (decoder);
+    } else if (decoder->intra_vlc_format)
+       get_intra_block_B15 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]); /* matrix index 0 when cc == 0, 2 otherwise */
     else
-       get_intra_block_B14 (picture);
-    idct_block_copy (picture->DCTblock, dest, stride);
+       get_intra_block_B14 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]); /* same matrix selection as the B15 branch */
+    mpeg2_idct_copy (decoder->DCTblock, dest, stride);
 #undef bit_buf
 #undef bits
 #undef bit_ptr
 }
 
-static inline void slice_non_intra_DCT (picture_t * picture, uint8_t * dest,
-                                       int stride)
-{
-    memset (picture->DCTblock, 0, 64 * sizeof (int16_t));
-    if (picture->mpeg1)
-       get_mpeg1_non_intra_block (picture);
-    else
-       get_non_intra_block (picture);
-    idct_block_add (picture->DCTblock, dest, stride);
-}
-
-#define MOTION_Y(table,offset_x,offset_y,motion_x,motion_y,            \
-                dest,src,offset_dest,offset_src,stride,height)         \
-do {                                                                   \
-    int xy_half;                                                       \
-    int total_offset;                                                  \
-                                                                       \
-    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);                  \
-    total_offset = ((offset_y + (motion_y >> 1)) * stride +            \
-                   offset_x + (motion_x >> 1) + (offset_src));         \
-    table[xy_half] (dest[0] + offset_x + (offset_dest),                        \
-                   src[0] + total_offset, stride, height);             \
-} while (0)
-
-#define MOTION_UV(table,offset_x,offset_y,motion_x,motion_y,           \
-                 dest,src,offset_dest,offset_src,stride,height)        \
-do {                                                                   \
-    int xy_half;                                                       \
-    int total_offset;                                                  \
-                                                                       \
-    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);                  \
-    total_offset = (((offset_y + motion_y) >> 1) * (stride) +          \
-                   ((offset_x + motion_x) >> 1) + (offset_src));       \
-    table[4+xy_half] (dest[1] + (offset_x >> 1) + (offset_dest),       \
-                     src[1] + total_offset, stride, height);           \
-    table[4+xy_half] (dest[2] + (offset_x >> 1) + (offset_dest),       \
-                     src[2] + total_offset, stride, height);           \
-} while (0)
-
-static inline void motion_block (void (** table) (uint8_t *, uint8_t *,
-                                                 int32_t, int32_t),
-                                int x_offset, int y_offset, int mb_y_8_offset,
-                                int src_field, int dest_field,
-                                int x_pred, int y_pred,
-                                uint8_t * dest[3], uint8_t * src[3],
-                                int stride, int height)
+static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder,
+                                       const int cc,
+                                       uint8_t * const dest, const int stride) /* decode one non-intra block and hand it to mpeg2_idct_add together with the decoder's return value */
 {
-    MOTION_Y (table, x_offset, y_offset, x_pred, y_pred, dest, src,
-             dest_field + mb_y_8_offset*8*stride, src_field, stride, height);
-
-    x_pred /= 2;
-    y_pred /= 2;
-    stride >>= 1;
-    height >>= 1;
+    int last; /* value returned by the block decoder; passed as first argument of mpeg2_idct_add */
 
-    MOTION_UV (table, x_offset, y_offset, x_pred, y_pred, dest, src,
-              (dest_field >> 1) + mb_y_8_offset*4*stride, src_field >> 1,
-              stride, height);
+    if (decoder->mpeg1)
+       last = get_mpeg1_non_intra_block (decoder);
+    else
+       last = get_non_intra_block (decoder,
+                                   decoder->quantizer_matrix[cc ? 3 : 1]); /* matrix index 1 when cc == 0, 3 otherwise */
+    mpeg2_idct_add (last, decoder->DCTblock, dest, stride);
 }
 
-static void motion_mp1 (picture_t * picture, motion_t * motion,
-                       uint8_t * dest[3], int offset, int stride,
-                       void (** table) (uint8_t *, uint8_t *, int, int))
+#define MOTION_420(table,ref,motion_x,motion_y,size,y) /* frame-style prediction, chroma addressed at half width and half height; expands to statements that use decoder, pos_x, pos_y, xy_half and offset from the enclosing function */ \
+    pos_x = 2 * decoder->offset + motion_x; /* low bit feeds xy_half, pos_x >> 1 is the source column */ \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;                        \
+    if (unlikely (pos_x > decoder->limit_x)) { /* with an unsigned pos_x (as declared in motion_mp1) this single compare also catches negative positions */ \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; /* clamp to [0, limit_x] */ \
+       motion_x = pos_x - 2 * decoder->offset; /* keep the vector consistent with the clamped position */ \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) { /* size is token-pasted into the limit field name, so it must be a literal */ \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;             \
+       motion_y = pos_y - 2 * decoder->v_offset - 2 * y;                     \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1); /* table index 0..3 built from the two low bits */ \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+                   ref[0] + (pos_x >> 1) + (pos_y >> 1) * decoder->stride,   \
+                   decoder->stride, size);                                   \
+    motion_x /= 2;     motion_y /= 2; /* halved vector for the chroma planes (integer division) */ \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);                        \
+    offset = (((decoder->offset + motion_x) >> 1) +                          \
+             ((((decoder->v_offset + motion_y) >> 1) + y/2) *                \
+              decoder->uv_stride));                                          \
+    table[4+xy_half] (decoder->dest[1] + y/2 * decoder->uv_stride + /* entries 4..7 of table are used for the chroma planes */ \
+                     (decoder->offset >> 1), ref[1] + offset,                \
+                     decoder->uv_stride, size/2);                            \
+    table[4+xy_half] (decoder->dest[2] + y/2 * decoder->uv_stride +          \
+                     (decoder->offset >> 1), ref[2] + offset,                \
+                     decoder->uv_stride, size/2)
+
+#define MOTION_FIELD_420(table,ref,motion_x,motion_y,dest_field,op,src_field) /* field-style prediction: every call steps by twice the plane stride; chroma at half width, height 4. Uses decoder, pos_x, pos_y, xy_half, offset from the enclosing function */ \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = decoder->v_offset + motion_y;                                    \
+    if (unlikely (pos_x > decoder->limit_x)) { /* an unsigned pos_x makes this compare catch negative positions too */ \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; /* clamp to [0, limit_x] */ \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y)) {                               \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;                      \
+       motion_y = pos_y - decoder->v_offset;                                 \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + /* dest_field offsets the first destination row */ \
+                   decoder->offset,                                          \
+                   (ref[0] + (pos_x >> 1) +                                  \
+                    ((pos_y op) + src_field) * decoder->stride), /* op is pasted verbatim after pos_y; src_field offsets the source row */ \
+                   2 * decoder->stride, 8);                                  \
+    motion_x /= 2;     motion_y /= 2; /* halved vector for the chroma planes (integer division) */ \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);                        \
+    offset = (((decoder->offset + motion_x) >> 1) +                          \
+             (((decoder->v_offset >> 1) + (motion_y op) + src_field) *       \
+              decoder->uv_stride));                                          \
+    table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride +    \
+                     (decoder->offset >> 1), ref[1] + offset,                \
+                     2 * decoder->uv_stride, 4);                             \
+    table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride +    \
+                     (decoder->offset >> 1), ref[2] + offset,                \
+                     2 * decoder->uv_stride, 4)
+
+#define MOTION_DMV_420(table,ref,motion_x,motion_y) /* two predictions per plane from one vector: one starting at the first row, one starting one stride lower, each stepping two strides; chroma at half width, height 4 */ \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = decoder->v_offset + motion_y;                                    \
+    if (unlikely (pos_x > decoder->limit_x)) { /* an unsigned pos_x makes this compare catch negative positions too */ \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; /* clamp to [0, limit_x] */ \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y)) {                               \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;                      \
+       motion_y = pos_y - decoder->v_offset;                                 \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; /* pos_y with its low bit cleared is used as the row multiplier */ \
+    table[xy_half] (decoder->dest[0] + decoder->offset,                              \
+                   ref[0] + offset, 2 * decoder->stride, 8);                 \
+    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, /* second call: destination and source both one stride lower */ \
+                   ref[0] + decoder->stride + offset,                        \
+                   2 * decoder->stride, 8);                                  \
+    motion_x /= 2;     motion_y /= 2; /* halved vector for the chroma planes (integer division) */ \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);                        \
+    offset = (((decoder->offset + motion_x) >> 1) +                          \
+             (((decoder->v_offset >> 1) + (motion_y & ~1)) *                 \
+              decoder->uv_stride));                                          \
+    table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1),             \
+                     ref[1] + offset, 2 * decoder->uv_stride, 4);            \
+    table[4+xy_half] (decoder->dest[1] + decoder->uv_stride +                \
+                     (decoder->offset >> 1),                                 \
+                     ref[1] + decoder->uv_stride + offset,                   \
+                     2 * decoder->uv_stride, 4);                             \
+    table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1),             \
+                     ref[2] + offset, 2 * decoder->uv_stride, 4);            \
+    table[4+xy_half] (decoder->dest[2] + decoder->uv_stride +                \
+                     (decoder->offset >> 1),                                 \
+                     ref[2] + decoder->uv_stride + offset,                   \
+                     2 * decoder->uv_stride, 4)
+
+#define MOTION_ZERO_420(table,ref) /* prediction without a motion vector: source and destination share decoder->offset; no clamping is done. Uses decoder and offset from the enclosing function */ \
+    table[0] (decoder->dest[0] + decoder->offset,                            \
+             (ref[0] + decoder->offset +                                     \
+              decoder->v_offset * decoder->stride), decoder->stride, 16);    \
+    offset = ((decoder->offset >> 1) + /* chroma source offset: half the column, half the row, uv_stride */ \
+             (decoder->v_offset >> 1) * decoder->uv_stride);                 \
+    table[4] (decoder->dest[1] + (decoder->offset >> 1), /* entry 4 is the chroma counterpart of entry 0; height 8 */ \
+             ref[1] + offset, decoder->uv_stride, 8);                        \
+    table[4] (decoder->dest[2] + (decoder->offset >> 1),                     \
+             ref[2] + offset, decoder->uv_stride, 8)
+
+#define MOTION_422(table,ref,motion_x,motion_y,size,y) /* frame-style prediction, chroma addressed at half width but the same height as luma; uses decoder, pos_x, pos_y, xy_half and offset from the enclosing function */ \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;                        \
+    if (unlikely (pos_x > decoder->limit_x)) { /* an unsigned pos_x makes this compare catch negative positions too */ \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; /* clamp to [0, limit_x] */ \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) { /* size is token-pasted into the limit field name, so it must be a literal */ \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;             \
+       motion_y = pos_y - 2 * decoder->v_offset - 2 * y;                     \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride;                  \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+                   ref[0] + offset, decoder->stride, size);                  \
+    offset = (offset + (motion_x & (motion_x < 0))) >> 1; /* chroma offset is the halved luma offset; 1 is added first only when motion_x is negative and odd */ \
+    motion_x /= 2; /* only the horizontal component is halved */ \
+    xy_half = ((pos_y & 1) << 1) | (motion_x & 1); /* vertical bit still comes from pos_y */ \
+    table[4+xy_half] (decoder->dest[1] + y * decoder->uv_stride +            \
+                     (decoder->offset >> 1), ref[1] + offset,                \
+                     decoder->uv_stride, size);                              \
+    table[4+xy_half] (decoder->dest[2] + y * decoder->uv_stride +            \
+                     (decoder->offset >> 1), ref[2] + offset,                \
+                     decoder->uv_stride, size)
+
+#define MOTION_FIELD_422(table,ref,motion_x,motion_y,dest_field,op,src_field) /* field-style prediction: every call steps by twice the plane stride with height 8; chroma at half width. Uses decoder, pos_x, pos_y, xy_half, offset from the enclosing function */ \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = decoder->v_offset + motion_y;                                    \
+    if (unlikely (pos_x > decoder->limit_x)) { /* an unsigned pos_x makes this compare catch negative positions too */ \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; /* clamp to [0, limit_x] */ \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y)) {                               \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;                      \
+       motion_y = pos_y - decoder->v_offset;                                 \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride; /* op is pasted verbatim after pos_y; src_field offsets the source row */ \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +        \
+                   decoder->offset, ref[0] + offset,                         \
+                   2 * decoder->stride, 8);                                  \
+    offset = (offset + (motion_x & (motion_x < 0))) >> 1; /* chroma offset is the halved luma offset; 1 is added first only when motion_x is negative and odd */ \
+    motion_x /= 2; /* only the horizontal component is halved */ \
+    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);                           \
+    table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride +    \
+                     (decoder->offset >> 1), ref[1] + offset,                \
+                     2 * decoder->uv_stride, 8);                             \
+    table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride +    \
+                     (decoder->offset >> 1), ref[2] + offset,                \
+                     2 * decoder->uv_stride, 8)
+
+#define MOTION_DMV_422(table,ref,motion_x,motion_y) /* two predictions per plane from one vector: one starting at the first row, one starting one stride lower, each stepping two strides with height 8; chroma at half width */ \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = decoder->v_offset + motion_y;                                    \
+    if (unlikely (pos_x > decoder->limit_x)) { /* an unsigned pos_x makes this compare catch negative positions too */ \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; /* clamp to [0, limit_x] */ \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y)) {                               \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;                      \
+       motion_y = pos_y - decoder->v_offset;                                 \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; /* pos_y with its low bit cleared is used as the row multiplier */ \
+    table[xy_half] (decoder->dest[0] + decoder->offset,                              \
+                   ref[0] + offset, 2 * decoder->stride, 8);                 \
+    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, /* second call: destination and source both one stride lower */ \
+                   ref[0] + decoder->stride + offset,                        \
+                   2 * decoder->stride, 8);                                  \
+    offset = (offset + (motion_x & (motion_x < 0))) >> 1; /* chroma offset is the halved luma offset; 1 is added first only when motion_x is negative and odd */ \
+    motion_x /= 2; /* only the horizontal component is halved */ \
+    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);                           \
+    table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1),             \
+                     ref[1] + offset, 2 * decoder->uv_stride, 8);            \
+    table[4+xy_half] (decoder->dest[1] + decoder->uv_stride +                \
+                     (decoder->offset >> 1),                                 \
+                     ref[1] + decoder->uv_stride + offset,                   \
+                     2 * decoder->uv_stride, 8);                             \
+    table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1),             \
+                     ref[2] + offset, 2 * decoder->uv_stride, 8);            \
+    table[4+xy_half] (decoder->dest[2] + decoder->uv_stride +                \
+                     (decoder->offset >> 1),                                 \
+                     ref[2] + decoder->uv_stride + offset,                   \
+                     2 * decoder->uv_stride, 8)
+
+#define MOTION_ZERO_422(table,ref) /* prediction without a motion vector, chroma at half width and full height (16); no clamping is done. Uses decoder and offset from the enclosing function */ \
+    offset = decoder->offset + decoder->v_offset * decoder->stride;          \
+    table[0] (decoder->dest[0] + decoder->offset,                            \
+             ref[0] + offset, decoder->stride, 16);                          \
+    offset >>= 1; /* the halved luma offset is reused as the chroma source offset */ \
+    table[4] (decoder->dest[1] + (decoder->offset >> 1), /* entry 4 is the chroma counterpart of entry 0 */ \
+             ref[1] + offset, decoder->uv_stride, 16);                       \
+    table[4] (decoder->dest[2] + (decoder->offset >> 1),                     \
+             ref[2] + offset, decoder->uv_stride, 16)
+
+#define MOTION_444(table,ref,motion_x,motion_y,size,y) /* frame-style prediction where all three planes use the same table entry, source offset and stride; uses decoder, pos_x, pos_y, xy_half and offset from the enclosing function */ \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;                        \
+    if (unlikely (pos_x > decoder->limit_x)) { /* an unsigned pos_x makes this compare catch negative positions too */ \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; /* clamp to [0, limit_x] */ \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) { /* size is token-pasted into the limit field name, so it must be a literal */ \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;             \
+       motion_y = pos_y - 2 * decoder->v_offset - 2 * y;                     \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride; /* one source offset shared by the three planes */ \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+                   ref[0] + offset, decoder->stride, size);                  \
+    table[xy_half] (decoder->dest[1] + y * decoder->stride + decoder->offset, \
+                   ref[1] + offset, decoder->stride, size);                  \
+    table[xy_half] (decoder->dest[2] + y * decoder->stride + decoder->offset, \
+                   ref[2] + offset, decoder->stride, size)
+
+#define MOTION_FIELD_444(table,ref,motion_x,motion_y,dest_field,op,src_field) /* field-style prediction: every call steps by twice the stride with height 8; all three planes share table entry, offset and stride */ \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = decoder->v_offset + motion_y;                                    \
+    if (unlikely (pos_x > decoder->limit_x)) { /* an unsigned pos_x makes this compare catch negative positions too */ \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; /* clamp to [0, limit_x] */ \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y)) {                               \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;                      \
+       motion_y = pos_y - decoder->v_offset;                                 \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride; /* op is pasted verbatim after pos_y; src_field offsets the source row */ \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + /* dest_field offsets the first destination row */ \
+                   decoder->offset, ref[0] + offset,                         \
+                   2 * decoder->stride, 8);                                  \
+    table[xy_half] (decoder->dest[1] + dest_field * decoder->stride +        \
+                   decoder->offset, ref[1] + offset,                         \
+                   2 * decoder->stride, 8);                                  \
+    table[xy_half] (decoder->dest[2] + dest_field * decoder->stride +        \
+                   decoder->offset, ref[2] + offset,                         \
+                   2 * decoder->stride, 8)
+
+#define MOTION_DMV_444(table,ref,motion_x,motion_y) /* two predictions per plane from one vector: one starting at the first row, one starting one stride lower, each stepping two strides with height 8; all planes share entry, offset and stride */ \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = decoder->v_offset + motion_y;                                    \
+    if (unlikely (pos_x > decoder->limit_x)) { /* an unsigned pos_x makes this compare catch negative positions too */ \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; /* clamp to [0, limit_x] */ \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y)) {                               \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;                      \
+       motion_y = pos_y - decoder->v_offset;                                 \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; /* pos_y with its low bit cleared is used as the row multiplier */ \
+    table[xy_half] (decoder->dest[0] + decoder->offset,                              \
+                   ref[0] + offset, 2 * decoder->stride, 8);                 \
+    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, /* second call: destination and source both one stride lower */ \
+                   ref[0] + decoder->stride + offset,                        \
+                   2 * decoder->stride, 8);                                  \
+    table[xy_half] (decoder->dest[1] + decoder->offset,                              \
+                   ref[1] + offset, 2 * decoder->stride, 8);                 \
+    table[xy_half] (decoder->dest[1] + decoder->stride + decoder->offset,     \
+                   ref[1] + decoder->stride + offset,                        \
+                   2 * decoder->stride, 8);                                  \
+    table[xy_half] (decoder->dest[2] + decoder->offset,                              \
+                   ref[2] + offset, 2 * decoder->stride, 8);                 \
+    table[xy_half] (decoder->dest[2] + decoder->stride + decoder->offset,     \
+                   ref[2] + decoder->stride + offset,                        \
+                   2 * decoder->stride, 8)
+
+#define MOTION_ZERO_444(table,ref) /* prediction without a motion vector where all three planes share source offset, stride and height 16; equivalent to MOTION_444 with a zero vector. Uses decoder and offset from the enclosing function */ \
+    offset = decoder->offset + decoder->v_offset * decoder->stride;          \
+    table[0] (decoder->dest[0] + decoder->offset,                            \
+             ref[0] + offset, decoder->stride, 16);                          \
+    table[0] (decoder->dest[1] + decoder->offset, /* table[0] as for luma: MOTION_444 uses the same entry on every plane, not the chroma entry table[4] */ \
+             ref[1] + offset, decoder->stride, 16);                          \
+    table[0] (decoder->dest[2] + decoder->offset, /* full decoder->offset (not offset >> 1) so dest[2] lines up with dest[1] and with the source offset */ \
+             ref[2] + offset, decoder->stride, 16)
+
+#define bit_buf (decoder->bitstream_buf) /* shorthand for the decoder's bitstream fields; requires a variable named decoder in scope */
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+
+static void motion_mp1 (mpeg2_decoder_t * const decoder,
+                       motion_t * const motion,
+                       mpeg2_mc_fct * const * const table)
 {
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
     int motion_x, motion_y;
+    unsigned int pos_x, pos_y, xy_half, offset;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[0][0] + get_motion_delta (picture,
-                                                    motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion_x = (motion->pmv[0][0] +
+               (get_motion_delta (decoder,
+                                  motion->f_code[0]) << motion->f_code[1]));
+    motion_x = bound_motion_vector (motion_x,
+                                   motion->f_code[0] + motion->f_code[1]);
     motion->pmv[0][0] = motion_x;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = motion->pmv[0][1] + get_motion_delta (picture,
-                                                    motion->f_code[0]);
-    motion_y = bound_motion_vector (motion_y, motion->f_code[0]);
+    motion_y = (motion->pmv[0][1] +
+               (get_motion_delta (decoder,
+                                  motion->f_code[0]) << motion->f_code[1]));
+    motion_y = bound_motion_vector (motion_y,
+                                   motion->f_code[0] + motion->f_code[1]);
     motion->pmv[0][1] = motion_y;
 
-    if (motion->f_code[1]) {
-       motion_x <<= 1;
-       motion_y <<= 1;
-    }
-
-    motion_block (table, offset, picture->v_offset, 0, 0, 0,
-                 motion_x, motion_y, dest, motion->ref[0], stride, 16);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static void motion_mp1_reuse (picture_t * picture, motion_t * motion,
-                             uint8_t * dest[3], int offset, int stride,
-                             void (** table) (uint8_t *, uint8_t *, int, int))
-{
-    int motion_x, motion_y;
-
-    motion_x = motion->pmv[0][0];
-    motion_y = motion->pmv[0][1];
-
-    if (motion->f_code[1]) {
-       motion_x <<= 1;
-       motion_y <<= 1;
-    }
-
-    motion_block (table, offset, picture->v_offset, 0, 0, 0,
-                 motion_x, motion_y, dest, motion->ref[0], stride, 16);
-}
-
-static void motion_fr_frame (picture_t * picture, motion_t * motion,
-                            uint8_t * dest[3], int offset, int stride,
-                            void (** table) (uint8_t *, uint8_t *, int, int))
-{
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
-    int motion_x, motion_y;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[0][0] + get_motion_delta (picture,
-                                                    motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = motion->pmv[0][1] + get_motion_delta (picture,
-                                                    motion->f_code[1]);
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;
-
-    motion_block (table, offset, picture->v_offset, 0, 0, 0,
-                 motion_x, motion_y, dest, motion->ref[0], stride, 16);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static void motion_fr_field (picture_t * picture, motion_t * motion,
-                            uint8_t * dest[3], int offset, int stride,
-                            void (** table) (uint8_t *, uint8_t *, int, int))
-{
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
-    int motion_x, motion_y;
-    int field_select;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    field_select = SBITS (bit_buf, 1);
-    DUMPBITS (bit_buf, bits, 1);
-
-    motion_x = motion->pmv[0][0] + get_motion_delta (picture,
-                                                    motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[0][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (picture,
-                                                           motion->f_code[1]);
-    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */
-    motion->pmv[0][1] = motion_y << 1;
-
-    motion_block (table, offset, picture->v_offset >> 1,
-                 0, (field_select & stride), 0,
-                 motion_x, motion_y, dest, motion->ref[0], stride * 2, 8);
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    field_select = SBITS (bit_buf, 1);
-    DUMPBITS (bit_buf, bits, 1);
-
-    motion_x = motion->pmv[1][0] + get_motion_delta (picture,
-                                                    motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[1][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = (motion->pmv[1][1] >> 1) + get_motion_delta (picture,
-                                                           motion->f_code[1]);
-    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */
-    motion->pmv[1][1] = motion_y << 1;
-
-    motion_block (table, offset, picture->v_offset >> 1,
-                 0, (field_select & stride), stride,
-                 motion_x, motion_y, dest, motion->ref[0], stride * 2, 8);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static void motion_fr_dmv (picture_t * picture, motion_t * motion,
-                          uint8_t * dest[3], int offset, int stride,
-                          void (** table) (uint8_t *, uint8_t *, int, int))
-{
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
-    int motion_x, motion_y;
-    int dmv_x, dmv_y;
-    int m;
-    int other_x, other_y;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[0][0] + get_motion_delta (picture,
-                                                    motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    dmv_x = get_dmv (picture);
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (picture,
-                                                           motion->f_code[1]);
-    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */
-    motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    dmv_y = get_dmv (picture);
-
-    motion_block (mc_functions.put, offset, picture->v_offset >> 1, 0, 0, 0,
-                 motion_x, motion_y, dest, motion->ref[0], stride * 2, 8);
-
-    m = picture->top_field_first ? 1 : 3;
-    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;
-    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1;
-    motion_block (mc_functions.avg, offset, picture->v_offset >> 1, 0, stride, 0,
-                 other_x, other_y, dest, motion->ref[0], stride * 2, 8);
-
-    motion_block (mc_functions.put, offset, picture->v_offset >> 1,
-                 0, stride, stride,
-                 motion_x, motion_y, dest, motion->ref[0], stride * 2, 8);
-
-    m = picture->top_field_first ? 3 : 1;
-    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;
-    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1;
-    motion_block (mc_functions.avg, offset, picture->v_offset >> 1, 0, 0, stride,
-                 other_x, other_y, dest, motion->ref[0], stride * 2, 8);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
+    MOTION_420 (table, motion->ref[0], motion_x, motion_y, 16, 0);
 }
 
-/* like motion_frame, but reuse previous motion vectors */
-static void motion_fr_reuse (picture_t * picture, motion_t * motion,
-                            uint8_t * dest[3], int offset, int stride,
-                            void (** table) (uint8_t *, uint8_t *, int, int))
-{
-    motion_block (table, offset, picture->v_offset, 0, 0, 0,
-                 motion->pmv[0][0], motion->pmv[0][1],
-                 dest, motion->ref[0], stride, 16);
-}
-
-/* like motion_frame, but use null motion vectors */
-static void motion_fr_zero (picture_t * picture, motion_t * motion,
-                           uint8_t * dest[3], int offset, int stride,
-                           void (** table) (uint8_t *, uint8_t *, int, int))
-{
-    motion_block (table, offset, picture->v_offset, 0, 0, 0, 0, 0,
-                 dest, motion->ref[0], stride, 16);
-}
+#define MOTION_FUNCTIONS(FORMAT,MOTION,MOTION_FIELD,MOTION_DMV,MOTION_ZERO)   \
+/* Defines the motion_xxx_##FORMAT handlers using the macro arguments. */     \
+static void motion_fr_frame_##FORMAT (mpeg2_decoder_t * const decoder,       \
+                                     motion_t * const motion,                \
+                                     mpeg2_mc_fct * const * const table)     \
+{                                                                            \
+    int motion_x, motion_y;                                                  \
+    unsigned int pos_x, pos_y, xy_half, offset; /* unused here; presumably MOTION scratch - confirm */ \
+    /* One vector: pmv[0] + parsed delta, bounded, stored in both PMVs. */    \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;                        \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,                \
+                                                    motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);            \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;                        \
+                                                                             \
+    MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0);               \
+}                                                                            \
+                                                                             \
+static void motion_fr_field_##FORMAT (mpeg2_decoder_t * const decoder,       \
+                                     motion_t * const motion,                \
+                                     mpeg2_mc_fct * const * const table)     \
+{                                                                            \
+    int motion_x, motion_y, field;                                           \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* Two vectors, each after a 1-bit field select; y PMV kept doubled. */   \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    field = UBITS (bit_buf, 1);                                                      \
+    DUMPBITS (bit_buf, bits, 1);                                             \
+                                                                             \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[0][0] = motion_x;                                            \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_y = ((motion->pmv[0][1] >> 1) +                                   \
+               get_motion_delta (decoder, motion->f_code[1]));               \
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */              \
+    motion->pmv[0][1] = motion_y << 1;                                       \
+                                                                             \
+    MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    field = UBITS (bit_buf, 1);                                                      \
+    DUMPBITS (bit_buf, bits, 1);                                             \
+                                                                             \
+    motion_x = motion->pmv[1][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[1][0] = motion_x;                                            \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_y = ((motion->pmv[1][1] >> 1) +                                   \
+               get_motion_delta (decoder, motion->f_code[1]));               \
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */              \
+    motion->pmv[1][1] = motion_y << 1;                                       \
+                                                                             \
+    MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); \
+}                                                                            \
+                                                                             \
+static void motion_fr_dmv_##FORMAT (mpeg2_decoder_t * const decoder,         \
+                                   motion_t * const motion,                  \
+                                   mpeg2_mc_fct * const * const table)       \
+{                                                                            \
+    int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y;               \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* One vector plus dmv; two derived vectors are put, then MOTION_DMV. */  \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;                        \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    dmv_x = get_dmv (decoder);                                               \
+    /* no NEEDBITS before the reads below - presumably bits suffice; verify */ \
+    motion_y = ((motion->pmv[0][1] >> 1) +                                   \
+               get_motion_delta (decoder, motion->f_code[1]));               \
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */              \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1;                   \
+    dmv_y = get_dmv (decoder);                                               \
+    /* derived: (v * m + (v > 0)) >> 1, m = 1 or 3 by top_field_first, + dmv */ \
+    m = decoder->top_field_first ? 1 : 3;                                    \
+    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;                \
+    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1;            \
+    MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); \
+                                                                             \
+    m = decoder->top_field_first ? 3 : 1;                                    \
+    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;                \
+    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1;            \
+    MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0);\
+                                                                             \
+    MOTION_DMV (mpeg2_mc.avg, motion->ref[0], motion_x, motion_y);           \
+}                                                                            \
+                                                                             \
+static void motion_reuse_##FORMAT (mpeg2_decoder_t * const decoder,          \
+                                  motion_t * const motion,                   \
+                                  mpeg2_mc_fct * const * const table)        \
+{                                                                            \
+    int motion_x, motion_y;                                                  \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* No bitstream read: predicts with the stored pmv[0] vector. */          \
+    motion_x = motion->pmv[0][0];                                            \
+    motion_y = motion->pmv[0][1];                                            \
+                                                                             \
+    MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0);               \
+}                                                                            \
+                                                                             \
+static void motion_zero_##FORMAT (mpeg2_decoder_t * const decoder,           \
+                                 motion_t * const motion,                    \
+                                 mpeg2_mc_fct * const * const table)         \
+{                                                                            \
+    unsigned int offset;                                                     \
+    /* Clears both PMVs, then runs MOTION_ZERO on ref[0]. */                  \
+    motion->pmv[0][0] = motion->pmv[0][1] = 0;                               \
+    motion->pmv[1][0] = motion->pmv[1][1] = 0;                               \
+                                                                             \
+    MOTION_ZERO (table, motion->ref[0]);                                     \
+}                                                                            \
+                                                                             \
+static void motion_fi_field_##FORMAT (mpeg2_decoder_t * const decoder,       \
+                                     motion_t * const motion,                \
+                                     mpeg2_mc_fct * const * const table)     \
+{                                                                            \
+    int motion_x, motion_y;                                                  \
+    uint8_t ** ref_field;                                                    \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* A 1-bit select picks the reference in ref2[]; then one vector. */      \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];                            \
+    DUMPBITS (bit_buf, bits, 1);                                             \
+                                                                             \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;                        \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,                \
+                                                    motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);            \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;                        \
+                                                                             \
+    MOTION (table, ref_field, motion_x, motion_y, 16, 0);                    \
+}                                                                            \
+                                                                             \
+static void motion_fi_16x8_##FORMAT (mpeg2_decoder_t * const decoder,        \
+                                    motion_t * const motion,                 \
+                                    mpeg2_mc_fct * const * const table)      \
+{                                                                            \
+    int motion_x, motion_y;                                                  \
+    uint8_t ** ref_field;                                                    \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* Two (ref select, vector) pairs; MOTION gets (8, 0) then (8, 8). */     \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];                            \
+    DUMPBITS (bit_buf, bits, 1);                                             \
+                                                                             \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[0][0] = motion_x;                                            \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,                \
+                                                    motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);            \
+    motion->pmv[0][1] = motion_y;                                            \
+                                                                             \
+    MOTION (table, ref_field, motion_x, motion_y, 8, 0);                     \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];                            \
+    DUMPBITS (bit_buf, bits, 1);                                             \
+                                                                             \
+    motion_x = motion->pmv[1][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[1][0] = motion_x;                                            \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_y = motion->pmv[1][1] + get_motion_delta (decoder,                \
+                                                    motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);            \
+    motion->pmv[1][1] = motion_y;                                            \
+                                                                             \
+    MOTION (table, ref_field, motion_x, motion_y, 8, 8);                     \
+}                                                                            \
+                                                                             \
+static void motion_fi_dmv_##FORMAT (mpeg2_decoder_t * const decoder,         \
+                                   motion_t * const motion,                  \
+                                   mpeg2_mc_fct * const * const table)       \
+{                                                                            \
+    int motion_x, motion_y, other_x, other_y;                                \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* One vector plus dmv: put from ref[0], avg of derived one on ref[1]. */ \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;                        \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder);        \
+    /* no NEEDBITS before the reads below - presumably bits suffice; verify */ \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,                \
+                                                    motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);            \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;                        \
+    other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) +              \
+              decoder->dmv_offset);                                          \
+                                                                             \
+    MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0);        \
+    MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0);          \
+}                                                                            \
+
+MOTION_FUNCTIONS (420, MOTION_420, MOTION_FIELD_420, MOTION_DMV_420,
+                 MOTION_ZERO_420)
+MOTION_FUNCTIONS (422, MOTION_422, MOTION_FIELD_422, MOTION_DMV_422,
+                 MOTION_ZERO_422)
+MOTION_FUNCTIONS (444, MOTION_444, MOTION_FIELD_444, MOTION_DMV_444,
+                 MOTION_ZERO_444)
 
 /* like motion_frame, but parsing without actual motion compensation */
-static void motion_fr_conceal (picture_t * picture)
+static void motion_fr_conceal (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
     int tmp;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
-    tmp = (picture->f_motion.pmv[0][0] +
-          get_motion_delta (picture, picture->f_motion.f_code[0]));
-    tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]);
-    picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp;
+    tmp = (decoder->f_motion.pmv[0][0] + /* x: predictor + parsed delta */
+          get_motion_delta (decoder, decoder->f_motion.f_code[0]));
+    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]);
+    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
-    tmp = (picture->f_motion.pmv[0][1] +
-          get_motion_delta (picture, picture->f_motion.f_code[1]));
-    tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]);
-    picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp;
+    tmp = (decoder->f_motion.pmv[0][1] + /* y: predictor + parsed delta */
+          get_motion_delta (decoder, decoder->f_motion.f_code[1]));
+    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]);
+    decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp;
 
     DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static void motion_fi_field (picture_t * picture, motion_t * motion,
-                            uint8_t * dest[3], int offset, int stride,
-                            void (** table) (uint8_t *, uint8_t *, int, int))
-{
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
-    int motion_x, motion_y;
-    int field_select;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    field_select = UBITS (bit_buf, 1);
-    DUMPBITS (bit_buf, bits, 1);
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[0][0] + get_motion_delta (picture,
-                                                    motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = motion->pmv[0][1] + get_motion_delta (picture,
-                                                    motion->f_code[1]);
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;
-
-    motion_block (table, offset, picture->v_offset, 0, 0, 0,
-                 motion_x, motion_y,
-                 dest, motion->ref[field_select], stride, 16);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static void motion_fi_16x8 (picture_t * picture, motion_t * motion,
-                           uint8_t * dest[3], int offset, int stride,
-                           void (** table) (uint8_t *, uint8_t *, int, int))
-{
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
-    int motion_x, motion_y;
-    int field_select;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    field_select = UBITS (bit_buf, 1);
-    DUMPBITS (bit_buf, bits, 1);
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[0][0] + get_motion_delta (picture,
-                                                    motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[0][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = motion->pmv[0][1] + get_motion_delta (picture,
-                                                    motion->f_code[1]);
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[0][1] = motion_y;
-
-    motion_block (table, offset, picture->v_offset, 0, 0, 0,
-                 motion_x, motion_y,
-                 dest, motion->ref[field_select], stride, 8);
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    field_select = UBITS (bit_buf, 1);
-    DUMPBITS (bit_buf, bits, 1);
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[1][0] + get_motion_delta (picture,
-                                                    motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[1][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = motion->pmv[1][1] + get_motion_delta (picture,
-                                                    motion->f_code[1]);
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[1][1] = motion_y;
-
-    motion_block (table, offset, picture->v_offset+8, 1, 0, 0,
-                 motion_x, motion_y,
-                 dest, motion->ref[field_select], stride, 8);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static void motion_fi_dmv (picture_t * picture, motion_t * motion,
-                          uint8_t * dest[3], int offset, int stride,
-                          void (** table) (uint8_t *, uint8_t *, int, int))
-{
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
-    int motion_x, motion_y;
-    int dmv_x, dmv_y;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[0][0] + get_motion_delta (picture,
-                                                    motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    dmv_x = get_dmv (picture);
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = motion->pmv[0][1] + get_motion_delta (picture,
-                                                    motion->f_code[1]);
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    dmv_y = get_dmv (picture);
-
-    motion_block (mc_functions.put, offset, picture->v_offset, 0, 0, 0,
-                 motion_x, motion_y,
-                 dest, motion->ref[picture->current_field], stride, 16);
-
-    motion_x = ((motion_x + (motion_x > 0)) >> 1) + dmv_x;
-    motion_y = ((motion_y + (motion_y > 0)) >> 1) + dmv_y +
-       2 * picture->current_field - 1;
-    motion_block (mc_functions.avg, offset, picture->v_offset, 0, 0, 0,
-                 motion_x, motion_y,
-                 dest, motion->ref[!picture->current_field], stride, 16);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
 }
 
-static void motion_fi_reuse (picture_t * picture, motion_t * motion,
-                            uint8_t * dest[3], int offset, int stride,
-                            void (** table) (uint8_t *, uint8_t *, int, int))
+static void motion_fi_conceal (mpeg2_decoder_t * const decoder) /* parse only: updates f_motion PMVs, no MOTION call */
 {
-    motion_block (table, offset, picture->v_offset, 0, 0, 0,
-                 motion->pmv[0][0], motion->pmv[0][1],
-                 dest, motion->ref[picture->current_field], stride, 16);
-}
-
-static void motion_fi_zero (picture_t * picture, motion_t * motion,
-                           uint8_t * dest[3], int offset, int stride,
-                           void (** table) (uint8_t *, uint8_t *, int, int))
-{
-    motion_block (table, offset, picture->v_offset, 0, 0, 0, 0, 0,
-                 dest, motion->ref[picture->current_field], stride, 16);
-}
-
-static void motion_fi_conceal (picture_t * picture)
-{
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
     int tmp;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
     DUMPBITS (bit_buf, bits, 1); /* remove field_select */
 
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    tmp = (picture->f_motion.pmv[0][0] +
-          get_motion_delta (picture, picture->f_motion.f_code[0]));
-    tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]);
-    picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp;
+    tmp = (decoder->f_motion.pmv[0][0] + /* x delta; no refill since the NEEDBITS above */
+          get_motion_delta (decoder, decoder->f_motion.f_code[0]));
+    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]);
+    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
-    tmp = (picture->f_motion.pmv[0][1] +
-          get_motion_delta (picture, picture->f_motion.f_code[1]));
-    tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]);
-    picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp;
+    tmp = (decoder->f_motion.pmv[0][1] + /* y: predictor + parsed delta */
+          get_motion_delta (decoder, decoder->f_motion.f_code[1]));
+    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]);
+    decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp;
 
     DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
+}
+
 #undef bit_buf
 #undef bits
 #undef bit_ptr
-}
 
-#define MOTION(routine,direction)                                      \
-do {                                                                   \
-    if ((direction) & MACROBLOCK_MOTION_FORWARD)                       \
-       routine (picture, &(picture->f_motion), dest, offset, stride,   \
-                mc_functions.put);                                     \
-    if ((direction) & MACROBLOCK_MOTION_BACKWARD)                      \
-       routine (picture, &(picture->b_motion), dest, offset, stride,   \
-                ((direction) & MACROBLOCK_MOTION_FORWARD ?             \
-                 mc_functions.avg : mc_functions.put));                \
+#define MOTION_CALL(routine,direction) /* run routine per direction bit */ \
+do {                                                           \
+    if ((direction) & MACROBLOCK_MOTION_FORWARD) /* forward: always put */ \
+       routine (decoder, &(decoder->f_motion), mpeg2_mc.put);  \
+    if ((direction) & MACROBLOCK_MOTION_BACKWARD) /* avg if forward ran too */ \
+       routine (decoder, &(decoder->b_motion),                 \
+                ((direction) & MACROBLOCK_MOTION_FORWARD ?     \
+                 mpeg2_mc.avg : mpeg2_mc.put));                \
 } while (0)
 
-#define CHECK_DISPLAY                                                  \
+#define NEXT_MACROBLOCK /* needs decoder and cpu_state in scope; may return */ \
 do {                                                                   \
-    if (offset == picture->coded_picture_width) {                      \
+    if(decoder->quant_store) /* optional per-macroblock quantizer map */ \
+        decoder->quant_store[decoder->quant_stride*(decoder->v_offset>>4) \
+                    +(decoder->offset>>4)] = decoder->quantizer_scale;  \
+    decoder->offset += 16; /* step to the next macroblock in the row */ \
+    if (decoder->offset == decoder->width) { /* end of the row */       \
        do { /* just so we can use the break statement */               \
-           if (picture->current_frame->copy) {                         \
-               picture->current_frame->copy (picture->current_frame,   \
-                                             dest);                    \
-               if (picture->picture_coding_type == B_TYPE)             \
+           if (decoder->convert) {                                     \
+               decoder->convert (decoder->convert_id, decoder->dest,   \
+                                 decoder->v_offset);                   \
+               if (decoder->coding_type == B_TYPE) /* dest is not advanced */ \
                    break;                                              \
            }                                                           \
-           dest[0] += 16 * stride;                                     \
-           dest[1] += 4 * stride;                                      \
-           dest[2] += 4 * stride;                                      \
+           decoder->dest[0] += decoder->slice_stride;                  \
+           decoder->dest[1] += decoder->slice_uv_stride;               \
+           decoder->dest[2] += decoder->slice_uv_stride;               \
        } while (0);                                                    \
-       if (! (picture->mpeg1))                                         \
-           return 0;                                                   \
-       picture->v_offset += 16;                                        \
-       if (picture->v_offset >= picture->coded_picture_height)         \
-           return 0;                                                   \
-       offset = 0; ++code;                                             \
+       decoder->v_offset += 16;                                        \
+       if (decoder->v_offset > decoder->limit_y) { /* past the last row */ \
+           if (mpeg2_cpu_state_restore)                                \
+               mpeg2_cpu_state_restore (&cpu_state);                   \
+           return; /* leaves the function that expanded the macro */   \
+       }                                                               \
+       decoder->offset = 0;                                            \
     }                                                                  \
 } while (0)
 
-int slice_process (picture_t * picture, uint8_t code, uint8_t * buffer)
+void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3],
+                     uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3])
 {
-#define bit_buf (picture->bitstream_buf)
-#define bits (picture->bitstream_bits)
-#define bit_ptr (picture->bitstream_ptr)
-    int macroblock_modes;
-    int stride;
-    uint8_t * dest[3];
-    int offset;
-    uint8_t ** forward_ref[2];
-
-    stride = picture->coded_picture_width;
-    offset = (code - 1) * stride * 4;
-    picture->v_offset = (code - 1) * 16;
-
-    forward_ref[0] = picture->forward_reference_frame->base;
-    if (picture->picture_structure != FRAME_PICTURE) {
-       forward_ref[1] = picture->forward_reference_frame->base;
-       offset <<= 1;
-       picture->current_field = (picture->picture_structure == BOTTOM_FIELD);
-       if ((picture->second_field) &&
-           (picture->picture_coding_type != B_TYPE))
-           forward_ref[picture->picture_structure == TOP_FIELD] =
-               picture->current_frame->base;
-
-       picture->f_motion.ref[1][0] = forward_ref[1][0] + stride;
-       picture->f_motion.ref[1][1] = forward_ref[1][1] + (stride >> 1);
-       picture->f_motion.ref[1][2] = forward_ref[1][2] + (stride >> 1);
-
-       picture->b_motion.ref[1][0] =
-           picture->backward_reference_frame->base[0] + stride;
-       picture->b_motion.ref[1][1] =
-           picture->backward_reference_frame->base[1] + (stride >> 1);
-       picture->b_motion.ref[1][2] =
-           picture->backward_reference_frame->base[2] + (stride >> 1);
-    }
+    int offset, stride, height, bottom_field;
+    /* Per-picture setup: destination and reference plane pointers, strides,
+     * the limit_* bounds and the motion_parser table.  For a bottom field
+     * every plane-0 pointer starts stride_frame further in, and the other
+     * two planes start half of that further in. */
+    stride = decoder->stride_frame;
+    bottom_field = (decoder->picture_structure == BOTTOM_FIELD);
+    offset = bottom_field ? stride : 0;
+    height = decoder->height;
 
-    picture->f_motion.ref[0][0] = forward_ref[0][0];
-    picture->f_motion.ref[0][1] = forward_ref[0][1];
-    picture->f_motion.ref[0][2] = forward_ref[0][2];
+    decoder->picture_dest[0] = current_fbuf[0] + offset;
+    decoder->picture_dest[1] = current_fbuf[1] + (offset >> 1);
+    decoder->picture_dest[2] = current_fbuf[2] + (offset >> 1);
 
-    picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0;
-    picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0;
+    decoder->f_motion.ref[0][0] = forward_fbuf[0] + offset;
+    decoder->f_motion.ref[0][1] = forward_fbuf[1] + (offset >> 1);
+    decoder->f_motion.ref[0][2] = forward_fbuf[2] + (offset >> 1);
 
-    picture->b_motion.ref[0][0] = picture->backward_reference_frame->base[0];
-    picture->b_motion.ref[0][1] = picture->backward_reference_frame->base[1];
-    picture->b_motion.ref[0][2] = picture->backward_reference_frame->base[2];
+    decoder->b_motion.ref[0][0] = backward_fbuf[0] + offset;
+    decoder->b_motion.ref[0][1] = backward_fbuf[1] + (offset >> 1);
+    decoder->b_motion.ref[0][2] = backward_fbuf[2] + (offset >> 1);
 
-    picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0;
-    picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0;
+    if (decoder->picture_structure != FRAME_PICTURE) {
+       decoder->dmv_offset = bottom_field ? 1 : -1;
+       decoder->f_motion.ref2[0] = decoder->f_motion.ref[bottom_field];
+       decoder->f_motion.ref2[1] = decoder->f_motion.ref[!bottom_field];
+       decoder->b_motion.ref2[0] = decoder->b_motion.ref[bottom_field];
+       decoder->b_motion.ref2[1] = decoder->b_motion.ref[!bottom_field];
+       offset = stride - offset; /* now the offset of the opposite field */
 
-    if ((picture->current_frame->copy) &&
-       (picture->picture_coding_type == B_TYPE))
-       offset = 0;
+       if (decoder->second_field && (decoder->coding_type != B_TYPE))
+           forward_fbuf = current_fbuf; /* forward ref[1] then comes from the current buffer */
 
-    dest[0] = picture->current_frame->base[0] + offset * 4;
-    dest[1] = picture->current_frame->base[1] + offset;
-    dest[2] = picture->current_frame->base[2] + offset;
+       decoder->f_motion.ref[1][0] = forward_fbuf[0] + offset;
+       decoder->f_motion.ref[1][1] = forward_fbuf[1] + (offset >> 1);
+       decoder->f_motion.ref[1][2] = forward_fbuf[2] + (offset >> 1);
+
+       decoder->b_motion.ref[1][0] = backward_fbuf[0] + offset;
+       decoder->b_motion.ref[1][1] = backward_fbuf[1] + (offset >> 1);
+       decoder->b_motion.ref[1][2] = backward_fbuf[2] + (offset >> 1);
 
-    switch (picture->picture_structure) {
-    case BOTTOM_FIELD:
-       dest[0] += stride;
-       dest[1] += stride >> 1;
-       dest[2] += stride >> 1;
-       /* follow thru */
-    case TOP_FIELD:
        stride <<= 1;
+       height >>= 1; /* field picture: doubled stride, half the height */
     }
 
-    picture->dc_dct_pred[0] = picture->dc_dct_pred[1] =
-       picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision + 7);
+    decoder->stride = stride;
+    decoder->uv_stride = stride >> 1;
+    decoder->slice_stride = 16 * stride;
+    decoder->slice_uv_stride =
+       decoder->slice_stride >> (2 - decoder->chroma_format);
+    decoder->limit_x = 2 * decoder->width - 32;
+    decoder->limit_y_16 = 2 * height - 32;
+    decoder->limit_y_8 = 2 * height - 16;
+    decoder->limit_y = height - 16;
+    /* Choose the motion parsers: MPEG-1, frame picture or field picture,
+     * and within the latter two by chroma_format (0, 1 or anything else). */
+    if (decoder->mpeg1) {
+       decoder->motion_parser[0] = motion_zero_420;
+       decoder->motion_parser[MC_FRAME] = motion_mp1;
+       decoder->motion_parser[4] = motion_reuse_420;
+    } else if (decoder->picture_structure == FRAME_PICTURE) {
+       if (decoder->chroma_format == 0) {
+           decoder->motion_parser[0] = motion_zero_420;
+           decoder->motion_parser[MC_FIELD] = motion_fr_field_420;
+           decoder->motion_parser[MC_FRAME] = motion_fr_frame_420;
+           decoder->motion_parser[MC_DMV] = motion_fr_dmv_420;
+           decoder->motion_parser[4] = motion_reuse_420;
+       } else if (decoder->chroma_format == 1) {
+           decoder->motion_parser[0] = motion_zero_422;
+           decoder->motion_parser[MC_FIELD] = motion_fr_field_422;
+           decoder->motion_parser[MC_FRAME] = motion_fr_frame_422;
+           decoder->motion_parser[MC_DMV] = motion_fr_dmv_422;
+           decoder->motion_parser[4] = motion_reuse_422;
+       } else {
+           decoder->motion_parser[0] = motion_zero_444;
+           decoder->motion_parser[MC_FIELD] = motion_fr_field_444;
+           decoder->motion_parser[MC_FRAME] = motion_fr_frame_444;
+           decoder->motion_parser[MC_DMV] = motion_fr_dmv_444;
+           decoder->motion_parser[4] = motion_reuse_444;
+       }
+    } else {
+       if (decoder->chroma_format == 0) {
+           decoder->motion_parser[0] = motion_zero_420;
+           decoder->motion_parser[MC_FIELD] = motion_fi_field_420;
+           decoder->motion_parser[MC_16X8] = motion_fi_16x8_420;
+           decoder->motion_parser[MC_DMV] = motion_fi_dmv_420;
+           decoder->motion_parser[4] = motion_reuse_420;
+       } else if (decoder->chroma_format == 1) {
+           decoder->motion_parser[0] = motion_zero_422;
+           decoder->motion_parser[MC_FIELD] = motion_fi_field_422;
+           decoder->motion_parser[MC_16X8] = motion_fi_16x8_422;
+           decoder->motion_parser[MC_DMV] = motion_fi_dmv_422;
+           decoder->motion_parser[4] = motion_reuse_422;
+       } else {
+           decoder->motion_parser[0] = motion_zero_444;
+           decoder->motion_parser[MC_FIELD] = motion_fi_field_444;
+           decoder->motion_parser[MC_16X8] = motion_fi_16x8_444;
+           decoder->motion_parser[MC_DMV] = motion_fi_dmv_444;
+           decoder->motion_parser[4] = motion_reuse_444;
+       }
+    }
+}
+/*
+ * slice_init: resets the DC and motion vector predictors, positions
+ * decoder->dest, decoder->v_offset and decoder->offset for the slice given
+ * by code, and consumes the bitstream up to and including the first
+ * macroblock address increment.
+ * Returns 0 on success, 1 when the address increment cannot be decoded or
+ * the resulting v_offset lies beyond decoder->limit_y.
+ */
+static inline int slice_init (mpeg2_decoder_t * const decoder, int code)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int offset;
+    const MBAtab * mba;
+
+    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+       decoder->dc_dct_pred[2] = 16384;
+
+    decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
+    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
+    decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
+    decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
 
-    bitstream_init (picture, buffer);
+    if (decoder->vertical_position_extension) {
+       code += UBITS (bit_buf, 3) << 7; /* 3 extra bits extend code */
+       DUMPBITS (bit_buf, bits, 3);
+    }
+    decoder->v_offset = (code - 1) * 16;
+    offset = 0; /* stays 0 for B pictures when convert is set */
+    if (!(decoder->convert) || decoder->coding_type != B_TYPE)
+       offset = (code - 1) * decoder->slice_stride;
+
+    decoder->dest[0] = decoder->picture_dest[0] + offset;
+    offset >>= (2 - decoder->chroma_format); /* scaled for planes 1 and 2 */
+    decoder->dest[1] = decoder->picture_dest[1] + offset;
+    decoder->dest[2] = decoder->picture_dest[2] + offset;
 
-    picture->quantizer_scale = get_quantizer_scale (picture);
+    get_quantizer_scale (decoder); /* result not assigned here; presumably stored by the callee - confirm */
 
     /* ignore intra_slice and all the extra data */
     while (bit_buf & 0x80000000) {
        DUMPBITS (bit_buf, bits, 9);
        NEEDBITS (bit_buf, bits, bit_ptr);
     }
-    DUMPBITS (bit_buf, bits, 1);
 
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    offset = get_macroblock_address_increment (picture) << 4;
+    /* decode initial macroblock address increment.  The 0 bit that ended
+     * the loop above has not been dumped, so the lookups here use one more
+     * bit (6 and 12) and the final DUMPBITS drops mba->len + 1 bits. */
+    offset = 0;
+    while (1) {
+       if (bit_buf >= 0x08000000) {
+           mba = MBA_5 + (UBITS (bit_buf, 6) - 2);
+           break;
+       } else if (bit_buf >= 0x01800000) {
+           mba = MBA_11 + (UBITS (bit_buf, 12) - 24);
+           break;
+       } else switch (UBITS (bit_buf, 12)) {
+       case 8:         /* macroblock_escape */
+           offset += 33;
+           DUMPBITS (bit_buf, bits, 11);
+           NEEDBITS (bit_buf, bits, bit_ptr);
+           continue;
+       case 15:        /* macroblock_stuffing (MPEG1 only) */
+           bit_buf &= 0xfffff; /* zero the 12 matched bits so that a 0
+                                * bit leads again after the dump
+                                * (assumes a 32-bit bit_buf) */
+           DUMPBITS (bit_buf, bits, 11);
+           NEEDBITS (bit_buf, bits, bit_ptr);
+           continue;
+       default:        /* error */
+           return 1;
+       }
+    }
+    DUMPBITS (bit_buf, bits, mba->len + 1);
+    decoder->offset = (offset + mba->mba) << 4;
+    /* an increment of a full row or more moves down by whole rows */
+    while (decoder->offset - decoder->width >= 0) {
+       decoder->offset -= decoder->width;
+       if (!(decoder->convert) || decoder->coding_type != B_TYPE) {
+           decoder->dest[0] += decoder->slice_stride;
+           decoder->dest[1] += decoder->slice_uv_stride;
+           decoder->dest[2] += decoder->slice_uv_stride;
+       }
+       decoder->v_offset += 16;
+    }
+    if (decoder->v_offset > decoder->limit_y)
+       return 1;
+
+    return 0;
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code,
+                 const uint8_t * const buffer)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    cpu_state_t cpu_state;
+
+    bitstream_init (decoder, buffer);
+
+    if (slice_init (decoder, code))
+       return;
+
+    if (mpeg2_cpu_state_save)
+       mpeg2_cpu_state_save (&cpu_state);
 
     while (1) {
+       int macroblock_modes;
+       int mba_inc;
+       const MBAtab * mba;
+
        NEEDBITS (bit_buf, bits, bit_ptr);
 
-       macroblock_modes = get_macroblock_modes (picture);
+       macroblock_modes = get_macroblock_modes (decoder);
 
        /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
        if (macroblock_modes & MACROBLOCK_QUANT)
-           picture->quantizer_scale = get_quantizer_scale (picture);
+           get_quantizer_scale (decoder);
 
        if (macroblock_modes & MACROBLOCK_INTRA) {
 
            int DCT_offset, DCT_stride;
+           int offset;
+           uint8_t * dest_y;
 
-           if (picture->concealment_motion_vectors) {
-               if (picture->picture_structure == FRAME_PICTURE)
-                   motion_fr_conceal (picture);
+           if (decoder->concealment_motion_vectors) {
+               if (decoder->picture_structure == FRAME_PICTURE)
+                   motion_fr_conceal (decoder);
                else
-                   motion_fi_conceal (picture);
+                   motion_fi_conceal (decoder);
            } else {
-               picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0;
-               picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0;
-               picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0;
-               picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0;
+               decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
+               decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
+               decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
+               decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
            }
 
            if (macroblock_modes & DCT_TYPE_INTERLACED) {
-               DCT_offset = stride;
-               DCT_stride = stride * 2;
+               DCT_offset = decoder->stride;
+               DCT_stride = decoder->stride * 2;
            } else {
-               DCT_offset = stride * 8;
-               DCT_stride = stride;
+               DCT_offset = decoder->stride * 8;
+               DCT_stride = decoder->stride;
            }
 
-           slice_intra_DCT (picture, 0, dest[0] + offset, DCT_stride);
-           slice_intra_DCT (picture, 0, dest[0] + offset + 8, DCT_stride);
-           slice_intra_DCT (picture, 0, dest[0] + offset + DCT_offset,
-                            DCT_stride);
-           slice_intra_DCT (picture, 0, dest[0] + offset + DCT_offset + 8,
-                            DCT_stride);
-
-           slice_intra_DCT (picture, 1, dest[1] + (offset >> 1), stride >> 1);
-           slice_intra_DCT (picture, 2, dest[2] + (offset >> 1), stride >> 1);
-
-           if (picture->picture_coding_type == D_TYPE) {
-               NEEDBITS (bit_buf, bits, bit_ptr);
-               DUMPBITS (bit_buf, bits, 1);
+           offset = decoder->offset;
+           dest_y = decoder->dest[0] + offset;
+           slice_intra_DCT (decoder, 0, dest_y, DCT_stride);
+           slice_intra_DCT (decoder, 0, dest_y + 8, DCT_stride);
+           slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride);
+           slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 8, DCT_stride);
+           if (likely (decoder->chroma_format == 0)) {
+               slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1),
+                                decoder->uv_stride);
+               slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1),
+                                decoder->uv_stride);
+               if (decoder->coding_type == D_TYPE) {
+                   NEEDBITS (bit_buf, bits, bit_ptr);
+                   DUMPBITS (bit_buf, bits, 1);
+               }
+           } else if (likely (decoder->chroma_format == 1)) {
+               uint8_t * dest_u = decoder->dest[1] + (offset >> 1);
+               uint8_t * dest_v = decoder->dest[2] + (offset >> 1);
+               DCT_stride >>= 1;
+               DCT_offset >>= 1;
+               slice_intra_DCT (decoder, 1, dest_u, DCT_stride);
+               slice_intra_DCT (decoder, 2, dest_v, DCT_stride);
+               slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride);
+               slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride);
+           } else {
+               uint8_t * dest_u = decoder->dest[1] + offset;
+               uint8_t * dest_v = decoder->dest[2] + offset;
+               slice_intra_DCT (decoder, 1, dest_u, DCT_stride);
+               slice_intra_DCT (decoder, 2, dest_v, DCT_stride);
+               slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride);
+               slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride);
+               slice_intra_DCT (decoder, 1, dest_u + 8, DCT_stride);
+               slice_intra_DCT (decoder, 2, dest_v + 8, DCT_stride);
+               slice_intra_DCT (decoder, 1, dest_u + DCT_offset + 8,
+                                DCT_stride);
+               slice_intra_DCT (decoder, 2, dest_v + DCT_offset + 8,
+                                DCT_stride);
            }
        } else {
 
-           if (picture->mpeg1) {
-               if ((macroblock_modes & MOTION_TYPE_MASK) == MC_FRAME)
-                   MOTION (motion_mp1, macroblock_modes);
-               else {
-                   /* non-intra mb without forward mv in a P picture */
-                   picture->f_motion.pmv[0][0] = 0;
-                   picture->f_motion.pmv[0][1] = 0;
-                   picture->f_motion.pmv[1][0] = 0;
-                   picture->f_motion.pmv[1][1] = 0;
-                   MOTION (motion_fr_zero, MACROBLOCK_MOTION_FORWARD);
-               }
-           } else if (picture->picture_structure == FRAME_PICTURE)
-               switch (macroblock_modes & MOTION_TYPE_MASK) {
-               case MC_FRAME:
-                   MOTION (motion_fr_frame, macroblock_modes);
-                   break;
-
-               case MC_FIELD:
-                   MOTION (motion_fr_field, macroblock_modes);
-                   break;
-
-               case MC_DMV:
-                   MOTION (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD);
-                   break;
-
-               case 0:
-                   /* non-intra mb without forward mv in a P picture */
-                   picture->f_motion.pmv[0][0] = 0;
-                   picture->f_motion.pmv[0][1] = 0;
-                   picture->f_motion.pmv[1][0] = 0;
-                   picture->f_motion.pmv[1][1] = 0;
-                   MOTION (motion_fr_zero, MACROBLOCK_MOTION_FORWARD);
-                   break;
-               }
-           else
-               switch (macroblock_modes & MOTION_TYPE_MASK) {
-               case MC_FIELD:
-                   MOTION (motion_fi_field, macroblock_modes);
-                   break;
-
-               case MC_16X8:
-                   MOTION (motion_fi_16x8, macroblock_modes);
-                   break;
-
-               case MC_DMV:
-                   MOTION (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD);
-                   break;
-
-               case 0:
-                   /* non-intra mb without forward mv in a P picture */
-                   picture->f_motion.pmv[0][0] = 0;
-                   picture->f_motion.pmv[0][1] = 0;
-                   picture->f_motion.pmv[1][0] = 0;
-                   picture->f_motion.pmv[1][1] = 0;
-                   MOTION (motion_fi_zero, MACROBLOCK_MOTION_FORWARD);
-                   break;
-               }
+           motion_parser_t * parser;
+
+           parser =
+               decoder->motion_parser[macroblock_modes >> MOTION_TYPE_SHIFT];
+           MOTION_CALL (parser, macroblock_modes);
 
            if (macroblock_modes & MACROBLOCK_PATTERN) {
                int coded_block_pattern;
                int DCT_offset, DCT_stride;
 
                if (macroblock_modes & DCT_TYPE_INTERLACED) {
-                   DCT_offset = stride;
-                   DCT_stride = stride * 2;
+                   DCT_offset = decoder->stride;
+                   DCT_stride = decoder->stride * 2;
                } else {
-                   DCT_offset = stride * 8;
-                   DCT_stride = stride;
+                   DCT_offset = decoder->stride * 8;
+                   DCT_stride = decoder->stride;
                }
 
-               coded_block_pattern = get_coded_block_pattern (picture);
-
-               if (coded_block_pattern & 0x20)
-                   slice_non_intra_DCT (picture, dest[0] + offset,
-                                        DCT_stride);
-               if (coded_block_pattern & 0x10)
-                   slice_non_intra_DCT (picture, dest[0] + offset + 8,
-                                        DCT_stride);
-               if (coded_block_pattern & 0x08)
-                   slice_non_intra_DCT (picture,
-                                        dest[0] + offset + DCT_offset,
-                                        DCT_stride);
-               if (coded_block_pattern & 0x04)
-                   slice_non_intra_DCT (picture,
-                                        dest[0] + offset + DCT_offset + 8,
-                                        DCT_stride);
-
-               if (coded_block_pattern & 0x2)
-                   slice_non_intra_DCT (picture, dest[1] + (offset >> 1),
-                                        stride >> 1);
-               if (coded_block_pattern & 0x1)
-                   slice_non_intra_DCT (picture, dest[2] + (offset >> 1),
-                                        stride >> 1);
+               coded_block_pattern = get_coded_block_pattern (decoder);
+
+               if (likely (decoder->chroma_format == 0)) {
+                   int offset = decoder->offset;
+                   uint8_t * dest_y = decoder->dest[0] + offset;
+                   if (coded_block_pattern & 1)
+                       slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
+                   if (coded_block_pattern & 2)
+                       slice_non_intra_DCT (decoder, 0, dest_y + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & 4)
+                       slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
+                                            DCT_stride);
+                   if (coded_block_pattern & 8)
+                       slice_non_intra_DCT (decoder, 0,
+                                            dest_y + DCT_offset + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & 16)
+                       slice_non_intra_DCT (decoder, 1,
+                                            decoder->dest[1] + (offset >> 1),
+                                            decoder->uv_stride);
+                   if (coded_block_pattern & 32)
+                       slice_non_intra_DCT (decoder, 2,
+                                            decoder->dest[2] + (offset >> 1),
+                                            decoder->uv_stride);
+               } else if (likely (decoder->chroma_format == 1)) {
+                   int offset;
+                   uint8_t * dest_y;
+
+                   coded_block_pattern |= bit_buf & (3 << 30);
+                   DUMPBITS (bit_buf, bits, 2);
+
+                   offset = decoder->offset;
+                   dest_y = decoder->dest[0] + offset;
+                   if (coded_block_pattern & 1)
+                       slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
+                   if (coded_block_pattern & 2)
+                       slice_non_intra_DCT (decoder, 0, dest_y + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & 4)
+                       slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
+                                            DCT_stride);
+                   if (coded_block_pattern & 8)
+                       slice_non_intra_DCT (decoder, 0,
+                                            dest_y + DCT_offset + 8,
+                                            DCT_stride);
+
+                   DCT_stride >>= 1;
+                   DCT_offset = (DCT_offset + offset) >> 1;
+                   if (coded_block_pattern & 16)
+                       slice_non_intra_DCT (decoder, 1,
+                                            decoder->dest[1] + (offset >> 1),
+                                            DCT_stride);
+                   if (coded_block_pattern & 32)
+                       slice_non_intra_DCT (decoder, 2,
+                                            decoder->dest[2] + (offset >> 1),
+                                            DCT_stride);
+                   if (coded_block_pattern & (2 << 30))
+                       slice_non_intra_DCT (decoder, 1,
+                                            decoder->dest[1] + DCT_offset,
+                                            DCT_stride);
+                   if (coded_block_pattern & (1 << 30))
+                       slice_non_intra_DCT (decoder, 2,
+                                            decoder->dest[2] + DCT_offset,
+                                            DCT_stride);
+               } else {
+                   int offset;
+                   uint8_t * dest_y, * dest_u, * dest_v;
+
+                   coded_block_pattern |= bit_buf & (63 << 26);
+                   DUMPBITS (bit_buf, bits, 6);
+
+                   offset = decoder->offset;
+                   dest_y = decoder->dest[0] + offset;
+                   dest_u = decoder->dest[1] + offset;
+                   dest_v = decoder->dest[2] + offset;
+
+                   if (coded_block_pattern & 1)
+                       slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
+                   if (coded_block_pattern & 2)
+                       slice_non_intra_DCT (decoder, 0, dest_y + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & 4)
+                       slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
+                                            DCT_stride);
+                   if (coded_block_pattern & 8)
+                       slice_non_intra_DCT (decoder, 0,
+                                            dest_y + DCT_offset + 8,
+                                            DCT_stride);
+
+                   if (coded_block_pattern & 16)
+                       slice_non_intra_DCT (decoder, 1, dest_u, DCT_stride);
+                   if (coded_block_pattern & 32)
+                       slice_non_intra_DCT (decoder, 2, dest_v, DCT_stride);
+                   if (coded_block_pattern & (32 << 26))
+                       slice_non_intra_DCT (decoder, 1, dest_u + DCT_offset,
+                                            DCT_stride);
+                   if (coded_block_pattern & (16 << 26))
+                       slice_non_intra_DCT (decoder, 2, dest_v + DCT_offset,
+                                            DCT_stride);
+                   if (coded_block_pattern & (8 << 26))
+                       slice_non_intra_DCT (decoder, 1, dest_u + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & (4 << 26))
+                       slice_non_intra_DCT (decoder, 2, dest_v + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & (2 << 26))
+                       slice_non_intra_DCT (decoder, 1,
+                                            dest_u + DCT_offset + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & (1 << 26))
+                       slice_non_intra_DCT (decoder, 2,
+                                            dest_v + DCT_offset + 8,
+                                            DCT_stride);
+               }
            }
 
-           picture->dc_dct_pred[0] = picture->dc_dct_pred[1] =
-               picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision+7);
+           decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+               decoder->dc_dct_pred[2] = 16384;
        }
 
-#ifdef MPEG12_POSTPROC
-       quant_store[code][(offset>>4)+1] = picture->quantizer_scale;
-#endif
-       offset += 16;
-       CHECK_DISPLAY;
+       NEXT_MACROBLOCK;
 
        NEEDBITS (bit_buf, bits, bit_ptr);
-
-       if (bit_buf & 0x80000000) {
-           DUMPBITS (bit_buf, bits, 1);
-       } else {
-           int mba_inc;
-
-           mba_inc = get_macroblock_address_increment (picture);
-           if (!mba_inc)
+       mba_inc = 0;
+       while (1) {
+           if (bit_buf >= 0x10000000) {
+               mba = MBA_5 + (UBITS (bit_buf, 5) - 2);
                break;
+           } else if (bit_buf >= 0x03000000) {
+               mba = MBA_11 + (UBITS (bit_buf, 11) - 24);
+               break;
+           } else switch (UBITS (bit_buf, 11)) {
+           case 8:             /* macroblock_escape */
+               mba_inc += 33;
+               /* pass through */
+           case 15:    /* macroblock_stuffing (MPEG1 only) */
+               DUMPBITS (bit_buf, bits, 11);
+               NEEDBITS (bit_buf, bits, bit_ptr);
+               continue;
+           default:    /* end of slice, or error */
+               if (mpeg2_cpu_state_restore)
+                   mpeg2_cpu_state_restore (&cpu_state);
+               return;
+           }
+       }
+       DUMPBITS (bit_buf, bits, mba->len);
+       mba_inc += mba->mba;
 
-           picture->dc_dct_pred[0] = picture->dc_dct_pred[1] =
-               picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision+7);
-
-           if (picture->picture_coding_type == P_TYPE) {
-               picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0;
-               picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0;
+       if (mba_inc) {
+           decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+               decoder->dc_dct_pred[2] = 16384;
 
+           if (decoder->coding_type == P_TYPE) {
                do {
-                   if (picture->picture_structure == FRAME_PICTURE)
-                       MOTION (motion_fr_zero, MACROBLOCK_MOTION_FORWARD);
-                   else
-                       MOTION (motion_fi_zero, MACROBLOCK_MOTION_FORWARD);
-
-#ifdef MPEG12_POSTPROC
-       quant_store[code][(offset>>4)+1] = picture->quantizer_scale;
-#endif
-
-                   offset += 16;
-                   CHECK_DISPLAY;
+                   MOTION_CALL (decoder->motion_parser[0],
+                                MACROBLOCK_MOTION_FORWARD);
+                   NEXT_MACROBLOCK;
                } while (--mba_inc);
            } else {
                do {
-                   if (picture->mpeg1)
-                       MOTION (motion_mp1_reuse, macroblock_modes);
-                   else if (picture->picture_structure == FRAME_PICTURE)
-                       MOTION (motion_fr_reuse, macroblock_modes);
-                   else
-                       MOTION (motion_fi_reuse, macroblock_modes);
-
-#ifdef MPEG12_POSTPROC
-       quant_store[code][(offset>>4)+1] = picture->quantizer_scale;
-#endif
-
-                   offset += 16;
-                   CHECK_DISPLAY;
+                   MOTION_CALL (decoder->motion_parser[4], macroblock_modes);
+                   NEXT_MACROBLOCK;
                } while (--mba_inc);
            }
        }
     }
-
-    return 0;
 #undef bit_buf
 #undef bits
 #undef bit_ptr