postproc fix
[mplayer.git] / libmpeg2 / slice.c
index 13940b3..a58487f 100644 (file)
@@ -1,8 +1,11 @@
 /*
  * slice.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 2003      Peter Gubanov <peter@elecard.net.ru>
  * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
  *
  * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
  *
  * mpeg2dec is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
 
 #include "config.h"
 
-#include <string.h>
 #include <inttypes.h>
 
-#include "mpeg2_internal.h"
+#include "mpeg2.h"
 #include "attributes.h"
+#include "mpeg2_internal.h"
 
-extern mc_functions_t mc_functions;
-extern void (* idct_block_copy) (int16_t * block, uint8_t * dest, int stride);
-extern void (* idct_block_add) (int16_t * block, uint8_t * dest, int stride);
-
-static int16_t DCTblock[64] ATTR_ALIGN(16);
+extern mpeg2_mc_t mpeg2_mc;
+extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
+extern void (* mpeg2_idct_add) (int last, int16_t * block,
+                               uint8_t * dest, int stride);
+extern void (* mpeg2_cpu_state_save) (cpu_state_t * state);
+extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state);
 
 #include "vlc.h"
 
-static int non_linear_quantizer_scale [] = {
-     0,  1,  2,  3,  4,  5,   6,   7,
-     8, 10, 12, 14, 16, 18,  20,  22,
-    24, 28, 32, 36, 40, 44,  48,  52,
-    56, 64, 72, 80, 88, 96, 104, 112
-};
-
-static inline int get_macroblock_modes (slice_t * slice, int picture_structure,
-                                       int picture_coding_type,
-                                       int frame_pred_frame_dct)
+static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
     int macroblock_modes;
-    MBtab * tab;
+    const MBtab * tab;
 
-    switch (picture_coding_type) {
+    switch (decoder->coding_type) {
     case I_TYPE:
 
        tab = MB_I + UBITS (bit_buf, 1);
        DUMPBITS (bit_buf, bits, tab->len);
        macroblock_modes = tab->modes;
 
-       if ((! frame_pred_frame_dct) && (picture_structure == FRAME_PICTURE)) {
+       if ((! (decoder->frame_pred_frame_dct)) &&
+           (decoder->picture_structure == FRAME_PICTURE)) {
            macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
            DUMPBITS (bit_buf, bits, 1);
        }
@@ -72,26 +68,26 @@ static inline int get_macroblock_modes (slice_t * slice, int picture_structure,
        DUMPBITS (bit_buf, bits, tab->len);
        macroblock_modes = tab->modes;
 
-       if (picture_structure != FRAME_PICTURE) {
+       if (decoder->picture_structure != FRAME_PICTURE) {
            if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
-               macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE;
+               macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
                DUMPBITS (bit_buf, bits, 2);
            }
-           return macroblock_modes;
-       } else if (frame_pred_frame_dct) {
+           return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
+       } else if (decoder->frame_pred_frame_dct) {
            if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
-               macroblock_modes |= MC_FRAME;
-           return macroblock_modes;
+               macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT;
+           return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
        } else {
            if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
-               macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE;
+               macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
                DUMPBITS (bit_buf, bits, 2);
            }
            if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
                macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
                DUMPBITS (bit_buf, bits, 1);
            }
-           return macroblock_modes;
+           return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
        }
 
     case B_TYPE:
@@ -100,20 +96,20 @@ static inline int get_macroblock_modes (slice_t * slice, int picture_structure,
        DUMPBITS (bit_buf, bits, tab->len);
        macroblock_modes = tab->modes;
 
-       if (picture_structure != FRAME_PICTURE) {
+       if (decoder->picture_structure != FRAME_PICTURE) {
            if (! (macroblock_modes & MACROBLOCK_INTRA)) {
-               macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE;
+               macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
                DUMPBITS (bit_buf, bits, 2);
            }
            return macroblock_modes;
-       } else if (frame_pred_frame_dct) {
-           //if (! (macroblock_modes & MACROBLOCK_INTRA))
-           macroblock_modes |= MC_FRAME;
+       } else if (decoder->frame_pred_frame_dct) {
+           /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
+           macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT;
            return macroblock_modes;
        } else {
            if (macroblock_modes & MACROBLOCK_INTRA)
                goto intra;
-           macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE;
+           macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
            DUMPBITS (bit_buf, bits, 2);
            if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
            intra:
@@ -136,35 +132,41 @@ static inline int get_macroblock_modes (slice_t * slice, int picture_structure,
 #undef bit_ptr
 }
 
-static inline int get_quantizer_scale (slice_t * slice, int q_scale_type)
+static inline void get_quantizer_scale (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
 
     int quantizer_scale_code;
 
     quantizer_scale_code = UBITS (bit_buf, 5);
     DUMPBITS (bit_buf, bits, 5);
-
-    if (q_scale_type)
-       return non_linear_quantizer_scale [quantizer_scale_code];
-    else
-       return quantizer_scale_code << 1;
+    decoder->quantizer_scale = decoder->quantizer_scales[quantizer_scale_code];
+
+    decoder->quantizer_matrix[0] =
+       decoder->quantizer_prescale[0][quantizer_scale_code];
+    decoder->quantizer_matrix[1] =
+       decoder->quantizer_prescale[1][quantizer_scale_code];
+    decoder->quantizer_matrix[2] =
+       decoder->chroma_quantizer[0][quantizer_scale_code];
+    decoder->quantizer_matrix[3] =
+       decoder->chroma_quantizer[1][quantizer_scale_code];
 #undef bit_buf
 #undef bits
 #undef bit_ptr
 }
 
-static inline int get_motion_delta (slice_t * slice, int f_code)
+static inline int get_motion_delta (mpeg2_decoder_t * const decoder,
+                                   const int f_code)
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
 
     int delta;
     int sign;
-    MVtab * tab;
+    const MVtab * tab;
 
     if (bit_buf & 0x80000000) {
        DUMPBITS (bit_buf, bits, 1);
@@ -209,30 +211,18 @@ static inline int get_motion_delta (slice_t * slice, int f_code)
 #undef bit_ptr
 }
 
-static inline int bound_motion_vector (int vector, int f_code)
+static inline int bound_motion_vector (const int vector, const int f_code)
 {
-#if 1
-    int limit;
-
-    limit = 16 << f_code;
-
-    if (vector >= limit)
-       return vector - 2*limit;
-    else if (vector < -limit)
-       return vector + 2*limit;
-    else return vector;
-#else
-    return (vector << (27 - f_code)) >> (27 - f_code);
-#endif
+    return ((int32_t)vector << (27 - f_code)) >> (27 - f_code);
 }
 
-static inline int get_dmv (slice_t * slice)
+static inline int get_dmv (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
 
-    DMVtab * tab;
+    const DMVtab * tab;
 
     tab = DMV_2 + UBITS (bit_buf, 2);
     DUMPBITS (bit_buf, bits, tab->len);
@@ -242,19 +232,19 @@ static inline int get_dmv (slice_t * slice)
 #undef bit_ptr
 }
 
-static inline int get_coded_block_pattern (slice_t * slice)
+static inline int get_coded_block_pattern (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
 
-    CBPtab * tab;
+    const CBPtab * tab;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
 
     if (bit_buf >= 0x20000000) {
 
-       tab = CBP_7 - 16 + UBITS (bit_buf, 7);
+       tab = CBP_7 + (UBITS (bit_buf, 7) - 16);
        DUMPBITS (bit_buf, bits, tab->len);
        return tab->cbp;
 
@@ -270,12 +260,12 @@ static inline int get_coded_block_pattern (slice_t * slice)
 #undef bit_ptr
 }
 
-static inline int get_luma_dc_dct_diff (slice_t * slice)
+static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
-    DCtab * tab;
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    const DCtab * tab;
     int size;
     int dc_diff;
 
@@ -288,31 +278,31 @@ static inline int get_luma_dc_dct_diff (slice_t * slice)
            dc_diff =
                UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
            bit_buf <<= size;
-           return dc_diff;
+           return dc_diff << decoder->intra_dc_precision;
        } else {
            DUMPBITS (bit_buf, bits, 3);
            return 0;
        }
     } else {
-       tab = DC_long - 0x1e0 + UBITS (bit_buf, 9);
+       tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0);
        size = tab->size;
        DUMPBITS (bit_buf, bits, tab->len);
        NEEDBITS (bit_buf, bits, bit_ptr);
        dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
        DUMPBITS (bit_buf, bits, size);
-       return dc_diff;
+       return dc_diff << decoder->intra_dc_precision;
     }
 #undef bit_buf
 #undef bits
 #undef bit_ptr
 }
 
-static inline int get_chroma_dc_dct_diff (slice_t * slice)
+static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
-    DCtab * tab;
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    const DCtab * tab;
     int size;
     int dc_diff;
 
@@ -325,73 +315,71 @@ static inline int get_chroma_dc_dct_diff (slice_t * slice)
            dc_diff =
                UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
            bit_buf <<= size;
-           return dc_diff;
+           return dc_diff << decoder->intra_dc_precision;
        } else {
            DUMPBITS (bit_buf, bits, 2);
            return 0;
        }
     } else {
-       tab = DC_long - 0x3e0 + UBITS (bit_buf, 10);
+       tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0);
        size = tab->size;
        DUMPBITS (bit_buf, bits, tab->len + 1);
        NEEDBITS (bit_buf, bits, bit_ptr);
        dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
        DUMPBITS (bit_buf, bits, size);
-       return dc_diff;
+       return dc_diff << decoder->intra_dc_precision;
     }
 #undef bit_buf
 #undef bits
 #undef bit_ptr
 }
 
-#define SATURATE(val)          \
-do {                           \
-    if (val > 2047)            \
-       val = 2047;             \
-    else if (val < -2048)      \
-       val = -2048;            \
+#define SATURATE(val)                          \
+do {                                           \
+    val <<= 4;                                 \
+    if (unlikely (val != (int16_t) val))       \
+       val = (SBITS (val, 1) ^ 2047) << 4;     \
 } while (0)
 
-static void get_intra_block_B14 (picture_t * picture, slice_t * slice,
-                                int16_t * dest)
+static void get_intra_block_B14 (mpeg2_decoder_t * const decoder,
+                                const uint16_t * const quant_matrix)
 {
     int i;
     int j;
     int val;
-    uint8_t * scan = picture->scan;
-    uint8_t * quant_matrix = picture->intra_quantizer_matrix;
-    int quantizer_scale = slice->quantizer_scale;
+    const uint8_t * const scan = decoder->scan;
     int mismatch;
-    DCTtab * tab;
+    const DCTtab * tab;
     uint32_t bit_buf;
     int bits;
-    uint8_t * bit_ptr;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
 
     i = 0;
     mismatch = ~dest[0];
 
-    bit_buf = slice->bitstream_buf;
-    bits = slice->bitstream_bits;
-    bit_ptr = slice->bitstream_ptr;
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
 
     while (1) {
        if (bit_buf >= 0x28000000) {
 
-           tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5);
+           tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
 
            i += tab->run;
            if (i >= 64)
-               break;  // end of block
+               break;  /* end of block */
 
        normal_code:
            j = scan[i];
            bit_buf <<= tab->len;
            bits += tab->len + 1;
-           val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+           val = (tab->level * quant_matrix[j]) >> 4;
 
-           // if (bitstream_get (1)) val = -val;
+           /* if (bitstream_get (1)) val = -val; */
            val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
 
            SATURATE (val);
@@ -405,24 +393,23 @@ static void get_intra_block_B14 (picture_t * picture, slice_t * slice,
 
        } else if (bit_buf >= 0x04000000) {
 
-           tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8);
+           tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
 
            i += tab->run;
            if (i < 64)
                goto normal_code;
 
-           // escape code
+           /* escape code */
 
            i += UBITS (bit_buf << 6, 6) - 64;
            if (i >= 64)
-               break;  // illegal, but check needed to avoid buffer overflow
+               break;  /* illegal, check needed to avoid buffer overflow */
 
            j = scan[i];
 
            DUMPBITS (bit_buf, bits, 12);
            NEEDBITS (bit_buf, bits, bit_ptr);
-           val = (SBITS (bit_buf, 12) *
-                  quantizer_scale * quant_matrix[j]) / 16;
+           val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16;
 
            SATURATE (val);
            dest[j] = val;
@@ -434,17 +421,17 @@ static void get_intra_block_B14 (picture_t * picture, slice_t * slice,
            continue;
 
        } else if (bit_buf >= 0x02000000) {
-           tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10);
+           tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00800000) {
-           tab = DCT_13 - 16 + UBITS (bit_buf, 13);
+           tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00200000) {
-           tab = DCT_15 - 16 + UBITS (bit_buf, 15);
+           tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
@@ -456,43 +443,42 @@ static void get_intra_block_B14 (picture_t * picture, slice_t * slice,
            if (i < 64)
                goto normal_code;
        }
-       break;  // illegal, but check needed to avoid buffer overflow
+       break;  /* illegal, check needed to avoid buffer overflow */
     }
-    dest[63] ^= mismatch & 1;
-    DUMPBITS (bit_buf, bits, 2);       // dump end of block code
-    slice->bitstream_buf = bit_buf;
-    slice->bitstream_bits = bits;
-    slice->bitstream_ptr = bit_ptr;
+    dest[63] ^= mismatch & 16;
+    DUMPBITS (bit_buf, bits, 2);       /* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
 }
 
-static void get_intra_block_B15 (picture_t * picture, slice_t * slice,
-                                int16_t * dest)
+static void get_intra_block_B15 (mpeg2_decoder_t * const decoder,
+                                const uint16_t * const quant_matrix)
 {
     int i;
     int j;
     int val;
-    uint8_t * scan = picture->scan;
-    uint8_t * quant_matrix = picture->intra_quantizer_matrix;
-    int quantizer_scale = slice->quantizer_scale;
+    const uint8_t * const scan = decoder->scan;
     int mismatch;
-    DCTtab * tab;
+    const DCTtab * tab;
     uint32_t bit_buf;
     int bits;
-    uint8_t * bit_ptr;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
 
     i = 0;
     mismatch = ~dest[0];
 
-    bit_buf = slice->bitstream_buf;
-    bits = slice->bitstream_bits;
-    bit_ptr = slice->bitstream_ptr;
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
 
     while (1) {
        if (bit_buf >= 0x04000000) {
 
-           tab = DCT_B15_8 - 4 + UBITS (bit_buf, 8);
+           tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4);
 
            i += tab->run;
            if (i < 64) {
@@ -501,9 +487,9 @@ static void get_intra_block_B15 (picture_t * picture, slice_t * slice,
                j = scan[i];
                bit_buf <<= tab->len;
                bits += tab->len + 1;
-               val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+               val = (tab->level * quant_matrix[j]) >> 4;
 
-               // if (bitstream_get (1)) val = -val;
+               /* if (bitstream_get (1)) val = -val; */
                val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
 
                SATURATE (val);
@@ -517,23 +503,22 @@ static void get_intra_block_B15 (picture_t * picture, slice_t * slice,
 
            } else {
 
-               // end of block. I commented out this code because if we
-               // dont exit here we will still exit at the later test :)
+               /* end of block. I commented out this code because if we */
+               /* don't exit here we will still exit at the later test :) */
 
-               //if (i >= 128) break;  // end of block
+               /* if (i >= 128) break; */      /* end of block */
 
-               // escape code
+               /* escape code */
 
                i += UBITS (bit_buf << 6, 6) - 64;
                if (i >= 64)
-                   break;      // illegal, but check against buffer overflow
+                   break;      /* illegal, check against buffer overflow */
 
                j = scan[i];
 
                DUMPBITS (bit_buf, bits, 12);
                NEEDBITS (bit_buf, bits, bit_ptr);
-               val = (SBITS (bit_buf, 12) *
-                      quantizer_scale * quant_matrix[j]) / 16;
+               val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16;
 
                SATURATE (val);
                dest[j] = val;
@@ -546,17 +531,17 @@ static void get_intra_block_B15 (picture_t * picture, slice_t * slice,
 
            }
        } else if (bit_buf >= 0x02000000) {
-           tab = DCT_B15_10 - 8 + UBITS (bit_buf, 10);
+           tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00800000) {
-           tab = DCT_13 - 16 + UBITS (bit_buf, 13);
+           tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00200000) {
-           tab = DCT_15 - 16 + UBITS (bit_buf, 15);
+           tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
@@ -568,40 +553,39 @@ static void get_intra_block_B15 (picture_t * picture, slice_t * slice,
            if (i < 64)
                goto normal_code;
        }
-       break;  // illegal, but check needed to avoid buffer overflow
+       break;  /* illegal, check needed to avoid buffer overflow */
     }
-    dest[63] ^= mismatch & 1;
-    DUMPBITS (bit_buf, bits, 4);       // dump end of block code
-    slice->bitstream_buf = bit_buf;
-    slice->bitstream_bits = bits;
-    slice->bitstream_ptr = bit_ptr;
+    dest[63] ^= mismatch & 16;
+    DUMPBITS (bit_buf, bits, 4);       /* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
 }
 
-static void get_non_intra_block (picture_t * picture, slice_t * slice,
-                                int16_t * dest)
+static int get_non_intra_block (mpeg2_decoder_t * const decoder,
+                               const uint16_t * const quant_matrix)
 {
     int i;
     int j;
     int val;
-    uint8_t * scan = picture->scan;
-    uint8_t * quant_matrix = picture->non_intra_quantizer_matrix;
-    int quantizer_scale = slice->quantizer_scale;
+    const uint8_t * const scan = decoder->scan;
     int mismatch;
-    DCTtab * tab;
+    const DCTtab * tab;
     uint32_t bit_buf;
     int bits;
-    uint8_t * bit_ptr;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
 
     i = -1;
-    mismatch = 1;
+    mismatch = -1;
 
-    bit_buf = slice->bitstream_buf;
-    bits = slice->bitstream_bits;
-    bit_ptr = slice->bitstream_ptr;
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
     if (bit_buf >= 0x28000000) {
-       tab = DCT_B14DC_5 - 5 + UBITS (bit_buf, 5);
+       tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
        goto entry_1;
     } else
        goto entry_2;
@@ -609,20 +593,20 @@ static void get_non_intra_block (picture_t * picture, slice_t * slice,
     while (1) {
        if (bit_buf >= 0x28000000) {
 
-           tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5);
+           tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
 
        entry_1:
            i += tab->run;
            if (i >= 64)
-               break;  // end of block
+               break;  /* end of block */
 
        normal_code:
            j = scan[i];
            bit_buf <<= tab->len;
            bits += tab->len + 1;
-           val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;
+           val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5;
 
-           // if (bitstream_get (1)) val = -val;
+           /* if (bitstream_get (1)) val = -val; */
            val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
 
            SATURATE (val);
@@ -639,24 +623,24 @@ static void get_non_intra_block (picture_t * picture, slice_t * slice,
     entry_2:
        if (bit_buf >= 0x04000000) {
 
-           tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8);
+           tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
 
            i += tab->run;
            if (i < 64)
                goto normal_code;
 
-           // escape code
+           /* escape code */
 
            i += UBITS (bit_buf << 6, 6) - 64;
            if (i >= 64)
-               break;  // illegal, but check needed to avoid buffer overflow
+               break;  /* illegal, check needed to avoid buffer overflow */
 
            j = scan[i];
 
            DUMPBITS (bit_buf, bits, 12);
            NEEDBITS (bit_buf, bits, bit_ptr);
            val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1;
-           val = (val * quantizer_scale * quant_matrix[j]) / 32;
+           val = (val * quant_matrix[j]) / 32;
 
            SATURATE (val);
            dest[j] = val;
@@ -668,17 +652,17 @@ static void get_non_intra_block (picture_t * picture, slice_t * slice,
            continue;
 
        } else if (bit_buf >= 0x02000000) {
-           tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10);
+           tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00800000) {
-           tab = DCT_13 - 16 + UBITS (bit_buf, 13);
+           tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00200000) {
-           tab = DCT_15 - 16 + UBITS (bit_buf, 15);
+           tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
@@ -690,56 +674,56 @@ static void get_non_intra_block (picture_t * picture, slice_t * slice,
            if (i < 64)
                goto normal_code;
        }
-       break;  // illegal, but check needed to avoid buffer overflow
+       break;  /* illegal, check needed to avoid buffer overflow */
     }
-    dest[63] ^= mismatch & 1;
-    DUMPBITS (bit_buf, bits, 2);       // dump end of block code
-    slice->bitstream_buf = bit_buf;
-    slice->bitstream_bits = bits;
-    slice->bitstream_ptr = bit_ptr;
+    dest[63] ^= mismatch & 16;
+    DUMPBITS (bit_buf, bits, 2);       /* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+    return i;
 }
 
-static void get_mpeg1_intra_block (picture_t * picture, slice_t * slice,
-                                  int16_t * dest)
+static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder)
 {
     int i;
     int j;
     int val;
-    uint8_t * scan = picture->scan;
-    uint8_t * quant_matrix = picture->intra_quantizer_matrix;
-    int quantizer_scale = slice->quantizer_scale;
-    DCTtab * tab;
+    const uint8_t * const scan = decoder->scan;
+    const uint16_t * const quant_matrix = decoder->quantizer_matrix[0];
+    const DCTtab * tab;
     uint32_t bit_buf;
     int bits;
-    uint8_t * bit_ptr;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
 
     i = 0;
 
-    bit_buf = slice->bitstream_buf;
-    bits = slice->bitstream_bits;
-    bit_ptr = slice->bitstream_ptr;
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
 
     while (1) {
        if (bit_buf >= 0x28000000) {
 
-           tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5);
+           tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
 
            i += tab->run;
            if (i >= 64)
-               break;  // end of block
+               break;  /* end of block */
 
        normal_code:
            j = scan[i];
            bit_buf <<= tab->len;
            bits += tab->len + 1;
-           val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+           val = (tab->level * quant_matrix[j]) >> 4;
 
-           // oddification
+           /* oddification */
            val = (val - 1) | 1;
 
-           // if (bitstream_get (1)) val = -val;
+           /* if (bitstream_get (1)) val = -val; */
            val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
 
            SATURATE (val);
@@ -752,17 +736,17 @@ static void get_mpeg1_intra_block (picture_t * picture, slice_t * slice,
 
        } else if (bit_buf >= 0x04000000) {
 
-           tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8);
+           tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
 
            i += tab->run;
            if (i < 64)
                goto normal_code;
 
-           // escape code
+           /* escape code */
 
            i += UBITS (bit_buf << 6, 6) - 64;
            if (i >= 64)
-               break;  // illegal, but check needed to avoid buffer overflow
+               break;  /* illegal, check needed to avoid buffer overflow */
 
            j = scan[i];
 
@@ -773,9 +757,9 @@ static void get_mpeg1_intra_block (picture_t * picture, slice_t * slice,
                DUMPBITS (bit_buf, bits, 8);
                val = UBITS (bit_buf, 8) + 2 * val;
            }
-           val = (val * quantizer_scale * quant_matrix[j]) / 16;
+           val = (val * quant_matrix[j]) / 16;
 
-           // oddification
+           /* oddification */
            val = (val + ~SBITS (val, 1)) | 1;
 
            SATURATE (val);
@@ -787,17 +771,17 @@ static void get_mpeg1_intra_block (picture_t * picture, slice_t * slice,
            continue;
 
        } else if (bit_buf >= 0x02000000) {
-           tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10);
+           tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00800000) {
-           tab = DCT_13 - 16 + UBITS (bit_buf, 13);
+           tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00200000) {
-           tab = DCT_15 - 16 + UBITS (bit_buf, 15);
+           tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
@@ -809,37 +793,36 @@ static void get_mpeg1_intra_block (picture_t * picture, slice_t * slice,
            if (i < 64)
                goto normal_code;
        }
-       break;  // illegal, but check needed to avoid buffer overflow
+       break;  /* illegal, check needed to avoid buffer overflow */
     }
-    DUMPBITS (bit_buf, bits, 2);       // dump end of block code
-    slice->bitstream_buf = bit_buf;
-    slice->bitstream_bits = bits;
-    slice->bitstream_ptr = bit_ptr;
+    DUMPBITS (bit_buf, bits, 2);       /* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
 }
 
-static void get_mpeg1_non_intra_block (picture_t * picture, slice_t * slice,
-                                      int16_t * dest)
+static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder)
 {
     int i;
     int j;
     int val;
-    uint8_t * scan = picture->scan;
-    uint8_t * quant_matrix = picture->non_intra_quantizer_matrix;
-    int quantizer_scale = slice->quantizer_scale;
-    DCTtab * tab;
+    const uint8_t * const scan = decoder->scan;
+    const uint16_t * const quant_matrix = decoder->quantizer_matrix[1];
+    const DCTtab * tab;
     uint32_t bit_buf;
     int bits;
-    uint8_t * bit_ptr;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
 
     i = -1;
 
-    bit_buf = slice->bitstream_buf;
-    bits = slice->bitstream_bits;
-    bit_ptr = slice->bitstream_ptr;
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
     if (bit_buf >= 0x28000000) {
-       tab = DCT_B14DC_5 - 5 + UBITS (bit_buf, 5);
+       tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
        goto entry_1;
     } else
        goto entry_2;
@@ -847,23 +830,23 @@ static void get_mpeg1_non_intra_block (picture_t * picture, slice_t * slice,
     while (1) {
        if (bit_buf >= 0x28000000) {
 
-           tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5);
+           tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
 
        entry_1:
            i += tab->run;
            if (i >= 64)
-               break;  // end of block
+               break;  /* end of block */
 
        normal_code:
            j = scan[i];
            bit_buf <<= tab->len;
            bits += tab->len + 1;
-           val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;
+           val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5;
 
-           // oddification
+           /* oddification */
            val = (val - 1) | 1;
 
-           // if (bitstream_get (1)) val = -val;
+           /* if (bitstream_get (1)) val = -val; */
            val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
 
            SATURATE (val);
@@ -879,17 +862,17 @@ static void get_mpeg1_non_intra_block (picture_t * picture, slice_t * slice,
     entry_2:
        if (bit_buf >= 0x04000000) {
 
-           tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8);
+           tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
 
            i += tab->run;
            if (i < 64)
                goto normal_code;
 
-           // escape code
+           /* escape code */
 
            i += UBITS (bit_buf << 6, 6) - 64;
            if (i >= 64)
-               break;  // illegal, but check needed to avoid buffer overflow
+               break;  /* illegal, check needed to avoid buffer overflow */
 
            j = scan[i];
 
@@ -901,9 +884,9 @@ static void get_mpeg1_non_intra_block (picture_t * picture, slice_t * slice,
                val = UBITS (bit_buf, 8) + 2 * val;
            }
            val = 2 * (val + SBITS (val, 1)) + 1;
-           val = (val * quantizer_scale * quant_matrix[j]) / 32;
+           val = (val * quant_matrix[j]) / 32;
 
-           // oddification
+           /* oddification */
            val = (val + ~SBITS (val, 1)) | 1;
 
            SATURATE (val);
@@ -915,17 +898,17 @@ static void get_mpeg1_non_intra_block (picture_t * picture, slice_t * slice,
            continue;
 
        } else if (bit_buf >= 0x02000000) {
-           tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10);
+           tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00800000) {
-           tab = DCT_13 - 16 + UBITS (bit_buf, 13);
+           tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
        } else if (bit_buf >= 0x00200000) {
-           tab = DCT_15 - 16 + UBITS (bit_buf, 15);
+           tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
            i += tab->run;
            if (i < 64)
                goto normal_code;
@@ -937,860 +920,1142 @@ static void get_mpeg1_non_intra_block (picture_t * picture, slice_t * slice,
            if (i < 64)
                goto normal_code;
        }
-       break;  // illegal, but check needed to avoid buffer overflow
+       break;  /* illegal, check needed to avoid buffer overflow */
     }
-    DUMPBITS (bit_buf, bits, 2);       // dump end of block code
-    slice->bitstream_buf = bit_buf;
-    slice->bitstream_bits = bits;
-    slice->bitstream_ptr = bit_ptr;
+    DUMPBITS (bit_buf, bits, 2);       /* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+    return i;
 }
 
-static inline int get_macroblock_address_increment (slice_t * slice)
+static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder,
+                                   const int cc,
+                                   uint8_t * const dest, const int stride)
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
-
-    MBAtab * tab;
-    int mba;
-
-    mba = 0;
-
-    while (1) {
-       if (bit_buf >= 0x10000000) {
-           tab = MBA_5 - 2 + UBITS (bit_buf, 5);
-           DUMPBITS (bit_buf, bits, tab->len);
-           return mba + tab->mba;
-       } else if (bit_buf >= 0x03000000) {
-           tab = MBA_11 - 24 + UBITS (bit_buf, 11);
-           DUMPBITS (bit_buf, bits, tab->len);
-           return mba + tab->mba;
-       } else switch (UBITS (bit_buf, 11)) {
-       case 8:         // macroblock_escape
-           mba += 33;
-           // no break here on purpose
-       case 15:        // macroblock_stuffing (MPEG1 only)
-           DUMPBITS (bit_buf, bits, 11);
-           NEEDBITS (bit_buf, bits, bit_ptr);
-           break;
-       default:        // end of slice, or error
-           return 0;
-       }
-    }
-
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static inline void slice_intra_DCT (picture_t * picture, slice_t * slice,
-                                   int cc, uint8_t * dest, int stride)
-{
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)  
-#define bit_ptr (slice->bitstream_ptr)
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
     NEEDBITS (bit_buf, bits, bit_ptr);
-    //Get the intra DC coefficient and inverse quantize it
+    /* Get the intra DC coefficient and inverse quantize it */
     if (cc == 0)
-       slice->dc_dct_pred[0] += get_luma_dc_dct_diff (slice);
+       decoder->DCTblock[0] =
+           decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder);
     else
-       slice->dc_dct_pred[cc] += get_chroma_dc_dct_diff (slice);
-    DCTblock[0] = slice->dc_dct_pred[cc] << (3 - picture->intra_dc_precision);
-
-    if (picture->mpeg1) {
-       if (picture->picture_coding_type != D_TYPE)
-           get_mpeg1_intra_block (picture, slice, DCTblock);
-    } else if (picture->intra_vlc_format)
-       get_intra_block_B15 (picture, slice, DCTblock);
+       decoder->DCTblock[0] =
+           decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder);
+
+    if (decoder->mpeg1) {
+       if (decoder->coding_type != D_TYPE)
+           get_mpeg1_intra_block (decoder);
+    } else if (decoder->intra_vlc_format)
+       get_intra_block_B15 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]);
     else
-       get_intra_block_B14 (picture, slice, DCTblock);
-    idct_block_copy (DCTblock, dest, stride);
-    memset (DCTblock, 0, sizeof (DCTblock));
+       get_intra_block_B14 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]);
+    mpeg2_idct_copy (decoder->DCTblock, dest, stride);
 #undef bit_buf
 #undef bits
 #undef bit_ptr
 }
 
-static inline void slice_non_intra_DCT (picture_t * picture, slice_t * slice,
-                                       uint8_t * dest, int stride)
+static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder,
+                                       const int cc,
+                                       uint8_t * const dest, const int stride)
 {
-    if (picture->mpeg1)
-       get_mpeg1_non_intra_block (picture, slice, DCTblock);
-    else
-       get_non_intra_block (picture, slice, DCTblock);
-    idct_block_add (DCTblock, dest, stride);
-    memset (DCTblock, 0, sizeof (DCTblock));
-}
+    int last;
 
-static inline void motion_block (void (** table) (uint8_t *, uint8_t *,
-                                                 int32_t, int32_t), 
-                                int x_pred, int y_pred,
-                                uint8_t * dest[3], int dest_offset,
-                                uint8_t * src[3], int src_offset,
-                                int stride, int height, int second_half)
-{
-    int xy_half;
-    uint8_t * src1;
-    uint8_t * src2;
-
-    xy_half = ((y_pred & 1) << 1) | (x_pred & 1);
-
-    src1 = src[0] + src_offset + (x_pred >> 1) + (y_pred >> 1) * stride +
-       second_half * (stride << 3);
-
-    table[xy_half] (dest[0] + dest_offset + second_half * (stride << 3),
-                   src1, stride, height);
-
-    x_pred /= 2;
-    y_pred /= 2;
-
-    xy_half = ((y_pred & 1) << 1) | (x_pred & 1);
-    stride >>= 1;
-    height >>= 1;
-    src_offset >>= 1;  src_offset += second_half * (stride << 2);
-    dest_offset >>= 1; dest_offset += second_half * (stride << 2);
-
-    src1 = src[1] + src_offset + (x_pred >> 1) + (y_pred >> 1) * stride;
-    src2 = src[2] + src_offset + (x_pred >> 1) + (y_pred >> 1) * stride;
-
-    table[4+xy_half] (dest[1] + dest_offset, src1, stride, height);
-    table[4+xy_half] (dest[2] + dest_offset, src2, stride, height);
+    if (decoder->mpeg1)
+       last = get_mpeg1_non_intra_block (decoder);
+    else
+       last = get_non_intra_block (decoder,
+                                   decoder->quantizer_matrix[cc ? 3 : 1]);
+    mpeg2_idct_add (last, decoder->DCTblock, dest, stride);
 }
 
-
-static void motion_mp1 (slice_t * slice, motion_t * motion,
-                       uint8_t * dest[3], int offset, int width,
-                       void (** table) (uint8_t *, uint8_t *, int, int))
+#define MOTION_420(table,ref,motion_x,motion_y,size,y)                       \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;                        \
+    if (unlikely (pos_x > decoder->limit_x)) {                               \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;                      \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) {                              \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;             \
+       motion_y = pos_y - 2 * decoder->v_offset - 2 * y;                     \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+                   ref[0] + (pos_x >> 1) + (pos_y >> 1) * decoder->stride,   \
+                   decoder->stride, size);                                   \
+    motion_x /= 2;     motion_y /= 2;                                        \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);                        \
+    offset = (((decoder->offset + motion_x) >> 1) +                          \
+             ((((decoder->v_offset + motion_y) >> 1) + y/2) *                \
+              decoder->uv_stride));                                          \
+    table[4+xy_half] (decoder->dest[1] + y/2 * decoder->uv_stride +          \
+                     (decoder->offset >> 1), ref[1] + offset,                \
+                     decoder->uv_stride, size/2);                            \
+    table[4+xy_half] (decoder->dest[2] + y/2 * decoder->uv_stride +          \
+                     (decoder->offset >> 1), ref[2] + offset,                \
+                     decoder->uv_stride, size/2)
+
+#define MOTION_FIELD_420(table,ref,motion_x,motion_y,dest_field,op,src_field) \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = decoder->v_offset + motion_y;                                    \
+    if (unlikely (pos_x > decoder->limit_x)) {                               \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;                      \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y)) {                               \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;                      \
+       motion_y = pos_y - decoder->v_offset;                                 \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +        \
+                   decoder->offset,                                          \
+                   (ref[0] + (pos_x >> 1) +                                  \
+                    ((pos_y op) + src_field) * decoder->stride),             \
+                   2 * decoder->stride, 8);                                  \
+    motion_x /= 2;     motion_y /= 2;                                        \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);                        \
+    offset = (((decoder->offset + motion_x) >> 1) +                          \
+             (((decoder->v_offset >> 1) + (motion_y op) + src_field) *       \
+              decoder->uv_stride));                                          \
+    table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride +    \
+                     (decoder->offset >> 1), ref[1] + offset,                \
+                     2 * decoder->uv_stride, 4);                             \
+    table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride +    \
+                     (decoder->offset >> 1), ref[2] + offset,                \
+                     2 * decoder->uv_stride, 4)
+
+#define MOTION_DMV_420(table,ref,motion_x,motion_y)                          \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = decoder->v_offset + motion_y;                                    \
+    if (unlikely (pos_x > decoder->limit_x)) {                               \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;                      \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y)) {                               \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;                      \
+       motion_y = pos_y - decoder->v_offset;                                 \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;                  \
+    table[xy_half] (decoder->dest[0] + decoder->offset,                              \
+                   ref[0] + offset, 2 * decoder->stride, 8);                 \
+    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset,     \
+                   ref[0] + decoder->stride + offset,                        \
+                   2 * decoder->stride, 8);                                  \
+    motion_x /= 2;     motion_y /= 2;                                        \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);                        \
+    offset = (((decoder->offset + motion_x) >> 1) +                          \
+             (((decoder->v_offset >> 1) + (motion_y & ~1)) *                 \
+              decoder->uv_stride));                                          \
+    table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1),             \
+                     ref[1] + offset, 2 * decoder->uv_stride, 4);            \
+    table[4+xy_half] (decoder->dest[1] + decoder->uv_stride +                \
+                     (decoder->offset >> 1),                                 \
+                     ref[1] + decoder->uv_stride + offset,                   \
+                     2 * decoder->uv_stride, 4);                             \
+    table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1),             \
+                     ref[2] + offset, 2 * decoder->uv_stride, 4);            \
+    table[4+xy_half] (decoder->dest[2] + decoder->uv_stride +                \
+                     (decoder->offset >> 1),                                 \
+                     ref[2] + decoder->uv_stride + offset,                   \
+                     2 * decoder->uv_stride, 4)
+
+#define MOTION_ZERO_420(table,ref)                                           \
+    table[0] (decoder->dest[0] + decoder->offset,                            \
+             (ref[0] + decoder->offset +                                     \
+              decoder->v_offset * decoder->stride), decoder->stride, 16);    \
+    offset = ((decoder->offset >> 1) +                                       \
+             (decoder->v_offset >> 1) * decoder->uv_stride);                 \
+    table[4] (decoder->dest[1] + (decoder->offset >> 1),                     \
+             ref[1] + offset, decoder->uv_stride, 8);                        \
+    table[4] (decoder->dest[2] + (decoder->offset >> 1),                     \
+             ref[2] + offset, decoder->uv_stride, 8)
+
+#define MOTION_422(table,ref,motion_x,motion_y,size,y)                       \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;                        \
+    if (unlikely (pos_x > decoder->limit_x)) {                               \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;                      \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) {                              \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;             \
+       motion_y = pos_y - 2 * decoder->v_offset - 2 * y;                     \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride;                  \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+                   ref[0] + offset, decoder->stride, size);                  \
+    offset = (offset + (motion_x & (motion_x < 0))) >> 1;                    \
+    motion_x /= 2;                                                           \
+    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);                           \
+    table[4+xy_half] (decoder->dest[1] + y * decoder->uv_stride +            \
+                     (decoder->offset >> 1), ref[1] + offset,                \
+                     decoder->uv_stride, size);                              \
+    table[4+xy_half] (decoder->dest[2] + y * decoder->uv_stride +            \
+                     (decoder->offset >> 1), ref[2] + offset,                \
+                     decoder->uv_stride, size)
+
+#define MOTION_FIELD_422(table,ref,motion_x,motion_y,dest_field,op,src_field) \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = decoder->v_offset + motion_y;                                    \
+    if (unlikely (pos_x > decoder->limit_x)) {                               \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;                      \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y)) {                               \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;                      \
+       motion_y = pos_y - decoder->v_offset;                                 \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride;              \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +        \
+                   decoder->offset, ref[0] + offset,                         \
+                   2 * decoder->stride, 8);                                  \
+    offset = (offset + (motion_x & (motion_x < 0))) >> 1;                    \
+    motion_x /= 2;                                                           \
+    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);                           \
+    table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride +    \
+                     (decoder->offset >> 1), ref[1] + offset,                \
+                     2 * decoder->uv_stride, 8);                             \
+    table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride +    \
+                     (decoder->offset >> 1), ref[2] + offset,                \
+                     2 * decoder->uv_stride, 8)
+
+#define MOTION_DMV_422(table,ref,motion_x,motion_y)                          \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = decoder->v_offset + motion_y;                                    \
+    if (unlikely (pos_x > decoder->limit_x)) {                               \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;                      \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y)) {                               \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;                      \
+       motion_y = pos_y - decoder->v_offset;                                 \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;                  \
+    table[xy_half] (decoder->dest[0] + decoder->offset,                              \
+                   ref[0] + offset, 2 * decoder->stride, 8);                 \
+    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset,     \
+                   ref[0] + decoder->stride + offset,                        \
+                   2 * decoder->stride, 8);                                  \
+    offset = (offset + (motion_x & (motion_x < 0))) >> 1;                    \
+    motion_x /= 2;                                                           \
+    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);                           \
+    table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1),             \
+                     ref[1] + offset, 2 * decoder->uv_stride, 8);            \
+    table[4+xy_half] (decoder->dest[1] + decoder->uv_stride +                \
+                     (decoder->offset >> 1),                                 \
+                     ref[1] + decoder->uv_stride + offset,                   \
+                     2 * decoder->uv_stride, 8);                             \
+    table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1),             \
+                     ref[2] + offset, 2 * decoder->uv_stride, 8);            \
+    table[4+xy_half] (decoder->dest[2] + decoder->uv_stride +                \
+                     (decoder->offset >> 1),                                 \
+                     ref[2] + decoder->uv_stride + offset,                   \
+                     2 * decoder->uv_stride, 8)
+
+#define MOTION_ZERO_422(table,ref)                                           \
+    offset = decoder->offset + decoder->v_offset * decoder->stride;          \
+    table[0] (decoder->dest[0] + decoder->offset,                            \
+             ref[0] + offset, decoder->stride, 16);                          \
+    offset >>= 1;                                                            \
+    table[4] (decoder->dest[1] + (decoder->offset >> 1),                     \
+             ref[1] + offset, decoder->uv_stride, 16);                       \
+    table[4] (decoder->dest[2] + (decoder->offset >> 1),                     \
+             ref[2] + offset, decoder->uv_stride, 16)
+
+#define MOTION_444(table,ref,motion_x,motion_y,size,y)                       \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;                        \
+    if (unlikely (pos_x > decoder->limit_x)) {                               \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;                      \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) {                              \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;             \
+       motion_y = pos_y - 2 * decoder->v_offset - 2 * y;                     \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride;                  \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+                   ref[0] + offset, decoder->stride, size);                  \
+    table[xy_half] (decoder->dest[1] + y * decoder->stride + decoder->offset, \
+                   ref[1] + offset, decoder->stride, size);                  \
+    table[xy_half] (decoder->dest[2] + y * decoder->stride + decoder->offset, \
+                   ref[2] + offset, decoder->stride, size)
+
+#define MOTION_FIELD_444(table,ref,motion_x,motion_y,dest_field,op,src_field) \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = decoder->v_offset + motion_y;                                    \
+    if (unlikely (pos_x > decoder->limit_x)) {                               \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;                      \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y)) {                               \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;                      \
+       motion_y = pos_y - decoder->v_offset;                                 \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride;              \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +        \
+                   decoder->offset, ref[0] + offset,                         \
+                   2 * decoder->stride, 8);                                  \
+    table[xy_half] (decoder->dest[1] + dest_field * decoder->stride +        \
+                   decoder->offset, ref[1] + offset,                         \
+                   2 * decoder->stride, 8);                                  \
+    table[xy_half] (decoder->dest[2] + dest_field * decoder->stride +        \
+                   decoder->offset, ref[2] + offset,                         \
+                   2 * decoder->stride, 8)
+
+#define MOTION_DMV_444(table,ref,motion_x,motion_y)                          \
+    pos_x = 2 * decoder->offset + motion_x;                                  \
+    pos_y = decoder->v_offset + motion_y;                                    \
+    if (unlikely (pos_x > decoder->limit_x)) {                               \
+       pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;                      \
+       motion_x = pos_x - 2 * decoder->offset;                               \
+    }                                                                        \
+    if (unlikely (pos_y > decoder->limit_y)) {                               \
+       pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;                      \
+       motion_y = pos_y - decoder->v_offset;                                 \
+    }                                                                        \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);                                      \
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;                  \
+    table[xy_half] (decoder->dest[0] + decoder->offset,                              \
+                   ref[0] + offset, 2 * decoder->stride, 8);                 \
+    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset,     \
+                   ref[0] + decoder->stride + offset,                        \
+                   2 * decoder->stride, 8);                                  \
+    table[xy_half] (decoder->dest[1] + decoder->offset,                              \
+                   ref[1] + offset, 2 * decoder->stride, 8);                 \
+    table[xy_half] (decoder->dest[1] + decoder->stride + decoder->offset,     \
+                   ref[1] + decoder->stride + offset,                        \
+                   2 * decoder->stride, 8);                                  \
+    table[xy_half] (decoder->dest[2] + decoder->offset,                              \
+                   ref[2] + offset, 2 * decoder->stride, 8);                 \
+    table[xy_half] (decoder->dest[2] + decoder->stride + decoder->offset,     \
+                   ref[2] + decoder->stride + offset,                        \
+                   2 * decoder->stride, 8)
+
+#define MOTION_ZERO_444(table,ref)                                           \
+    offset = decoder->offset + decoder->v_offset * decoder->stride;          \
+    table[0] (decoder->dest[0] + decoder->offset,                            \
+             ref[0] + offset, decoder->stride, 16);                          \
+    table[0] (decoder->dest[1] + decoder->offset,                            \
+             ref[1] + offset, decoder->stride, 16);                          \
+    table[0] (decoder->dest[2] + decoder->offset,                            \
+             ref[2] + offset, decoder->stride, 16)
+
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+/* Decode one motion vector from the bitstream and apply it with MOTION_420
+ * (the name suggests the MPEG-1 path - TODO confirm against the caller).
+ *
+ * Each component's delta is read with get_motion_delta (f_code[0]) and
+ * shifted left by f_code[1] before it is added to the predictor held in
+ * motion->pmv[0]; the sum is bounded with f_code[0] + f_code[1] and written
+ * back.  The removed version stored the unshifted vector instead and doubled
+ * it just before compensation whenever f_code[1] was non-zero.
+ *
+ * decoder: supplies the bitstream state (through bit_buf/bits/bit_ptr).
+ * motion:  predictors (pmv), the f_code pair and the reference (ref[0]).
+ * table:   motion-compensation function table handed to MOTION_420.
+ */
+static void motion_mp1 (mpeg2_decoder_t * const decoder,
+                       motion_t * const motion,
+                       mpeg2_mc_fct * const * const table)
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
     int motion_x, motion_y;
+    unsigned int pos_x, pos_y, xy_half, offset; /* never named below; presumably
+                                                 * scratch for the MOTION_420
+                                                 * expansion - confirm */
 
     NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[0][0] + get_motion_delta (slice, motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion_x = (motion->pmv[0][0] +     /* predictor + delta << f_code[1] */
+               (get_motion_delta (decoder,
+                                  motion->f_code[0]) << motion->f_code[1]));
+    motion_x = bound_motion_vector (motion_x,
+                                   motion->f_code[0] + motion->f_code[1]);
     motion->pmv[0][0] = motion_x;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = motion->pmv[0][1] + get_motion_delta (slice, motion->f_code[0]);
-    motion_y = bound_motion_vector (motion_y, motion->f_code[0]);
+    motion_y = (motion->pmv[0][1] +     /* vertical also uses f_code[0] */
+               (get_motion_delta (decoder,
+                                  motion->f_code[0]) << motion->f_code[1]));
+    motion_y = bound_motion_vector (motion_y,
+                                   motion->f_code[0] + motion->f_code[1]);
     motion->pmv[0][1] = motion_y;
 
-    if (motion->f_code[1]) {
-       motion_x <<= 1;
-       motion_y <<= 1;
-    }
-
-    motion_block (table, motion_x, motion_y, dest, offset,
-                 motion->ref[0], offset, width, 16, 0);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static void motion_mp1_reuse (slice_t * slice, motion_t * motion,
-                             uint8_t * dest[3], int offset, int width,
-                             void (** table) (uint8_t *, uint8_t *, int, int))
-{
-    int motion_x, motion_y;
-
-    motion_x = motion->pmv[0][0];
-    motion_y = motion->pmv[0][1];
-
-    if (motion->f_code[1]) {
-       motion_x <<= 1;
-       motion_y <<= 1;
-    }
-
-    motion_block (table, motion_x, motion_y, dest, offset,
-                 motion->ref[0], offset, width, 16, 0);
+    MOTION_420 (table, motion->ref[0], motion_x, motion_y, 16, 0); /* always the
+                                                 * 4:2:0 macro; 16, 0 is
+                                                 * presumably rows, row offset */
 }
 
-static void motion_fr_frame (slice_t * slice, motion_t * motion,
-                            uint8_t * dest[3], int offset, int width,
-                            void (** table) (uint8_t *, uint8_t *, int, int))
+#define MOTION_FUNCTIONS(FORMAT,MOTION,MOTION_FIELD,MOTION_DMV,MOTION_ZERO)   \
+    /* Stamps out the motion routines for one chroma format.  FORMAT is the  \
+     * name suffix; MOTION, MOTION_FIELD, MOTION_DMV and MOTION_ZERO name    \
+     * the format-specific compensation macros that the bodies expand.      \
+     * The locals pos_x, pos_y, xy_half and offset are never named in these \
+     * bodies; presumably the MOTION* expansions use them (confirm against  \
+     * the macro definitions).  Reading fr_ as "frame picture" and fi_ as   \
+     * "field picture" is an assumption based on the fi_* routines using    \
+     * ref2[] and dmv_offset. */                                             \
+static void motion_fr_frame_##FORMAT (mpeg2_decoder_t * const decoder,       \
+                                     motion_t * const motion,                \
+                                     mpeg2_mc_fct * const * const table)     \
+{                                                                            \
+    int motion_x, motion_y;                                                  \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* One vector: x is decoded with f_code[0], y with f_code[1]; both      \
+     * pmv rows are set to it, then MOTION runs on ref[0] with 16, 0. */     \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;                        \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,                \
+                                                    motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);            \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;                        \
+                                                                             \
+    MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0);               \
+}                                                                            \
+                                                                             \
+static void motion_fr_field_##FORMAT (mpeg2_decoder_t * const decoder,       \
+                                     motion_t * const motion,                \
+                                     mpeg2_mc_fct * const * const table)     \
+{                                                                            \
+    int motion_x, motion_y, field;                                           \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* Two vectors, each preceded by a 1-bit `field` value that is handed   \
+     * to MOTION_FIELD.  The vertical predictor is halved when read and     \
+     * the result doubled when stored; the vertical bound is disabled. */   \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    field = UBITS (bit_buf, 1);                                                      \
+    DUMPBITS (bit_buf, bits, 1);                                             \
+                                                                             \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[0][0] = motion_x;                                            \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_y = ((motion->pmv[0][1] >> 1) +                                   \
+               get_motion_delta (decoder, motion->f_code[1]));               \
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */              \
+    motion->pmv[0][1] = motion_y << 1;                                       \
+    /* first vector: predictor row pmv[0], fifth MOTION_FIELD argument 0 */  \
+    MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    field = UBITS (bit_buf, 1);                                                      \
+    DUMPBITS (bit_buf, bits, 1);                                             \
+                                                                             \
+    motion_x = motion->pmv[1][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[1][0] = motion_x;                                            \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_y = ((motion->pmv[1][1] >> 1) +                                   \
+               get_motion_delta (decoder, motion->f_code[1]));               \
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */              \
+    motion->pmv[1][1] = motion_y << 1;                                       \
+    /* second vector: predictor row pmv[1], fifth MOTION_FIELD argument 1 */ \
+    MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); \
+}                                                                            \
+                                                                             \
+static void motion_fr_dmv_##FORMAT (mpeg2_decoder_t * const decoder,         \
+                                   motion_t * const motion,                  \
+                                   mpeg2_mc_fct * const * const table)       \
+{                                                                            \
+    int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y;               \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* One vector plus a get_dmv () correction per axis.  Two derived       \
+     * vectors go through MOTION_FIELD with mpeg2_mc.put, then MOTION_DMV   \
+     * runs with mpeg2_mc.avg on the decoded vector.  `table` is not named  \
+     * in this body. */                                                      \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;                        \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    dmv_x = get_dmv (decoder);                                               \
+    /* vertical predictor is halved on read, doubled on store (as above) */  \
+    motion_y = ((motion->pmv[0][1] >> 1) +                                   \
+               get_motion_delta (decoder, motion->f_code[1]));               \
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */              \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1;                   \
+    dmv_y = get_dmv (decoder);                                               \
+    /* first derived vector: scale m is 1 or 3 by top_field_first, y - 1 */  \
+    m = decoder->top_field_first ? 1 : 3;                                    \
+    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;                \
+    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1;            \
+    MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); \
+    /* second derived vector: the opposite scale, y + 1 */                   \
+    m = decoder->top_field_first ? 3 : 1;                                    \
+    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;                \
+    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1;            \
+    MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0);\
+                                                                             \
+    MOTION_DMV (mpeg2_mc.avg, motion->ref[0], motion_x, motion_y);           \
+}                                                                            \
+                                                                             \
+static void motion_reuse_##FORMAT (mpeg2_decoder_t * const decoder,          \
+                                  motion_t * const motion,                   \
+                                  mpeg2_mc_fct * const * const table)        \
+{                                                                            \
+    int motion_x, motion_y;                                                  \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* Re-applies the stored pmv[0] vector; reads no bitstream bits. */      \
+    motion_x = motion->pmv[0][0];                                            \
+    motion_y = motion->pmv[0][1];                                            \
+                                                                             \
+    MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0);               \
+}                                                                            \
+                                                                             \
+static void motion_zero_##FORMAT (mpeg2_decoder_t * const decoder,           \
+                                 motion_t * const motion,                    \
+                                 mpeg2_mc_fct * const * const table)         \
+{                                                                            \
+    unsigned int offset;                                                     \
+    /* Clears all four predictor components, then MOTION_ZERO on ref[0]. */  \
+    motion->pmv[0][0] = motion->pmv[0][1] = 0;                               \
+    motion->pmv[1][0] = motion->pmv[1][1] = 0;                               \
+                                                                             \
+    MOTION_ZERO (table, motion->ref[0]);                                     \
+}                                                                            \
+                                                                             \
+static void motion_fi_field_##FORMAT (mpeg2_decoder_t * const decoder,       \
+                                     motion_t * const motion,                \
+                                     mpeg2_mc_fct * const * const table)     \
+{                                                                            \
+    int motion_x, motion_y;                                                  \
+    uint8_t ** ref_field;                                                    \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* A 1-bit selector picks the reference from motion->ref2[]; then one   \
+     * vector is decoded as in motion_fr_frame and both pmv rows are set. */ \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];                            \
+    DUMPBITS (bit_buf, bits, 1);                                             \
+                                                                             \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;                        \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,                \
+                                                    motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);            \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;                        \
+                                                                             \
+    MOTION (table, ref_field, motion_x, motion_y, 16, 0);                    \
+}                                                                            \
+                                                                             \
+static void motion_fi_16x8_##FORMAT (mpeg2_decoder_t * const decoder,        \
+                                    motion_t * const motion,                 \
+                                    mpeg2_mc_fct * const * const table)      \
+{                                                                            \
+    int motion_x, motion_y;                                                  \
+    uint8_t ** ref_field;                                                    \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* Two (selector, vector) pairs: the first updates pmv[0] and calls     \
+     * MOTION with 8, 0; the second updates pmv[1] and calls it with 8, 8. */\
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];                            \
+    DUMPBITS (bit_buf, bits, 1);                                             \
+                                                                             \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[0][0] = motion_x;                                            \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,                \
+                                                    motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);            \
+    motion->pmv[0][1] = motion_y;                                            \
+                                                                             \
+    MOTION (table, ref_field, motion_x, motion_y, 8, 0);                     \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];                            \
+    DUMPBITS (bit_buf, bits, 1);                                             \
+                                                                             \
+    motion_x = motion->pmv[1][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[1][0] = motion_x;                                            \
+                                                                             \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_y = motion->pmv[1][1] + get_motion_delta (decoder,                \
+                                                    motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);            \
+    motion->pmv[1][1] = motion_y;                                            \
+                                                                             \
+    MOTION (table, ref_field, motion_x, motion_y, 8, 8);                     \
+}                                                                            \
+                                                                             \
+static void motion_fi_dmv_##FORMAT (mpeg2_decoder_t * const decoder,         \
+                                   motion_t * const motion,                  \
+                                   mpeg2_mc_fct * const * const table)       \
+{                                                                            \
+    int motion_x, motion_y, other_x, other_y;                                \
+    unsigned int pos_x, pos_y, xy_half, offset;                                      \
+    /* One vector plus get_dmv () corrections.  MOTION puts from ref[0]     \
+     * with the decoded vector, then averages from ref[1] with the derived  \
+     * one, whose y also adds decoder->dmv_offset.  `table` is not named    \
+     * in this body. */                                                      \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,                \
+                                                    motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);            \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;                        \
+    NEEDBITS (bit_buf, bits, bit_ptr);                                       \
+    other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder);        \
+                                                                             \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,                \
+                                                    motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);            \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;                        \
+    other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) +              \
+              decoder->dmv_offset);                                          \
+                                                                             \
+    MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0);        \
+    MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0);          \
+}                                                                            \
+
+MOTION_FUNCTIONS (420, MOTION_420, MOTION_FIELD_420, MOTION_DMV_420,
+                 MOTION_ZERO_420)      /* defines the motion_*_420 routines */
+MOTION_FUNCTIONS (422, MOTION_422, MOTION_FIELD_422, MOTION_DMV_422,
+                 MOTION_ZERO_422)      /* defines the motion_*_422 routines */
+MOTION_FUNCTIONS (444, MOTION_444, MOTION_FIELD_444, MOTION_DMV_444,
+                 MOTION_ZERO_444)      /* defines the motion_*_444 routines */
+
+/* Parse one motion vector without doing any motion compensation; the vector
+ * is decoded the same way as in the motion_fr_frame_* routines.  Both rows
+ * of decoder->f_motion.pmv are updated, then the trailing marker bit is
+ * dropped.  The name suggests this handles concealment vectors - TODO
+ * confirm against the caller.
+ */
+static void motion_fr_conceal (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
-    int motion_x, motion_y;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[0][0] + get_motion_delta (slice, motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = motion->pmv[0][1] + get_motion_delta (slice, motion->f_code[1]);
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;
-
-    motion_block (table, motion_x, motion_y, dest, offset,
-                 motion->ref[0], offset, width, 16, 0);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static void motion_fr_field (slice_t * slice, motion_t * motion,
-                            uint8_t * dest[3], int offset, int width,
-                            void (** table) (uint8_t *, uint8_t *, int, int))
-{
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
-    int motion_x, motion_y;
-    int field_select;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    field_select = SBITS (bit_buf, 1);
-    DUMPBITS (bit_buf, bits, 1);
-
-    motion_x = motion->pmv[0][0] + get_motion_delta (slice, motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[0][0] = motion_x;
+    int tmp; /* holds the x component first, then is reused for y */
 
     NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (slice,
-                                                           motion->f_code[1]);
-    //motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[0][1] = motion_y << 1;
-
-    motion_block (table, motion_x, motion_y, dest, offset,
-                 motion->ref[0], offset + (field_select & width),
-                 width * 2, 8, 0);
+    tmp = (decoder->f_motion.pmv[0][0] +        /* x: decoded with f_code[0] */
+          get_motion_delta (decoder, decoder->f_motion.f_code[0]));
+    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]);
+    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
-    field_select = SBITS (bit_buf, 1);
-    DUMPBITS (bit_buf, bits, 1);
-
-    motion_x = motion->pmv[1][0] + get_motion_delta (slice, motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[1][0] = motion_x;
+    tmp = (decoder->f_motion.pmv[0][1] +        /* y: decoded with f_code[1] */
+          get_motion_delta (decoder, decoder->f_motion.f_code[1]));
+    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]);
+    decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp;
 
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = (motion->pmv[1][1] >> 1) + get_motion_delta (slice,
-                                                           motion->f_code[1]);
-    //motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[1][1] = motion_y << 1;
-
-    motion_block (table, motion_x, motion_y, dest, offset + width,
-                 motion->ref[0], offset + (field_select & width),
-                 width * 2, 8, 0);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
+    DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
 }
 
-static int motion_dmv_top_field_first;
-static void motion_fr_dmv (slice_t * slice, motion_t * motion,
-                          uint8_t * dest[3], int offset, int width,
-                          void (** table) (uint8_t *, uint8_t *, int, int))
+static void motion_fi_conceal (mpeg2_decoder_t * const decoder)
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
-    int motion_x, motion_y;
-    int dmv_x, dmv_y;
-    int m;
-    int other_x, other_y;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[0][0] + get_motion_delta (slice, motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
+    int tmp; /* Same parsing as motion_fr_conceal, except that one
+              * field_select bit is dropped before the vector is read.
+              * Only the predictors in decoder->f_motion.pmv are updated;
+              * no motion compensation is performed here. */
 
     NEEDBITS (bit_buf, bits, bit_ptr);
-    dmv_x = get_dmv (slice);
+    DUMPBITS (bit_buf, bits, 1); /* remove field_select */
 
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (slice,
-                                                           motion->f_code[1]);
-    //motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1;
+    tmp = (decoder->f_motion.pmv[0][0] +        /* x: decoded with f_code[0] */
+          get_motion_delta (decoder, decoder->f_motion.f_code[0]));
+    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]);
+    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp;
 
     NEEDBITS (bit_buf, bits, bit_ptr);
-    dmv_y = get_dmv (slice);
-
-    motion_block (mc_functions.put, motion_x, motion_y, dest, offset,
-                 motion->ref[0], offset, width * 2, 8, 0);
-
-    m = motion_dmv_top_field_first ? 1 : 3;
-    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;
-    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1;
-    motion_block (mc_functions.avg, other_x, other_y, dest, offset,
-                 motion->ref[0], offset + width, width * 2, 8, 0);
-
-    motion_block (mc_functions.put, motion_x, motion_y, dest, offset + width,
-                 motion->ref[0], offset + width, width * 2, 8, 0);
-
-    m = motion_dmv_top_field_first ? 3 : 1;
-    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;
-    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1;
-    motion_block (mc_functions.avg, other_x, other_y, dest, offset + width,
-                 motion->ref[0], offset, width * 2, 8, 0);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
+    tmp = (decoder->f_motion.pmv[0][1] +        /* y: decoded with f_code[1] */
+          get_motion_delta (decoder, decoder->f_motion.f_code[1]));
+    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]);
+    decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp;
 
-// like motion_frame, but reuse previous motion vectors
-static void motion_fr_reuse (slice_t * slice, motion_t * motion,
-                            uint8_t * dest[3], int offset, int width,
-                            void (** table) (uint8_t *, uint8_t *, int, int))
-{
-    motion_block (table, motion->pmv[0][0], motion->pmv[0][1], dest, offset,
-                 motion->ref[0], offset, width, 16, 0);
-}
-
-// like motion_frame, but use null motion vectors
-static void motion_fr_zero (slice_t * slice, motion_t * motion,
-                           uint8_t * dest[3], int offset, int width,
-                           void (** table) (uint8_t *, uint8_t *, int, int))
-{
-    motion_block (table, 0, 0, dest, offset,
-                 motion->ref[0], offset, width, 16, 0);
+    DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
 }
 
-// like motion_frame, but parsing without actual motion compensation
-static void motion_fr_conceal (slice_t * slice, motion_t * motion)
-{
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
-    int tmp;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    tmp = motion->pmv[0][0] + get_motion_delta (slice, motion->f_code[0]);
-    tmp = bound_motion_vector (tmp, motion->f_code[0]);
-    motion->pmv[1][0] = motion->pmv[0][0] = tmp;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    tmp = motion->pmv[0][1] + get_motion_delta (slice, motion->f_code[1]);
-    tmp = bound_motion_vector (tmp, motion->f_code[1]);
-    motion->pmv[1][1] = motion->pmv[0][1] = tmp;
-
-    DUMPBITS (bit_buf, bits, 1); // remove marker_bit
 #undef bit_buf
 #undef bits
 #undef bit_ptr
-}
-
-static void motion_fi_field (slice_t * slice, motion_t * motion,
-                            uint8_t * dest[3], int offset, int width,
-                            void (** table) (uint8_t *, uint8_t *, int, int))
-{
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
-    int motion_x, motion_y;
-    int field_select;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    field_select = UBITS (bit_buf, 1);
-    DUMPBITS (bit_buf, bits, 1);
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[0][0] + get_motion_delta (slice, motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
 
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = motion->pmv[0][1] + get_motion_delta (slice, motion->f_code[1]);
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;
+#define MOTION_CALL(routine,direction)                         \
+do { /* Run `routine` once per prediction direction flagged    \
+      * in `direction`.  Forward always writes with            \
+      * mpeg2_mc.put; backward averages (mpeg2_mc.avg) when    \
+      * forward was applied too, otherwise it puts.  Needs a   \
+      * `decoder` variable in scope, and `direction` is        \
+      * evaluated more than once. */                           \
+    if ((direction) & MACROBLOCK_MOTION_FORWARD)               \
+       routine (decoder, &(decoder->f_motion), mpeg2_mc.put);  \
+    if ((direction) & MACROBLOCK_MOTION_BACKWARD)              \
+       routine (decoder, &(decoder->b_motion),                 \
+                ((direction) & MACROBLOCK_MOTION_FORWARD ?     \
+                 mpeg2_mc.avg : mpeg2_mc.put));                \
+} while (0)
 
-    motion_block (table, motion_x, motion_y, dest, offset,
-                 motion->ref[field_select], offset, width, 16, 0);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
+#define NEXT_MACROBLOCK                                                        \
+do { /* Step the decoder to the next macroblock.  Only usable inside a \
+      * void function that has `decoder` and a `cpu_state` variable in  \
+      * scope: once v_offset passes limit_y the macro restores the CPU  \
+      * state (if that hook is set) and returns from the enclosing      \
+      * function. */                                                    \
+    if(decoder->quant_store) /* optional per-macroblock quantizer map */ \
+        decoder->quant_store[decoder->quant_stride*(decoder->v_offset>>4) \
+                    +(decoder->offset>>4)] = decoder->quantizer_scale;  \
+    decoder->offset += 16;                                             \
+    if (decoder->offset == decoder->width) { /* reached the row end */ \
+       do { /* just so we can use the break statement */               \
+           if (decoder->convert) {                                     \
+               decoder->convert (decoder->convert_id, decoder->dest,   \
+                                 decoder->v_offset);                   \
+               if (decoder->coding_type == B_TYPE)                     \
+                   break; /* converted B picture: dest stays put */    \
+           }                                                           \
+           decoder->dest[0] += decoder->slice_stride;                  \
+           decoder->dest[1] += decoder->slice_uv_stride;               \
+           decoder->dest[2] += decoder->slice_uv_stride;               \
+       } while (0);                                                    \
+       decoder->v_offset += 16;                                        \
+       if (decoder->v_offset > decoder->limit_y) {                     \
+           if (mpeg2_cpu_state_restore)                                \
+               mpeg2_cpu_state_restore (&cpu_state);                   \
+           return; /* leaves the function that expanded the macro */   \
+       }                                                               \
+       decoder->offset = 0;                                            \
+    }                                                                  \
+} while (0)
 
-static void motion_fi_16x8 (slice_t * slice, motion_t * motion,
-                           uint8_t * dest[3], int offset, int width,
-                           void (** table) (uint8_t *, uint8_t *, int, int))
+void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3],
+                     uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]) /* per-picture setup: destination and reference plane pointers, strides, limits and the motion_parser table */
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
-    int motion_x, motion_y;
-    int field_select;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    field_select = UBITS (bit_buf, 1);
-    DUMPBITS (bit_buf, bits, 1);
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[0][0] + get_motion_delta (slice, motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[0][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = motion->pmv[0][1] + get_motion_delta (slice, motion->f_code[1]);
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[0][1] = motion_y;
-
-    motion_block (table, motion_x, motion_y, dest, offset,
-                 motion->ref[field_select], offset, width, 8, 0);
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    field_select = UBITS (bit_buf, 1);
-    DUMPBITS (bit_buf, bits, 1);
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[1][0] + get_motion_delta (slice, motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[1][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = motion->pmv[1][1] + get_motion_delta (slice, motion->f_code[1]);
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[1][1] = motion_y;
+    int offset, stride, height, bottom_field;
+
+    stride = decoder->stride_frame;
+    bottom_field = (decoder->picture_structure == BOTTOM_FIELD);
+    offset = bottom_field ? stride : 0; /* a bottom field starts one frame stride into each plane */
+    height = decoder->height;
+
+    decoder->picture_dest[0] = current_fbuf[0] + offset;
+    decoder->picture_dest[1] = current_fbuf[1] + (offset >> 1); /* planes 1 and 2 use half the plane 0 offset */
+    decoder->picture_dest[2] = current_fbuf[2] + (offset >> 1);
+
+    decoder->f_motion.ref[0][0] = forward_fbuf[0] + offset; /* ref[0]: same start offset as picture_dest */
+    decoder->f_motion.ref[0][1] = forward_fbuf[1] + (offset >> 1);
+    decoder->f_motion.ref[0][2] = forward_fbuf[2] + (offset >> 1);
+
+    decoder->b_motion.ref[0][0] = backward_fbuf[0] + offset;
+    decoder->b_motion.ref[0][1] = backward_fbuf[1] + (offset >> 1);
+    decoder->b_motion.ref[0][2] = backward_fbuf[2] + (offset >> 1);
+
+    if (decoder->picture_structure != FRAME_PICTURE) { /* extra setup when the picture is not a frame picture */
+       decoder->dmv_offset = bottom_field ? 1 : -1;
+       decoder->f_motion.ref2[0] = decoder->f_motion.ref[bottom_field]; /* ref2[0]: ref entry indexed by bottom_field */
+       decoder->f_motion.ref2[1] = decoder->f_motion.ref[!bottom_field]; /* ref2[1]: the other ref entry */
+       decoder->b_motion.ref2[0] = decoder->b_motion.ref[bottom_field];
+       decoder->b_motion.ref2[1] = decoder->b_motion.ref[!bottom_field];
+       offset = stride - offset; /* swap 0 and stride: start offset of the other field */
+
+       if (decoder->second_field && (decoder->coding_type != B_TYPE))
+           forward_fbuf = current_fbuf; /* second field of a non-B picture: forward ref[1] points into the current buffer */
+
+       decoder->f_motion.ref[1][0] = forward_fbuf[0] + offset;
+       decoder->f_motion.ref[1][1] = forward_fbuf[1] + (offset >> 1);
+       decoder->f_motion.ref[1][2] = forward_fbuf[2] + (offset >> 1);
+
+       decoder->b_motion.ref[1][0] = backward_fbuf[0] + offset;
+       decoder->b_motion.ref[1][1] = backward_fbuf[1] + (offset >> 1);
+       decoder->b_motion.ref[1][2] = backward_fbuf[2] + (offset >> 1);
+
+       stride <<= 1; /* values derived below use twice the frame stride ... */
+       height >>= 1; /* ... and half the frame height */
+    }
 
-    motion_block (table, motion_x, motion_y, dest, offset,
-                 motion->ref[field_select], offset, width, 8, 0);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
+    decoder->stride = stride;
+    decoder->uv_stride = stride >> 1;
+    decoder->slice_stride = 16 * stride; /* plane 0 advance per slice row, as used by slice_init */
+    decoder->slice_uv_stride =
+       decoder->slice_stride >> (2 - decoder->chroma_format); /* shift is 2, 1 or 0 for chroma_format 0, 1 or 2 */
+    decoder->limit_x = 2 * decoder->width - 32; /* NOTE(review): the factor 2 suggests half-sample units - confirm */
+    decoder->limit_y_16 = 2 * height - 32;
+    decoder->limit_y_8 = 2 * height - 16;
+    decoder->limit_y = height - 16; /* compared against v_offset in slice_init */
+
+    if (decoder->mpeg1) { /* mpeg1 flag set: only entries 0, MC_FRAME and 4 are filled */
+       decoder->motion_parser[0] = motion_zero_420;
+       decoder->motion_parser[MC_FRAME] = motion_mp1;
+       decoder->motion_parser[4] = motion_reuse_420; /* index 4 holds a motion_reuse_* parser in every branch */
+    } else if (decoder->picture_structure == FRAME_PICTURE) { /* frame pictures: motion_fr_* parsers */
+       if (decoder->chroma_format == 0) { /* chroma_format 0 selects the _420 variants */
+           decoder->motion_parser[0] = motion_zero_420;
+           decoder->motion_parser[MC_FIELD] = motion_fr_field_420;
+           decoder->motion_parser[MC_FRAME] = motion_fr_frame_420;
+           decoder->motion_parser[MC_DMV] = motion_fr_dmv_420;
+           decoder->motion_parser[4] = motion_reuse_420;
+       } else if (decoder->chroma_format == 1) { /* chroma_format 1 selects the _422 variants */
+           decoder->motion_parser[0] = motion_zero_422;
+           decoder->motion_parser[MC_FIELD] = motion_fr_field_422;
+           decoder->motion_parser[MC_FRAME] = motion_fr_frame_422;
+           decoder->motion_parser[MC_DMV] = motion_fr_dmv_422;
+           decoder->motion_parser[4] = motion_reuse_422;
+       } else { /* any other chroma_format selects the _444 variants */
+           decoder->motion_parser[0] = motion_zero_444;
+           decoder->motion_parser[MC_FIELD] = motion_fr_field_444;
+           decoder->motion_parser[MC_FRAME] = motion_fr_frame_444;
+           decoder->motion_parser[MC_DMV] = motion_fr_dmv_444;
+           decoder->motion_parser[4] = motion_reuse_444;
+       }
+    } else { /* remaining picture structures: motion_fi_* parsers, with MC_16X8 in place of MC_FRAME */
+       if (decoder->chroma_format == 0) {
+           decoder->motion_parser[0] = motion_zero_420;
+           decoder->motion_parser[MC_FIELD] = motion_fi_field_420;
+           decoder->motion_parser[MC_16X8] = motion_fi_16x8_420;
+           decoder->motion_parser[MC_DMV] = motion_fi_dmv_420;
+           decoder->motion_parser[4] = motion_reuse_420;
+       } else if (decoder->chroma_format == 1) {
+           decoder->motion_parser[0] = motion_zero_422;
+           decoder->motion_parser[MC_FIELD] = motion_fi_field_422;
+           decoder->motion_parser[MC_16X8] = motion_fi_16x8_422;
+           decoder->motion_parser[MC_DMV] = motion_fi_dmv_422;
+           decoder->motion_parser[4] = motion_reuse_422;
+       } else {
+           decoder->motion_parser[0] = motion_zero_444;
+           decoder->motion_parser[MC_FIELD] = motion_fi_field_444;
+           decoder->motion_parser[MC_16X8] = motion_fi_16x8_444;
+           decoder->motion_parser[MC_DMV] = motion_fi_dmv_444;
+           decoder->motion_parser[4] = motion_reuse_444;
+       }
+    }
 }
 
-static int current_field = 0;
-static void motion_fi_dmv (slice_t * slice, motion_t * motion,
-                          uint8_t * dest[3], int offset, int width,
-                          void (** table) (uint8_t *, uint8_t *, int, int))
+static inline int slice_init (mpeg2_decoder_t * const decoder, int code) /* reset per-slice state, parse the slice header and first address increment; returns 1 on error, 0 on success */
 {
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
-    int motion_x, motion_y;
-    int dmv_x, dmv_y;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = motion->pmv[0][0] + get_motion_delta (slice, motion->f_code[0]);
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
-    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    dmv_x = get_dmv (slice);
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = motion->pmv[0][1] + get_motion_delta (slice, motion->f_code[1]);
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
-    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    dmv_y = get_dmv (slice);
-
-    motion_block (mc_functions.put, motion_x, motion_y, dest, offset,
-                 motion->ref[current_field], offset, width, 16, 0);
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int offset;
+    const MBAtab * mba;
 
-    motion_x = ((motion_x + (motion_x > 0)) >> 1) + dmv_x;
-    motion_y = ((motion_y + (motion_y > 0)) >> 1) + dmv_y +
-       2 * current_field - 1;
-    motion_block (mc_functions.avg, motion_x, motion_y, dest, offset,
-                 motion->ref[!current_field], offset, width, 16, 0);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
+    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+       decoder->dc_dct_pred[2] = 16384; /* all three DC predictors restart from the same constant */
 
-static void motion_fi_reuse (slice_t * slice, motion_t * motion,
-                            uint8_t * dest[3], int offset, int width,
-                            void (** table) (uint8_t *, uint8_t *, int, int))
-{
-    motion_block (table, motion->pmv[0][0], motion->pmv[0][1], dest, offset,
-                 motion->ref[current_field], offset, width, 16, 0);
-}
+    decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; /* clear every pmv entry of f_motion and b_motion */
+    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
+    decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
+    decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
 
-static void motion_fi_zero (slice_t * slice, motion_t * motion,
-                           uint8_t * dest[3], int offset, int width,
-                           void (** table) (uint8_t *, uint8_t *, int, int))
-{
-    motion_block (table, 0, 0, dest, offset,
-                 motion->ref[current_field], offset, width, 16, 0);
-}
+    if (decoder->vertical_position_extension) {
+       code += UBITS (bit_buf, 3) << 7; /* a 3-bit field is added to code, weighted by 128 */
+       DUMPBITS (bit_buf, bits, 3);
+    }
+    decoder->v_offset = (code - 1) * 16; /* code is 1-based; 16 per slice row */
+    offset = 0;
+    if (!(decoder->convert) || decoder->coding_type != B_TYPE) /* offset stays 0 when convert is set and the picture is B_TYPE */
+       offset = (code - 1) * decoder->slice_stride;
 
-static void motion_fi_conceal (slice_t * slice, motion_t * motion)
-{
-#define bit_buf (slice->bitstream_buf)
-#define bits (slice->bitstream_bits)
-#define bit_ptr (slice->bitstream_ptr)
-    int tmp;
+    decoder->dest[0] = decoder->picture_dest[0] + offset;
+    offset >>= (2 - decoder->chroma_format); /* same shift that mpeg2_init_fbuf applies to get slice_uv_stride */
+    decoder->dest[1] = decoder->picture_dest[1] + offset;
+    decoder->dest[2] = decoder->picture_dest[2] + offset;
 
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    DUMPBITS (bit_buf, bits, 1); // remove field_select
+    get_quantizer_scale (decoder);
 
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    tmp = motion->pmv[0][0] + get_motion_delta (slice, motion->f_code[0]);
-    tmp = bound_motion_vector (tmp, motion->f_code[0]);
-    motion->pmv[1][0] = motion->pmv[0][0] = tmp;
+    /* ignore intra_slice and all the extra data */
+    while (bit_buf & 0x80000000) { /* while the top bit is set, drop 9 bits and refill */
+       DUMPBITS (bit_buf, bits, 9);
+       NEEDBITS (bit_buf, bits, bit_ptr);
+    }
 
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    tmp = motion->pmv[0][1] + get_motion_delta (slice, motion->f_code[1]);
-    tmp = bound_motion_vector (tmp, motion->f_code[1]);
-    motion->pmv[1][1] = motion->pmv[0][1] = tmp;
+    /* decode initial macroblock address increment */
+    offset = 0; /* reused from here on to accumulate macroblock_escape increments */
+    while (1) {
+       if (bit_buf >= 0x08000000) {
+           mba = MBA_5 + (UBITS (bit_buf, 6) - 2); /* MBA_5 entry selected by a 6-bit field */
+           break;
+       } else if (bit_buf >= 0x01800000) {
+           mba = MBA_11 + (UBITS (bit_buf, 12) - 24); /* MBA_11 entry selected by a 12-bit field */
+           break;
+       } else switch (UBITS (bit_buf, 12)) {
+       case 8:         /* macroblock_escape */
+           offset += 33; /* each escape adds 33 to the increment */
+           DUMPBITS (bit_buf, bits, 11);
+           NEEDBITS (bit_buf, bits, bit_ptr);
+           continue;
+       case 15:        /* macroblock_stuffing (MPEG1 only) */
+           bit_buf &= 0xfffff; /* NOTE(review): keeps only the low 20 bits before the dump; intent not visible here - confirm */
+           DUMPBITS (bit_buf, bits, 11);
+           NEEDBITS (bit_buf, bits, bit_ptr);
+           continue;
+       default:        /* error */
+           return 1;
+       }
+    }
+    DUMPBITS (bit_buf, bits, mba->len + 1); /* NOTE(review): one bit more than mba->len; presumably also consumes the 0 bit that ended the extra-data loop - confirm */
+    decoder->offset = (offset + mba->mba) << 4; /* total increment times 16 */
+
+    while (decoder->offset - decoder->width >= 0) { /* offsets of width or more wrap onto following rows */
+       decoder->offset -= decoder->width;
+       if (!(decoder->convert) || decoder->coding_type != B_TYPE) { /* same condition as the initial dest offset above */
+           decoder->dest[0] += decoder->slice_stride;
+           decoder->dest[1] += decoder->slice_uv_stride;
+           decoder->dest[2] += decoder->slice_uv_stride;
+       }
+       decoder->v_offset += 16;
+    }
+    if (decoder->v_offset > decoder->limit_y) /* row lies beyond limit_y: report an error */
+       return 1;
 
-    DUMPBITS (bit_buf, bits, 1); // remove marker_bit
+    return 0;
 #undef bit_buf
 #undef bits
 #undef bit_ptr
 }
 
-#define MOTION(routine,direction,slice,dest,offset,stride)             \
-do {                                                                   \
-    if ((direction) & MACROBLOCK_MOTION_FORWARD)                       \
-       routine (&slice, &((slice).f_motion), dest, offset, stride,     \
-                mc_functions.put);                                     \
-    if ((direction) & MACROBLOCK_MOTION_BACKWARD)                      \
-       routine (&slice, &((slice).b_motion), dest, offset, stride,     \
-                ((direction) & MACROBLOCK_MOTION_FORWARD ?             \
-                 mc_functions.avg : mc_functions.put));                \
-} while (0)
-
-#define CHECK_DISPLAY                                  \
-do {                                                   \
-    if (offset == width) {                             \
-       slice.f_motion.ref[0][0] += 16 * offset;        \
-       slice.f_motion.ref[0][1] += 4 * offset;         \
-       slice.f_motion.ref[0][2] += 4 * offset;         \
-       slice.b_motion.ref[0][0] += 16 * offset;        \
-       slice.b_motion.ref[0][1] += 4 * offset;         \
-       slice.b_motion.ref[0][2] += 4 * offset;         \
-       dest[0] += 16 * offset;                         \
-       dest[1] += 4 * offset;                          \
-       dest[2] += 4 * offset;                          \
-       offset = 0; ++ypos;                             \
-    }                                                  \
-} while (0)
-
-int slice_process (picture_t * picture, uint8_t code, uint8_t * buffer)
+void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code,
+                 const uint8_t * const buffer)
 {
-#define bit_buf (slice.bitstream_buf)
-#define bits (slice.bitstream_bits)
-#define bit_ptr (slice.bitstream_ptr)
-    slice_t slice;
-    int macroblock_modes;
-    int width;
-    int ypos=code-1;
-    uint8_t * dest[3];
-    int offset;
-    uint8_t ** forward_ref[2];
-
-    width = picture->coded_picture_width;
-    offset = ypos * width * 4;
-
-    forward_ref[0] = picture->forward_reference_frame;
-    if (picture->picture_structure != FRAME_PICTURE) {
-       offset <<= 1;
-       forward_ref[1] = picture->forward_reference_frame;
-       current_field = (picture->picture_structure == BOTTOM_FIELD);
-       if ((picture->second_field) &&
-           (picture->picture_coding_type != B_TYPE))
-           forward_ref[picture->picture_structure == TOP_FIELD] =
-               picture->current_frame;
-       slice.f_motion.ref[1][0] = forward_ref[1][0] + offset * 4 + width;
-       slice.f_motion.ref[1][1] = forward_ref[1][1] + offset + (width >> 1);
-       slice.f_motion.ref[1][2] = forward_ref[1][2] + offset + (width >> 1);
-       slice.b_motion.ref[1][0] =
-           picture->backward_reference_frame[0] + offset * 4 + width;
-       slice.b_motion.ref[1][1] =
-           picture->backward_reference_frame[1] + offset + (width >> 1);
-       slice.b_motion.ref[1][2] =
-           picture->backward_reference_frame[2] + offset + (width >> 1);
-    }
-
-    slice.f_motion.ref[0][0] = forward_ref[0][0] + offset * 4;
-    slice.f_motion.ref[0][1] = forward_ref[0][1] + offset;
-    slice.f_motion.ref[0][2] = forward_ref[0][2] + offset;
-    slice.f_motion.f_code[0] = picture->f_code[0][0];
-    slice.f_motion.f_code[1] = picture->f_code[0][1];
-    slice.f_motion.pmv[0][0] = slice.f_motion.pmv[0][1] = 0;
-    slice.f_motion.pmv[1][0] = slice.f_motion.pmv[1][1] = 0;
-    slice.b_motion.ref[0][0] =
-       picture->backward_reference_frame[0] + offset * 4;
-    slice.b_motion.ref[0][1] =
-       picture->backward_reference_frame[1] + offset;
-    slice.b_motion.ref[0][2] =
-       picture->backward_reference_frame[2] + offset;
-    slice.b_motion.f_code[0] = picture->f_code[1][0];
-    slice.b_motion.f_code[1] = picture->f_code[1][1];
-    slice.b_motion.pmv[0][0] = slice.b_motion.pmv[0][1] = 0;
-    slice.b_motion.pmv[1][0] = slice.b_motion.pmv[1][1] = 0;
-
-    if ((! HACK_MODE) && (!picture->mpeg1) &&
-       (picture->picture_coding_type == B_TYPE))
-       offset = 0;
-
-    dest[0] = picture->current_frame[0] + offset * 4;
-    dest[1] = picture->current_frame[1] + offset;
-    dest[2] = picture->current_frame[2] + offset;
-
-    switch (picture->picture_structure) {
-    case BOTTOM_FIELD:
-       dest[0] += width;
-       dest[1] += width >> 1;
-       dest[2] += width >> 1;
-       // follow thru
-    case TOP_FIELD:
-       width <<= 1;
-    }
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    cpu_state_t cpu_state;
 
-    //reset intra dc predictor
-    slice.dc_dct_pred[0]=slice.dc_dct_pred[1]=slice.dc_dct_pred[2]= 
-       1<< (picture->intra_dc_precision + 7) ;
+    bitstream_init (decoder, buffer);
 
-    bitstream_init (&slice, buffer);
-
-    slice.quantizer_scale = get_quantizer_scale (&slice,
-                                                picture->q_scale_type);
-
-    //Ignore intra_slice and all the extra data
-    while (bit_buf & 0x80000000) {
-       DUMPBITS (bit_buf, bits, 9);
-       NEEDBITS (bit_buf, bits, bit_ptr);
-    }
-    DUMPBITS (bit_buf, bits, 1);
+    if (slice_init (decoder, code))
+       return;
 
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    offset = get_macroblock_address_increment (&slice) << 4;
+    if (mpeg2_cpu_state_save)
+       mpeg2_cpu_state_save (&cpu_state);
 
     while (1) {
+       int macroblock_modes;
+       int mba_inc;
+       const MBAtab * mba;
+
        NEEDBITS (bit_buf, bits, bit_ptr);
 
-       macroblock_modes =
-           get_macroblock_modes (&slice, picture->picture_structure,
-                                 picture->picture_coding_type,
-                                 picture->frame_pred_frame_dct);
+       macroblock_modes = get_macroblock_modes (decoder);
 
-       // maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ?
+       /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
        if (macroblock_modes & MACROBLOCK_QUANT)
-           slice.quantizer_scale =
-               get_quantizer_scale (&slice, picture->q_scale_type);
+           get_quantizer_scale (decoder);
 
        if (macroblock_modes & MACROBLOCK_INTRA) {
 
            int DCT_offset, DCT_stride;
+           int offset;
+           uint8_t * dest_y;
 
-           if (picture->concealment_motion_vectors) {
-               if (picture->picture_structure == FRAME_PICTURE)
-                   motion_fr_conceal (&slice, &slice.f_motion);
+           if (decoder->concealment_motion_vectors) {
+               if (decoder->picture_structure == FRAME_PICTURE)
+                   motion_fr_conceal (decoder);
                else
-                   motion_fi_conceal (&slice, &slice.f_motion);
+                   motion_fi_conceal (decoder);
            } else {
-               slice.f_motion.pmv[0][0] = slice.f_motion.pmv[0][1] = 0;
-               slice.f_motion.pmv[1][0] = slice.f_motion.pmv[1][1] = 0;
-               slice.b_motion.pmv[0][0] = slice.b_motion.pmv[0][1] = 0;
-               slice.b_motion.pmv[1][0] = slice.b_motion.pmv[1][1] = 0;
+               decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
+               decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
+               decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
+               decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
            }
 
            if (macroblock_modes & DCT_TYPE_INTERLACED) {
-               DCT_offset = width;
-               DCT_stride = width * 2;
+               DCT_offset = decoder->stride;
+               DCT_stride = decoder->stride * 2;
            } else {
-               DCT_offset = width * 8;
-               DCT_stride = width;
+               DCT_offset = decoder->stride * 8;
+               DCT_stride = decoder->stride;
            }
 
-           // Decode lum blocks
-           slice_intra_DCT (picture, &slice, 0,
-                            dest[0] + offset, DCT_stride);
-           slice_intra_DCT (picture, &slice, 0,
-                            dest[0] + offset + 8, DCT_stride);
-           slice_intra_DCT (picture, &slice, 0,
-                            dest[0] + offset + DCT_offset, DCT_stride);
-           slice_intra_DCT (picture, &slice, 0,
-                            dest[0] + offset + DCT_offset + 8, DCT_stride);
-
-           // Decode chroma blocks
-           slice_intra_DCT (picture, &slice, 1,
-                            dest[1] + (offset>>1), width>>1);
-           slice_intra_DCT (picture, &slice, 2,
-                            dest[2] + (offset>>1), width>>1);
-
-           if (picture->picture_coding_type == D_TYPE) {
-               NEEDBITS (bit_buf, bits, bit_ptr);
-               DUMPBITS (bit_buf, bits, 1);
+           offset = decoder->offset;
+           dest_y = decoder->dest[0] + offset;
+           slice_intra_DCT (decoder, 0, dest_y, DCT_stride);
+           slice_intra_DCT (decoder, 0, dest_y + 8, DCT_stride);
+           slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride);
+           slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 8, DCT_stride);
+           if (likely (decoder->chroma_format == 0)) {
+               slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1),
+                                decoder->uv_stride);
+               slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1),
+                                decoder->uv_stride);
+               if (decoder->coding_type == D_TYPE) {
+                   NEEDBITS (bit_buf, bits, bit_ptr);
+                   DUMPBITS (bit_buf, bits, 1);
+               }
+           } else if (likely (decoder->chroma_format == 1)) {
+               uint8_t * dest_u = decoder->dest[1] + (offset >> 1);
+               uint8_t * dest_v = decoder->dest[2] + (offset >> 1);
+               DCT_stride >>= 1;
+               DCT_offset >>= 1;
+               slice_intra_DCT (decoder, 1, dest_u, DCT_stride);
+               slice_intra_DCT (decoder, 2, dest_v, DCT_stride);
+               slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride);
+               slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride);
+           } else {
+               uint8_t * dest_u = decoder->dest[1] + offset;
+               uint8_t * dest_v = decoder->dest[2] + offset;
+               slice_intra_DCT (decoder, 1, dest_u, DCT_stride);
+               slice_intra_DCT (decoder, 2, dest_v, DCT_stride);
+               slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride);
+               slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride);
+               slice_intra_DCT (decoder, 1, dest_u + 8, DCT_stride);
+               slice_intra_DCT (decoder, 2, dest_v + 8, DCT_stride);
+               slice_intra_DCT (decoder, 1, dest_u + DCT_offset + 8,
+                                DCT_stride);
+               slice_intra_DCT (decoder, 2, dest_v + DCT_offset + 8,
+                                DCT_stride);
            }
        } else {
 
-           if (picture->mpeg1) {
-               if ((macroblock_modes & MOTION_TYPE_MASK) == MC_FRAME)
-                   MOTION (motion_mp1, macroblock_modes, slice,
-                           dest, offset,width);
-               else {
-                   // non-intra mb without forward mv in a P picture
-                   slice.f_motion.pmv[0][0] = slice.f_motion.pmv[0][1] = 0;
-                   slice.f_motion.pmv[1][0] = slice.f_motion.pmv[1][1] = 0;
-
-                   MOTION (motion_fr_zero, MACROBLOCK_MOTION_FORWARD, slice,
-                           dest, offset, width);
-               }
-           } else if (picture->picture_structure == FRAME_PICTURE)
-               switch (macroblock_modes & MOTION_TYPE_MASK) {
-               case MC_FRAME:
-                   MOTION (motion_fr_frame, macroblock_modes, slice,
-                           dest, offset, width);
-                   break;
-
-               case MC_FIELD:
-                   MOTION (motion_fr_field, macroblock_modes, slice,
-                           dest, offset, width);
-                   break;
-
-               case MC_DMV:
-                   motion_dmv_top_field_first = picture->top_field_first;
-                   MOTION (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD, slice,
-                           dest, offset, width);
-                   break;
-
-               case 0:
-                   // non-intra mb without forward mv in a P picture
-                   slice.f_motion.pmv[0][0] = slice.f_motion.pmv[0][1] = 0;
-                   slice.f_motion.pmv[1][0] = slice.f_motion.pmv[1][1] = 0;
-
-                   MOTION (motion_fr_zero, MACROBLOCK_MOTION_FORWARD, slice,
-                           dest, offset, width);
-                   break;
-               }
-           else
-               switch (macroblock_modes & MOTION_TYPE_MASK) {
-               case MC_FIELD:
-                   MOTION (motion_fi_field, macroblock_modes, slice,
-                           dest, offset, width);
-                   break;
-
-               case MC_16X8:
-                   MOTION (motion_fi_16x8, macroblock_modes, slice,
-                           dest, offset, width);
-                   break;
-
-               case MC_DMV:
-                   motion_dmv_top_field_first = picture->top_field_first;
-                   MOTION (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD, slice,
-                           dest, offset, width);
-                   break;
-
-               case 0:
-                   // non-intra mb without forward mv in a P picture
-                   slice.f_motion.pmv[0][0] = slice.f_motion.pmv[0][1] = 0;
-                   slice.f_motion.pmv[1][0] = slice.f_motion.pmv[1][1] = 0;
-
-                   MOTION (motion_fi_zero, MACROBLOCK_MOTION_FORWARD, slice,
-                           dest, offset, width);
-                   break;
-               }
+           motion_parser_t * parser;
+
+           parser =
+               decoder->motion_parser[macroblock_modes >> MOTION_TYPE_SHIFT];
+           MOTION_CALL (parser, macroblock_modes);
 
-           //6.3.17.4 Coded block pattern
            if (macroblock_modes & MACROBLOCK_PATTERN) {
                int coded_block_pattern;
                int DCT_offset, DCT_stride;
 
                if (macroblock_modes & DCT_TYPE_INTERLACED) {
-                   DCT_offset = width;
-                   DCT_stride = width * 2;
+                   DCT_offset = decoder->stride;
+                   DCT_stride = decoder->stride * 2;
                } else {
-                   DCT_offset = width * 8;
-                   DCT_stride = width;
+                   DCT_offset = decoder->stride * 8;
+                   DCT_stride = decoder->stride;
                }
 
-               coded_block_pattern = get_coded_block_pattern (&slice);
-
-               // Decode lum blocks
-
-               if (coded_block_pattern & 0x20)
-                   slice_non_intra_DCT (picture, &slice,
-                                        dest[0] + offset, DCT_stride);
-               if (coded_block_pattern & 0x10)
-                   slice_non_intra_DCT (picture, &slice,
-                                        dest[0] + offset + 8, DCT_stride);
-               if (coded_block_pattern & 0x08)
-                   slice_non_intra_DCT (picture, &slice,
-                                        dest[0] + offset + DCT_offset,
-                                        DCT_stride);
-               if (coded_block_pattern & 0x04)
-                   slice_non_intra_DCT (picture, &slice,
-                                        dest[0] + offset + DCT_offset + 8,
-                                        DCT_stride);
-
-               // Decode chroma blocks
-
-               if (coded_block_pattern & 0x2)
-                   slice_non_intra_DCT (picture, &slice,
-                                        dest[1] + (offset>>1), width >> 1);
-               if (coded_block_pattern & 0x1)
-                   slice_non_intra_DCT (picture, &slice,
-                                        dest[2] + (offset>>1), width >> 1);
+               coded_block_pattern = get_coded_block_pattern (decoder);
+
+               if (likely (decoder->chroma_format == 0)) {
+                   int offset = decoder->offset;
+                   uint8_t * dest_y = decoder->dest[0] + offset;
+                   if (coded_block_pattern & 1)
+                       slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
+                   if (coded_block_pattern & 2)
+                       slice_non_intra_DCT (decoder, 0, dest_y + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & 4)
+                       slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
+                                            DCT_stride);
+                   if (coded_block_pattern & 8)
+                       slice_non_intra_DCT (decoder, 0,
+                                            dest_y + DCT_offset + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & 16)
+                       slice_non_intra_DCT (decoder, 1,
+                                            decoder->dest[1] + (offset >> 1),
+                                            decoder->uv_stride);
+                   if (coded_block_pattern & 32)
+                       slice_non_intra_DCT (decoder, 2,
+                                            decoder->dest[2] + (offset >> 1),
+                                            decoder->uv_stride);
+               } else if (likely (decoder->chroma_format == 1)) {
+                   int offset;
+                   uint8_t * dest_y;
+
+                   coded_block_pattern |= bit_buf & (3 << 30);
+                   DUMPBITS (bit_buf, bits, 2);
+
+                   offset = decoder->offset;
+                   dest_y = decoder->dest[0] + offset;
+                   if (coded_block_pattern & 1)
+                       slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
+                   if (coded_block_pattern & 2)
+                       slice_non_intra_DCT (decoder, 0, dest_y + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & 4)
+                       slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
+                                            DCT_stride);
+                   if (coded_block_pattern & 8)
+                       slice_non_intra_DCT (decoder, 0,
+                                            dest_y + DCT_offset + 8,
+                                            DCT_stride);
+
+                   DCT_stride >>= 1;
+                   DCT_offset = (DCT_offset + offset) >> 1;
+                   if (coded_block_pattern & 16)
+                       slice_non_intra_DCT (decoder, 1,
+                                            decoder->dest[1] + (offset >> 1),
+                                            DCT_stride);
+                   if (coded_block_pattern & 32)
+                       slice_non_intra_DCT (decoder, 2,
+                                            decoder->dest[2] + (offset >> 1),
+                                            DCT_stride);
+                   if (coded_block_pattern & (2 << 30))
+                       slice_non_intra_DCT (decoder, 1,
+                                            decoder->dest[1] + DCT_offset,
+                                            DCT_stride);
+                   if (coded_block_pattern & (1 << 30))
+                       slice_non_intra_DCT (decoder, 2,
+                                            decoder->dest[2] + DCT_offset,
+                                            DCT_stride);
+               } else {
+                   int offset;
+                   uint8_t * dest_y, * dest_u, * dest_v;
+
+                   coded_block_pattern |= bit_buf & (63 << 26);
+                   DUMPBITS (bit_buf, bits, 6);
+
+                   offset = decoder->offset;
+                   dest_y = decoder->dest[0] + offset;
+                   dest_u = decoder->dest[1] + offset;
+                   dest_v = decoder->dest[2] + offset;
+
+                   if (coded_block_pattern & 1)
+                       slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
+                   if (coded_block_pattern & 2)
+                       slice_non_intra_DCT (decoder, 0, dest_y + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & 4)
+                       slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
+                                            DCT_stride);
+                   if (coded_block_pattern & 8)
+                       slice_non_intra_DCT (decoder, 0,
+                                            dest_y + DCT_offset + 8,
+                                            DCT_stride);
+
+                   if (coded_block_pattern & 16)
+                       slice_non_intra_DCT (decoder, 1, dest_u, DCT_stride);
+                   if (coded_block_pattern & 32)
+                       slice_non_intra_DCT (decoder, 2, dest_v, DCT_stride);
+                   if (coded_block_pattern & (32 << 26))
+                       slice_non_intra_DCT (decoder, 1, dest_u + DCT_offset,
+                                            DCT_stride);
+                   if (coded_block_pattern & (16 << 26))
+                       slice_non_intra_DCT (decoder, 2, dest_v + DCT_offset,
+                                            DCT_stride);
+                   if (coded_block_pattern & (8 << 26))
+                       slice_non_intra_DCT (decoder, 1, dest_u + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & (4 << 26))
+                       slice_non_intra_DCT (decoder, 2, dest_v + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & (2 << 26))
+                       slice_non_intra_DCT (decoder, 1,
+                                            dest_u + DCT_offset + 8,
+                                            DCT_stride);
+                   if (coded_block_pattern & (1 << 26))
+                       slice_non_intra_DCT (decoder, 2,
+                                            dest_v + DCT_offset + 8,
+                                            DCT_stride);
+               }
            }
 
-           slice.dc_dct_pred[0]=slice.dc_dct_pred[1]=slice.dc_dct_pred[2]=
-               1 << (picture->intra_dc_precision + 7);
+           decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+               decoder->dc_dct_pred[2] = 16384;
        }
 
-//        printf("[%d]",slice.quantizer_scale);
-//        printf("[%d,%d]",offset>>4,ypos);
-       quant_store[ypos+1][(offset>>4)+1] = slice.quantizer_scale;
-
-       offset += 16;
-       CHECK_DISPLAY;
+       NEXT_MACROBLOCK;
 
        NEEDBITS (bit_buf, bits, bit_ptr);
-
-       if (bit_buf & 0x80000000) {
-           DUMPBITS (bit_buf, bits, 1);
-       } else {
-           int mba_inc;
-
-           mba_inc = get_macroblock_address_increment (&slice);
-           if (!mba_inc)
+       mba_inc = 0;
+       while (1) {
+           if (bit_buf >= 0x10000000) {
+               mba = MBA_5 + (UBITS (bit_buf, 5) - 2);
                break;
+           } else if (bit_buf >= 0x03000000) {
+               mba = MBA_11 + (UBITS (bit_buf, 11) - 24);
+               break;
+           } else switch (UBITS (bit_buf, 11)) {
+           case 8:             /* macroblock_escape */
+               mba_inc += 33;
+               /* pass through */
+           case 15:    /* macroblock_stuffing (MPEG1 only) */
+               DUMPBITS (bit_buf, bits, 11);
+               NEEDBITS (bit_buf, bits, bit_ptr);
+               continue;
+           default:    /* end of slice, or error */
+               if (mpeg2_cpu_state_restore)
+                   mpeg2_cpu_state_restore (&cpu_state);
+               return;
+           }
+       }
+       DUMPBITS (bit_buf, bits, mba->len);
+       mba_inc += mba->mba;
 
-           //reset intra dc predictor on skipped block
-           slice.dc_dct_pred[0]=slice.dc_dct_pred[1]=slice.dc_dct_pred[2]=
-               1<< (picture->intra_dc_precision + 7);
-
-           //handling of skipped mb's differs between P_TYPE and B_TYPE
-           //pictures
-           if (picture->picture_coding_type == P_TYPE) {
-               slice.f_motion.pmv[0][0] = slice.f_motion.pmv[0][1] = 0;
-               slice.f_motion.pmv[1][0] = slice.f_motion.pmv[1][1] = 0;
+       if (mba_inc) {
+           decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+               decoder->dc_dct_pred[2] = 16384;
 
+           if (decoder->coding_type == P_TYPE) {
                do {
-                   if (picture->picture_structure == FRAME_PICTURE)
-                       MOTION (motion_fr_zero, MACROBLOCK_MOTION_FORWARD,
-                               slice, dest, offset, width);
-                   else
-                       MOTION (motion_fi_zero, MACROBLOCK_MOTION_FORWARD,
-                               slice, dest, offset, width);
-
-       quant_store[ypos+1][(offset>>4)+1] = slice.quantizer_scale;
-//        printf("[%d,%d]",offset>>4,ypos);
-                   offset += 16;
-                   CHECK_DISPLAY;
+                   MOTION_CALL (decoder->motion_parser[0],
+                                MACROBLOCK_MOTION_FORWARD);
+                   NEXT_MACROBLOCK;
                } while (--mba_inc);
            } else {
                do {
-                   if (picture->mpeg1)
-                       MOTION (motion_mp1_reuse, macroblock_modes,
-                               slice, dest, offset, width);
-                   else if (picture->picture_structure == FRAME_PICTURE)
-                       MOTION (motion_fr_reuse, macroblock_modes,
-                               slice, dest, offset, width);
-                   else
-                       MOTION (motion_fi_reuse, macroblock_modes,
-                               slice, dest, offset, width);
-
-       quant_store[ypos+1][(offset>>4)+1] = slice.quantizer_scale;
-//        printf("[%d,%d]",offset>>4,ypos);
-                   offset += 16;
-                   CHECK_DISPLAY;
+                   MOTION_CALL (decoder->motion_parser[4], macroblock_modes);
+                   NEXT_MACROBLOCK;
                } while (--mba_inc);
            }
        }
     }
-
-    return 0;
 #undef bit_buf
 #undef bits
 #undef bit_ptr