sizeof(s->block) isn't 64*6*2 anymore bugfix
author Michael Niedermayer <michaelni@gmx.at>
Thu, 28 Mar 2002 13:41:04 +0000 (13:41 +0000)
committer Michael Niedermayer <michaelni@gmx.at>
Thu, 28 Mar 2002 13:41:04 +0000 (13:41 +0000)
mpeg12 decoding optimization

Originally committed as revision 364 to svn://svn.ffmpeg.org/ffmpeg/trunk

libavcodec/dsputil.c
libavcodec/dsputil.h
libavcodec/h263dec.c
libavcodec/i386/dsputil_mmx.c
libavcodec/mpeg12.c

index d27687d84a46baec414dd4cce0d4606a7ef0ef43..dcfad05a5df40bd3b61413eae673ebe517c5f2ee 100644 (file)
@@ -30,6 +30,7 @@ void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
+void (*clear_blocks)(DCTELEM *blocks);
 
 op_pixels_abs_func pix_abs16x16;
 op_pixels_abs_func pix_abs16x16_x2;
@@ -866,6 +867,11 @@ void block_permute(INT16 *block)
 }
 #endif
 
+void clear_blocks_c(DCTELEM *blocks)
+{
+    memset(blocks, 0, sizeof(DCTELEM)*6*64);
+}
+
 void dsputil_init(void)
 {
     int i, j;
@@ -890,6 +896,7 @@ void dsputil_init(void)
     put_pixels_clamped = put_pixels_clamped_c;
     add_pixels_clamped = add_pixels_clamped_c;
     gmc1= gmc1_c;
+    clear_blocks= clear_blocks_c;
 
     pix_abs16x16     = pix_abs16x16_c;
     pix_abs16x16_x2  = pix_abs16x16_x2_c;
index 45c1a695a79649f01a2e5e722912ea81b227e7d1..ea6a3d84d223f4db718704267525c6cf97a4de44 100644 (file)
@@ -40,11 +40,13 @@ extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
 extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
+extern void (*clear_blocks)(DCTELEM *blocks);
 
 
 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
+void clear_blocks_c(DCTELEM *blocks);
 
 /* add and put pixel (decoding) */
 typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h);
index aa822800bd973df615aa4a55481da02d5965dc1f..de9919fdade0d64489c1c61f312ebb137b43d2e8 100644 (file)
@@ -156,6 +156,7 @@ static int h263_decode_frame(AVCodecContext *avctx,
         if (s->mb_y && !s->h263_pred) {
             s->first_gob_line = h263_decode_gob_header(s);
         }
+
         s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1;
         s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1);
         s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1;
@@ -183,28 +184,8 @@ static int h263_decode_frame(AVCodecContext *avctx,
                 s->y_dc_scale = 8;
                 s->c_dc_scale = 8;
             }
-
-#ifdef HAVE_MMX
-            if (mm_flags & MM_MMX) {
-                asm volatile(
-                       "pxor %%mm7, %%mm7              \n\t"
-                       "movl $-128*6, %%eax            \n\t"
-                       "1:                             \n\t"
-                       "movq %%mm7, (%0, %%eax)        \n\t"
-                       "movq %%mm7, 8(%0, %%eax)       \n\t"
-                       "movq %%mm7, 16(%0, %%eax)      \n\t"
-                       "movq %%mm7, 24(%0, %%eax)      \n\t"
-                       "addl $32, %%eax                \n\t"
-                       " js 1b                         \n\t"
-                       : : "r" (((int)s->block)+128*6)
-                       : "%eax"
-                );
-            }else{
-                memset(s->block, 0, sizeof(s->block));
-            }
-#else
-            memset(s->block, 0, sizeof(s->block));
-#endif
+            clear_blocks(s->block[0]);
+            
             s->mv_dir = MV_DIR_FORWARD;
             s->mv_type = MV_TYPE_16X16; 
             if (s->h263_msmpeg4) {
index 09a7174126791c4cbdd60543aa57b28cc956ce57..bf729d9638fc3ac2a70f66e82fac820412c4a0d4 100644 (file)
@@ -1025,6 +1025,23 @@ static void   sub_pixels_xy2_mmx( DCTELEM  *block, const UINT8 *pixels, int line
   } while(--h);
 }
 
+static void clear_blocks_mmx(DCTELEM *blocks)
+{
+        asm volatile(
+                "pxor %%mm7, %%mm7             \n\t"
+                "movl $-128*6, %%eax           \n\t"
+                "1:                            \n\t"
+                "movq %%mm7, (%0, %%eax)       \n\t"
+                "movq %%mm7, 8(%0, %%eax)      \n\t"
+                "movq %%mm7, 16(%0, %%eax)     \n\t"
+                "movq %%mm7, 24(%0, %%eax)     \n\t"
+                "addl $32, %%eax               \n\t"
+                " js 1b                                \n\t"
+                : : "r" (((int)blocks)+128*6)
+                : "%eax"
+        );
+}
+
 static void just_return() { return; }
 
 void dsputil_init_mmx(void)
@@ -1049,7 +1066,8 @@ void dsputil_init_mmx(void)
         get_pixels = get_pixels_mmx;
         put_pixels_clamped = put_pixels_clamped_mmx;
         add_pixels_clamped = add_pixels_clamped_mmx;
-        
+        clear_blocks= clear_blocks_mmx;
+       
         pix_abs16x16     = pix_abs16x16_mmx;
         pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
         pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
index 4d61df53c2ac14dfd2b1cdb6159692b536ee8ed5..24fc5db85b5f975cd12f6f576a7c2b1d3adb9e50 100644 (file)
@@ -1402,7 +1402,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
     }
 
     for(;;) {
-        memset(s->block, 0, sizeof(s->block));
+        clear_blocks(s->block[0]);
         ret = mpeg_decode_mb(s, s->block);
         dprintf("ret=%d\n", ret);
         if (ret < 0)