1) remove TBL support in PPC performance. It's much more useful to use the
author Romain Dolbeau <dolbeau@irisa.fr>
Wed, 9 Jul 2003 20:18:13 +0000 (20:18 +0000)
committer Michael Niedermayer <michaelni@gmx.at>
Wed, 9 Jul 2003 20:18:13 +0000 (20:18 +0000)
    PMCs, and with Apple's CHUD it's fairly easy too. No reason to keep useless
    code around
2) make the PPC perf stuff a configure option
3) make put_pixels16_altivec a bit faster by unrolling the loop by 4
patch by (Romain Dolbeau <dolbeau at irisa dot fr>)

Originally committed as revision 2022 to svn://svn.ffmpeg.org/ffmpeg/trunk

configure
ffmpeg.c
libavcodec/ppc/dsputil_altivec.c
libavcodec/ppc/dsputil_ppc.c
libavcodec/ppc/dsputil_ppc.h
libavcodec/ppc/fft_altivec.c
libavcodec/ppc/gmc_altivec.c
libavcodec/ppc/idct_altivec.c
libavcodec/ppc/mpegvideo_altivec.c

index 44e183a3999a537216f426bb2200be93bd17520b..90cd0a6d6a6db2d1788441ed886e44ea72271c8c 100755 (executable)
--- a/configure
+++ b/configure
@@ -27,6 +27,7 @@ make="make"
 strip="strip"
 cpu=`uname -m`
 tune="generic"
+powerpc_perf="no"
 mmx="default"
 altivec="default"
 mmi="default"
@@ -275,6 +276,8 @@ for opt do
   ;;
   --tune=*) tune=`echo $opt | cut -d '=' -f 2`
   ;;
+  --powerpc-perf-enable) powerpc_perf="yes"
+  ;;
   --disable-mmx) mmx="no"
   ;;
   --disable-altivec) altivec="no"
@@ -398,7 +401,7 @@ if test $tune != "generic"; then
            if test $altivec = "no"; then
                echo "WARNING: tuning for PPC74xx but altivec disabled !";
            fi
-           TUNECPU=ppc7450
+           TUNECPU=ppc7400
        ;;
        G5|970|ppc970|PowerPC970|power4*|Power4*)
            CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc64 -force_cpusubtype_ALL "
@@ -749,6 +752,7 @@ echo "  --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS [$LDFLAGS]"
 echo "  --extra-libs=ELIBS       add ELIBS [$ELIBS]"
 echo "  --cpu=CPU                force cpu to CPU  [$cpu]"
 echo "  --tune=PROCESSOR         tune code for a particular CPU (may fails or misperforms on other CPUs)"
+echo "  --powerpc-perf-enable    enable performance report on PPC (requires enabling PMC)"
 echo "  --disable-mmx            disable mmx usage"
 echo "  --disable-altivec        disable AltiVec usage"
 echo "  --disable-audio-oss      disable OSS audio support [default=no]"
@@ -847,10 +851,9 @@ elif test "$cpu" = "sparc64" ; then
 elif test "$cpu" = "powerpc" ; then
   echo "TARGET_ARCH_POWERPC=yes" >> config.mak
   echo "#define ARCH_POWERPC 1" >> $TMPH
-  echo "// Enable the next line to get PowerPC performance report" >> $TMPH
-  echo "// #define POWERPC_TBL_PERFORMANCE_REPORT 1" >> $TMPH
-  echo "// Enable the next line to use PMC registers instead of TBL" >> $TMPH
-  echo "// #define POWERPC_PERF_USE_PMC 1" >> $TMPH
+  if test "$powerpc_perf" = "yes"; then
+    echo "#define POWERPC_PERFORMANCE_REPORT 1" >> $TMPH
+  fi
 elif test "$cpu" = "mips" ; then
   echo "TARGET_ARCH_MIPS=yes" >> config.mak
   echo "#define ARCH_MIPS 1" >> $TMPH
index 820f159c6223c70aa450437459c793e79e4f5f00..14819028113c8f95e761971692e0fd3fd289540b 100644 (file)
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -2757,10 +2757,10 @@ int main(int argc, char **argv)
     av_free_static();
 
     
-#ifdef POWERPC_TBL_PERFORMANCE_REPORT
+#ifdef POWERPC_PERFORMANCE_REPORT
     extern void powerpc_display_perf_report(void);
     powerpc_display_perf_report();
-#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
+#endif /* POWERPC_PERFORMANCE_REPORT */
 
 #ifndef CONFIG_WIN32
     if (received_sigterm) {
index 2c71d8e7bf3fe2f12c74942e8df1f64ff3f53be6..635480784461212896b7cf8fcd245aba9ee9a0c8 100644 (file)
@@ -655,11 +655,11 @@ void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
 /* next one assumes that ((line_size % 16) == 0) */
 void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
-POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1);
+POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     int i;
 
-POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
 
     for(i=0; i<h; i++) {
       *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l);
@@ -670,15 +670,27 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
       block +=line_size;
     }
 
-POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
     register vector unsigned char pixelsv1, pixelsv2;
+    register vector unsigned char pixelsv1B, pixelsv2B;
+    register vector unsigned char pixelsv1C, pixelsv2C;
+    register vector unsigned char pixelsv1D, pixelsv2D;
+
     register vector unsigned char perm = vec_lvsl(0, pixels);
     int i;
-
-POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
-
+    register int line_size_2 = line_size << 1;
+    register int line_size_3 = line_size + line_size_2;
+    register int line_size_4 = line_size << 2;
+
+POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
+// hand-unrolling the loop by 4 gains about 15%
+// minimum execution time goes from 74 to 60 cycles
+// it's faster than -funroll-loops, but using
+// -funroll-loops w/ this is bad - 74 cycles again.
+// all this is on a 7450, tuning for the 7450
+#if 0
     for(i=0; i<h; i++) {
       pixelsv1 = vec_ld(0, (unsigned char*)pixels);
       pixelsv2 = vec_ld(16, (unsigned char*)pixels);
@@ -687,8 +699,29 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
       pixels+=line_size;
       block +=line_size;
     }
-
-POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
+#else
+    for(i=0; i<h; i+=4) {
+      pixelsv1 = vec_ld(0, (unsigned char*)pixels);
+      pixelsv2 = vec_ld(16, (unsigned char*)pixels);
+      pixelsv1B = vec_ld(line_size, (unsigned char*)pixels);
+      pixelsv2B = vec_ld(16 + line_size, (unsigned char*)pixels);
+      pixelsv1C = vec_ld(line_size_2, (unsigned char*)pixels);
+      pixelsv2C = vec_ld(16 + line_size_2, (unsigned char*)pixels);
+      pixelsv1D = vec_ld(line_size_3, (unsigned char*)pixels);
+      pixelsv2D = vec_ld(16 + line_size_3, (unsigned char*)pixels);
+      vec_st(vec_perm(pixelsv1, pixelsv2, perm),
+             0, (unsigned char*)block);
+      vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
+             line_size, (unsigned char*)block);
+      vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
+             line_size_2, (unsigned char*)block);
+      vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
+             line_size_3, (unsigned char*)block);
+      pixels+=line_size_4;
+      block +=line_size_4;
+    }
+#endif
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
 
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
@@ -697,11 +730,11 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
 #define op_avg(a,b)  a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
-POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1);
+POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     int i;
 
-POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
 
     for(i=0; i<h; i++) {
       op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l));
@@ -712,14 +745,14 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
       block +=line_size;
     }
 
-POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
     register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
     register vector unsigned char perm = vec_lvsl(0, pixels);
     int i;
 
-POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
 
     for(i=0; i<h; i++) {
       pixelsv1 = vec_ld(0, (unsigned char*)pixels);
@@ -732,7 +765,7 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
       block +=line_size;
     }
 
-POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
 
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
@@ -740,10 +773,10 @@ POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
 /* next one assumes that ((line_size % 8) == 0) */
 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
 {
-POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1);
+POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     int i;
-POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
+POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
     for (i = 0; i < h; i++) {
         *((uint32_t *) (block)) =
             (((*((uint32_t *) (block))) |
@@ -761,13 +794,13 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
         pixels += line_size;
         block += line_size;
     }
-POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
     register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
     int i;
 
-POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
+POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
  
    for (i = 0; i < h; i++) {
      /*
@@ -798,7 +831,7 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
      block += line_size;
    }
    
-POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
  
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
@@ -806,10 +839,10 @@ POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
 /* next one assumes that ((line_size % 8) == 0) */
 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
-POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1);
+POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     int j;
-POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
     for (j = 0; j < 2; j++) {
       int i;
       const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
@@ -842,7 +875,7 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
       block += 4 - line_size * h;
     }
 
-POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
    register int i;
@@ -873,7 +906,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);
    
-POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); 
+POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1); 
    for (i = 0; i < h ; i++) {
      int rightside = ((unsigned long)block & 0x0000000F);
      blockv = vec_ld(0, block);
@@ -914,17 +947,17 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
      pixels += line_size;
    }
    
-POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
 
 /* next one assumes that ((line_size % 8) == 0) */
 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
-POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
+POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     int j;
-POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
     for (j = 0; j < 2; j++) {
       int i;
       const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
@@ -957,7 +990,7 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
       block += 4 - line_size * h;
     }
     
-POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
    register int i;
@@ -989,7 +1022,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);
    
-POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); 
+POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); 
    for (i = 0; i < h ; i++) {
      int rightside = ((unsigned long)block & 0x0000000F);
      blockv = vec_ld(0, block);
@@ -1030,17 +1063,17 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
      pixels += line_size;
    }
    
-POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
 
 /* next one assumes that ((line_size % 16) == 0) */
 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
 {
-POWERPC_TBL_DECLARE(altivec_put_pixels16_xy2_num, 1);
+POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     int j;
-POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
       for (j = 0; j < 4; j++) {
       int i;
       const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
@@ -1073,7 +1106,7 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
       block += 4 - line_size * h;
     }
 
-POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
    register int i;
@@ -1087,7 +1120,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
 
-POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
  
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
@@ -1151,17 +1184,17 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
      pixels += line_size;
    }
    
-POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
 
 /* next one assumes that ((line_size % 16) == 0) */
 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
 {
-POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
+POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     int j;
-POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
       for (j = 0; j < 4; j++) {
       int i;
       const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
@@ -1194,7 +1227,7 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
       block += 4 - line_size * h;
     }
 
-POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
    register int i;
@@ -1209,7 +1242,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
 
-POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
  
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
@@ -1273,7 +1306,7 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
      pixels += line_size;
    }
    
-POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
 
index 87772b4580c3958ef90185c27f4b82193ab0b6d1..73ae8b4a734f633a268257a54207cc0aeb6139cd 100644 (file)
@@ -41,8 +41,8 @@ int mm_support(void)
     return result;
 }
 
-#ifdef POWERPC_TBL_PERFORMANCE_REPORT
-unsigned long long perfdata[powerpc_perf_total][powerpc_data_total];
+#ifdef POWERPC_PERFORMANCE_REPORT
+unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
 /* list below must match enum in dsputil_ppc.h */
 static unsigned char* perfname[] = {
   "fft_calc_altivec",
@@ -60,53 +60,32 @@ static unsigned char* perfname[] = {
   "clear_blocks_dcbz32_ppc",
   "clear_blocks_dcbz128_ppc"
 };
-#ifdef POWERPC_PERF_USE_PMC
-unsigned long long perfdata_pmc2[powerpc_perf_total][powerpc_data_total];
-unsigned long long perfdata_pmc3[powerpc_perf_total][powerpc_data_total];
-#endif
 #include <stdio.h>
 #endif
 
-#ifdef POWERPC_TBL_PERFORMANCE_REPORT
+#ifdef POWERPC_PERFORMANCE_REPORT
 void powerpc_display_perf_report(void)
 {
-  int i;
-#ifndef POWERPC_PERF_USE_PMC
-  fprintf(stderr, "PowerPC performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n");
-#else /* POWERPC_PERF_USE_PMC */
+  int i, j;
   fprintf(stderr, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
-#endif /* POWERPC_PERF_USE_PMC */
   for(i = 0 ; i < powerpc_perf_total ; i++)
   {
-    if (perfdata[i][powerpc_data_num] != (unsigned long long)0)
-      fprintf(stderr, " Function \"%s\" (pmc1):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
-              perfname[i],
-              perfdata[i][powerpc_data_min],
-              perfdata[i][powerpc_data_max],
-              (double)perfdata[i][powerpc_data_sum] /
-              (double)perfdata[i][powerpc_data_num],
-              perfdata[i][powerpc_data_num]);
-#ifdef POWERPC_PERF_USE_PMC
-    if (perfdata_pmc2[i][powerpc_data_num] != (unsigned long long)0)
-      fprintf(stderr, " Function \"%s\" (pmc2):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
-              perfname[i],
-              perfdata_pmc2[i][powerpc_data_min],
-              perfdata_pmc2[i][powerpc_data_max],
-              (double)perfdata_pmc2[i][powerpc_data_sum] /
-              (double)perfdata_pmc2[i][powerpc_data_num],
-              perfdata_pmc2[i][powerpc_data_num]);
-    if (perfdata_pmc3[i][powerpc_data_num] != (unsigned long long)0)
-      fprintf(stderr, " Function \"%s\" (pmc3):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
-              perfname[i],
-              perfdata_pmc3[i][powerpc_data_min],
-              perfdata_pmc3[i][powerpc_data_max],
-              (double)perfdata_pmc3[i][powerpc_data_sum] /
-              (double)perfdata_pmc3[i][powerpc_data_num],
-              perfdata_pmc3[i][powerpc_data_num]);
-#endif
+    for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
+      {
+       if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
+         fprintf(stderr,
+                 " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
+                 perfname[i],
+                 j+1,
+                 perfdata[j][i][powerpc_data_min],
+                 perfdata[j][i][powerpc_data_max],
+                 (double)perfdata[j][i][powerpc_data_sum] /
+                 (double)perfdata[j][i][powerpc_data_num],
+                 perfdata[j][i][powerpc_data_num]);
+      }
   }
 }
-#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
+#endif /* POWERPC_PERFORMANCE_REPORT */
 
 /* ***** WARNING ***** WARNING ***** WARNING ***** */
 /*
@@ -135,10 +114,10 @@ void powerpc_display_perf_report(void)
 */
 void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
 {
-POWERPC_TBL_DECLARE(powerpc_clear_blocks_dcbz32, 1);
+POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
     register int misal = ((unsigned long)blocks & 0x00000010);
     register int i = 0;
-POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
+POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
 #if 1
     if (misal) {
       ((unsigned long*)blocks)[0] = 0L;
@@ -160,7 +139,7 @@ POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
 #else
     memset(blocks, 0, sizeof(DCTELEM)*6*64);
 #endif
-POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
+POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
 }
 
 /* same as above, when dcbzl clear a whole 128B cache line
@@ -168,10 +147,10 @@ POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
 #ifndef NO_DCBZL
 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
 {
-POWERPC_TBL_DECLARE(powerpc_clear_blocks_dcbz128, 1);
+POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
     register int misal = ((unsigned long)blocks & 0x0000007f);
     register int i = 0;
-POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
+POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
 #if 1
  if (misal) {
    // we could probably also optimize this case,
@@ -186,7 +165,7 @@ POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
 #else
     memset(blocks, 0, sizeof(DCTELEM)*6*64);
 #endif
-POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
+POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
 }
 #else
 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
@@ -277,6 +256,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
         c->add_bytes= add_bytes_altivec;
 #endif /* 0 */
         c->put_pixels_tab[0][0] = put_pixels16_altivec;
+        /* the two functions do the same thing, so use the same code */
+        c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
         c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
 // next one disabled as it's untested.
 #if 0
@@ -301,28 +282,21 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
         }
         
-#ifdef POWERPC_TBL_PERFORMANCE_REPORT
+#ifdef POWERPC_PERFORMANCE_REPORT
         {
-          int i;
+          int i, j;
           for (i = 0 ; i < powerpc_perf_total ; i++)
           {
-            perfdata[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
-            perfdata[i][powerpc_data_max] = 0x0000000000000000;
-            perfdata[i][powerpc_data_sum] = 0x0000000000000000;
-            perfdata[i][powerpc_data_num] = 0x0000000000000000;
-#ifdef POWERPC_PERF_USE_PMC
-            perfdata_pmc2[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
-            perfdata_pmc2[i][powerpc_data_max] = 0x0000000000000000;
-            perfdata_pmc2[i][powerpc_data_sum] = 0x0000000000000000;
-            perfdata_pmc2[i][powerpc_data_num] = 0x0000000000000000;
-            perfdata_pmc3[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
-            perfdata_pmc3[i][powerpc_data_max] = 0x0000000000000000;
-            perfdata_pmc3[i][powerpc_data_sum] = 0x0000000000000000;
-            perfdata_pmc3[i][powerpc_data_num] = 0x0000000000000000;
-#endif /* POWERPC_PERF_USE_PMC */
-          }
+           for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
+             {
+               perfdata[j][i][powerpc_data_min] = (unsigned long long)0xFFFFFFFFFFFFFFFF;
+               perfdata[j][i][powerpc_data_max] = (unsigned long long)0x0000000000000000;
+               perfdata[j][i][powerpc_data_sum] = (unsigned long long)0x0000000000000000;
+               perfdata[j][i][powerpc_data_num] = (unsigned long long)0x0000000000000000;
+             }
+         }
         }
-#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
+#endif /* POWERPC_PERFORMANCE_REPORT */
     } else
 #endif /* HAVE_ALTIVEC */
     {
index 8c325fbc75fdf62cafb13df23dc0885a886c6fe4..4cb299dd9b081e1f430c015d08d128188ae08dc6 100644 (file)
 #define NO_DCBZL
 #endif /* CONFIG_DARWIN */
 
-#ifdef POWERPC_TBL_PERFORMANCE_REPORT
+#ifdef POWERPC_PERFORMANCE_REPORT
 void powerpc_display_perf_report(void);
+/* the 604* have 2, the G3* have 4, the G4s have 6 */
+#define POWERPC_NUM_PMC_ENABLED 4
 /* if you add to the enum below, also add to the perfname array
    in dsputil_ppc.c */
 enum powerpc_perf_index {
@@ -58,98 +60,65 @@ enum powerpc_data_index {
   powerpc_data_num,
   powerpc_data_total
 };
-extern unsigned long long perfdata[powerpc_perf_total][powerpc_data_total];
-#ifdef POWERPC_PERF_USE_PMC
-extern unsigned long long perfdata_pmc2[powerpc_perf_total][powerpc_data_total];
-extern unsigned long long perfdata_pmc3[powerpc_perf_total][powerpc_data_total];
-#endif
-
-#ifndef POWERPC_PERF_USE_PMC
-#define POWERPC_GET_CYCLES(a) asm volatile("mftb %0" : "=r" (a))
-#define POWERPC_TBL_DECLARE(a, cond) register unsigned long tbl_start, tbl_stop
-#define POWERPC_TBL_START_COUNT(a, cond) do { POWERPC_GET_CYCLES(tbl_start); } while (0)
-#define POWERPC_TBL_STOP_COUNT(a, cond) do {     \
-  POWERPC_GET_CYCLES(tbl_stop);                  \
-  if (tbl_stop > tbl_start)                      \
-  {                                              \
-    unsigned long diff =  tbl_stop - tbl_start;  \
-    if (cond)                                    \
-    {                                            \
-      if (diff < perfdata[a][powerpc_data_min])  \
-        perfdata[a][powerpc_data_min] = diff;    \
-      if (diff > perfdata[a][powerpc_data_max])  \
-        perfdata[a][powerpc_data_max] = diff;    \
-      perfdata[a][powerpc_data_sum] += diff;     \
-      perfdata[a][powerpc_data_num] ++;          \
-    }                                            \
-  }                                              \
-} while (0)
+extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
 
-#else /* POWERPC_PERF_USE_PMC */
-#define POWERPC_GET_CYCLES(a) asm volatile("mfspr %0, 937" : "=r" (a))
+#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a))
 #define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a))
+#if (POWERPC_NUM_PMC_ENABLED > 2)
 #define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a))
-#define POWERPC_TBL_DECLARE(a, cond) register unsigned long cycles_start, cycles_stop, pmc2_start, pmc2_stop, pmc3_start, pmc3_stop
-#define POWERPC_TBL_START_COUNT(a, cond) do {    \
-  POWERPC_GET_PMC3(pmc3_start);                  \
-  POWERPC_GET_PMC2(pmc2_start);                  \
-  POWERPC_GET_CYCLES(cycles_start); } while (0)
-#define POWERPC_TBL_STOP_COUNT(a, cond) do {     \
-  POWERPC_GET_CYCLES(cycles_stop);               \
-  POWERPC_GET_PMC2(pmc2_stop);                   \
-  POWERPC_GET_PMC3(pmc3_stop);                   \
-  if (cycles_stop >= cycles_start)               \
-  {                                              \
-    unsigned long diff =                         \
-                cycles_stop - cycles_start;      \
-    if (cond)                                    \
-    {                                            \
-      if (diff < perfdata[a][powerpc_data_min])  \
-        perfdata[a][powerpc_data_min] = diff;    \
-      if (diff > perfdata[a][powerpc_data_max])  \
-        perfdata[a][powerpc_data_max] = diff;    \
-      perfdata[a][powerpc_data_sum] += diff;     \
-      perfdata[a][powerpc_data_num] ++;          \
-    }                                            \
-  }                                              \
-  if (pmc2_stop >= pmc2_start)                   \
-  {                                              \
-    unsigned long diff =                         \
-                pmc2_stop - pmc2_start;          \
-    if (cond)                                    \
-    {                                            \
-      if (diff < perfdata_pmc2[a][powerpc_data_min]) \
-        perfdata_pmc2[a][powerpc_data_min] = diff;   \
-      if (diff > perfdata_pmc2[a][powerpc_data_max]) \
-        perfdata_pmc2[a][powerpc_data_max] = diff;   \
-      perfdata_pmc2[a][powerpc_data_sum] += diff;    \
-      perfdata_pmc2[a][powerpc_data_num] ++;         \
-    }                                            \
-  }                                              \
-  if (pmc3_stop >= pmc3_start)                   \
-  {                                              \
-    unsigned long diff =                         \
-                pmc3_stop - pmc3_start;          \
-    if (cond)                                    \
-    {                                            \
-      if (diff < perfdata_pmc3[a][powerpc_data_min]) \
-        perfdata_pmc3[a][powerpc_data_min] = diff;   \
-      if (diff > perfdata_pmc3[a][powerpc_data_max]) \
-        perfdata_pmc3[a][powerpc_data_max] = diff;   \
-      perfdata_pmc3[a][powerpc_data_sum] += diff;    \
-      perfdata_pmc3[a][powerpc_data_num] ++;         \
-    }                                            \
-  }                                              \
+#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a))
+#else
+#define POWERPC_GET_PMC3(a) do {} while (0)
+#define POWERPC_GET_PMC4(a) do {} while (0)
+#endif
+#if (POWERPC_NUM_PMC_ENABLED > 4)
+#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a))
+#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a))
+#else
+#define POWERPC_GET_PMC5(a) do {} while (0)
+#define POWERPC_GET_PMC6(a) do {} while (0)
+#endif
+#define POWERPC_PERF_DECLARE(a, cond) unsigned long pmc_start[POWERPC_NUM_PMC_ENABLED], pmc_stop[POWERPC_NUM_PMC_ENABLED], pmc_loop_index;
+#define POWERPC_PERF_START_COUNT(a, cond) do { \
+  POWERPC_GET_PMC6(pmc_start[5]); \
+  POWERPC_GET_PMC5(pmc_start[4]); \
+  POWERPC_GET_PMC4(pmc_start[3]); \
+  POWERPC_GET_PMC3(pmc_start[2]); \
+  POWERPC_GET_PMC2(pmc_start[1]); \
+  POWERPC_GET_PMC1(pmc_start[0]); \
+  } while (0)
+#define POWERPC_PERF_STOP_COUNT(a, cond) do { \
+  POWERPC_GET_PMC1(pmc_stop[0]); \
+  POWERPC_GET_PMC2(pmc_stop[1]); \
+  POWERPC_GET_PMC3(pmc_stop[2]); \
+  POWERPC_GET_PMC4(pmc_stop[3]); \
+  POWERPC_GET_PMC5(pmc_stop[4]); \
+  POWERPC_GET_PMC6(pmc_stop[5]); \
+  if (cond)                       \
+  {                               \
+    for(pmc_loop_index = 0;       \
+        pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \
+        pmc_loop_index++)         \
+    {                             \
+      if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \
+      {                           \
+        unsigned long diff =      \
+          pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index];   \
+        if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
+          perfdata[pmc_loop_index][a][powerpc_data_min] = diff;   \
+        if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \
+          perfdata[pmc_loop_index][a][powerpc_data_max] = diff;   \
+        perfdata[pmc_loop_index][a][powerpc_data_sum] += diff;    \
+        perfdata[pmc_loop_index][a][powerpc_data_num] ++;         \
+      }                           \
+    }                             \
+  }                               \
 } while (0)
-
-#endif /* POWERPC_PERF_USE_PMC */
-
-
-#else /* POWERPC_TBL_PERFORMANCE_REPORT */
+#else /* POWERPC_PERFORMANCE_REPORT */
 // those are needed to avoid empty statements.
-#define POWERPC_TBL_DECLARE(a, cond)        int altivec_placeholder __attribute__ ((unused))
-#define POWERPC_TBL_START_COUNT(a, cond)    do {} while (0)
-#define POWERPC_TBL_STOP_COUNT(a, cond)     do {} while (0)
-#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
+#define POWERPC_PERF_DECLARE(a, cond)        int altivec_placeholder __attribute__ ((unused))
+#define POWERPC_PERF_START_COUNT(a, cond)    do {} while (0)
+#define POWERPC_PERF_STOP_COUNT(a, cond)     do {} while (0)
+#endif /* POWERPC_PERFORMANCE_REPORT */
 
 #endif /*  _DSPUTIL_PPC_ */
index 75c95bb879c1581cf42ed3e03115f7ba5e867f76..e39c9dbb736f8f6f47a600d9228c21016e5cfc54 100644 (file)
@@ -62,7 +62,7 @@
  */
 void fft_calc_altivec(FFTContext *s, FFTComplex *z)
 {
-POWERPC_TBL_DECLARE(altivec_fft_num, s->nbits >= 6);
+POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     int ln = s->nbits;
     int        j, np, np2;
@@ -72,7 +72,7 @@ POWERPC_TBL_DECLARE(altivec_fft_num, s->nbits >= 6);
     int l;
     FFTSample tmp_re, tmp_im;
     
-POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
+POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
  
     np = 1 << ln;
 
@@ -137,7 +137,7 @@ POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
         nloops = nloops << 1;
     } while (nblocks != 0);
 
-POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
+POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
 #ifdef CONFIG_DARWIN
@@ -153,7 +153,7 @@ POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
     FFTComplex *cptr, *cptr1;
     int k;
 
-POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
+POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
 
     np = 1 << ln;
 
@@ -241,7 +241,7 @@ POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
         nloops = nloops << 1;
     } while (nblocks != 0);
 
-POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
+POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
 
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
index 9b141078bba4026f3f20d841165a027e49bf09a8..671ee110aafe7696df93375bb46d2e7c823f3c71 100644 (file)
@@ -31,7 +31,7 @@
 #define GMC1_PERF_COND (h==8)
 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
 {
-POWERPC_TBL_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
+POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     const int A=(16-x16)*(16-y16);
     const int B=(   x16)*(16-y16);
@@ -39,7 +39,7 @@ POWERPC_TBL_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
     const int D=(   x16)*(   y16);
     int i;
 
-POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
+POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
     
     for(i=0; i<h; i++)
     {
@@ -55,7 +55,7 @@ POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
         src+= stride;
     }
 
-POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
+POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
     const unsigned short __attribute__ ((aligned(16))) rounder_a[8] =
@@ -78,7 +78,7 @@ POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
     unsigned long src_really_odd = (unsigned long)src & 0x0000000F;
 
 
-POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
+POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
 
     tempA = vec_ld(0, (unsigned short*)ABCD);
     Av = vec_splat(tempA, 0);
@@ -166,7 +166,7 @@ POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
       src += stride;
     }
 
-POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
+POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
 
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
index f8a8aa6787da9fee01aee7127a57f0d0c71d35cb..d821ecd223212b18934e6d733a7aa28140266518 100644 (file)
@@ -165,16 +165,16 @@ static const vector_s16_t constants[5] = {
 
 void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block)
 {
-POWERPC_TBL_DECLARE(altivec_idct_put_num, 1);
+POWERPC_PERF_DECLARE(altivec_idct_put_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
-POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1);
+POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
     void simple_idct_put(uint8_t *dest, int line_size, int16_t *block);
     simple_idct_put(dest, stride, (int16_t*)block);
-POWERPC_TBL_STOP_COUNT(altivec_idct_put_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
     vector_u8_t tmp;
 
-POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1);
+POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
 
     IDCT
 
@@ -192,18 +192,18 @@ POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1);
     COPY (dest, vx6)   dest += stride;
     COPY (dest, vx7)
 
-POWERPC_TBL_STOP_COUNT(altivec_idct_put_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
 
 void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block)
 {
-POWERPC_TBL_DECLARE(altivec_idct_add_num, 1);
+POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
-POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1);
+POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
     void simple_idct_add(uint8_t *dest, int line_size, int16_t *block);
     simple_idct_add(dest, stride, (int16_t*)block);
-POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
     vector_u8_t tmp;
     vector_s16_t tmp2, tmp3;
@@ -211,7 +211,7 @@ POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1);
     vector_u8_t perm1;
     vector_u8_t p0, p1, p;
 
-POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1);
+POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
 
     IDCT
 
@@ -239,7 +239,7 @@ POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1);
     ADD (dest, vx6, perm0)     dest += stride;
     ADD (dest, vx7, perm1)
 
-POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
 
index bbf9c443356282760ea519b40100b377e256bfe3..ae3170d91a2b15178e98c0d2679a573a0c3736d1 100644 (file)
@@ -522,13 +522,13 @@ int dct_quantize_altivec(MpegEncContext* s,
 void dct_unquantize_h263_altivec(MpegEncContext *s, 
                                  DCTELEM *block, int n, int qscale)
 {
-POWERPC_TBL_DECLARE(altivec_dct_unquantize_h263_num, 1);
+POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1);
     int i, level, qmul, qadd;
     int nCoeffs;
     
     assert(s->block_last_index[n]>=0);
 
-POWERPC_TBL_START_COUNT(altivec_dct_unquantize_h263_num, 1);
+POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
     
     qadd = (qscale - 1) | 1;
     qmul = qscale << 1;
@@ -641,5 +641,5 @@ POWERPC_TBL_START_COUNT(altivec_dct_unquantize_h263_num, 1);
     }
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 
-POWERPC_TBL_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
+POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
 }