better deblocking filter
author Michael Niedermayer <michaelni@gmx.at>
Tue, 29 Oct 2002 18:35:15 +0000 (18:35 +0000)
committer Michael Niedermayer <michaelni@gmx.at>
Tue, 29 Oct 2002 18:35:15 +0000 (18:35 +0000)
Originally committed as revision 7961 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc

postproc/postprocess.c
postproc/postprocess.h
postproc/postprocess_template.c

index a732d400a41fe055fa2d7f8a73803e458d6b77d2..599d6064b75f8d31d2c6b106a5576876cb3f1ca9 100644 (file)
@@ -59,7 +59,6 @@ compare the quality & speed of all filters
 split this huge file
 optimize c versions
 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
-put fastmemcpy back
 ...
 */
 
@@ -149,13 +148,14 @@ typedef struct PPContext{
 
        uint64_t __attribute__((aligned(8))) pQPb;
        uint64_t __attribute__((aligned(8))) pQPb2;
-       
-       uint64_t __attribute__((aligned(8))) mmxDcOffset;
-       uint64_t __attribute__((aligned(8))) mmxDcThreshold;
 
+       uint64_t __attribute__((aligned(8))) mmxDcOffset[32];
+       uint64_t __attribute__((aligned(8))) mmxDcThreshold[32];
+       
+       QP_STORE_T *nonBQPTable;
+       
        int QP;
-       int dcOffset;
-       int dcThreshold;
+       int nonBQP;
 
        int frameNum;
 
@@ -247,8 +247,8 @@ static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
 {
        int numEq= 0;
        int y;
-       const int dcOffset= c->dcOffset;
-       const int dcThreshold= c->dcThreshold;
+       const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
+       const int dcThreshold= dcOffset*2 + 1;
        for(y=0; y<BLOCK_SIZE; y++)
        {
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
@@ -269,8 +269,8 @@ static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
        int numEq= 0;
        int y;
-       const int dcOffset= c->dcOffset;
-       const int dcThreshold= c->dcThreshold;
+       const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
+       const int dcThreshold= dcOffset*2 + 1;
        src+= stride*4; // src points to begin of the 8x8 Block
        for(y=0; y<BLOCK_SIZE-1; y++)
        {
@@ -725,7 +725,7 @@ struct PPMode getPPModeByNameAndQuality(char *name, int quality)
                                else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
                                {
                                        int o;
-                                       ppMode.maxDcDiff=1;
+                                       ppMode.baseDcDiff=256/4;
 //                                     hFlatnessThreshold= 40;
 //                                     vFlatnessThreshold= 40;
 
@@ -736,7 +736,7 @@ struct PPMode getPPModeByNameAndQuality(char *name, int quality)
                                                if(tail==options[o]) break;
 
                                                numOfUnknownOptions--;
-                                               if(o==0) ppMode.maxDcDiff= val;
+                                               if(o==0) ppMode.baseDcDiff= val;
                                                else ppMode.flatnessThreshold= val;
                                        }
                                }
@@ -768,6 +768,8 @@ struct PPMode getPPModeByNameAndQuality(char *name, int quality)
 void *getPPContext(int width, int height){
        PPContext *c= memalign(32, sizeof(PPContext));
        int i;
+       int mbWidth = (width+15)>>4;
+       int mbHeight= (height+15)>>4;
 
        c->tempBlocks= (uint8_t*)memalign(8, 2*16*8);
        c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t));
@@ -789,6 +791,8 @@ void *getPPContext(int width, int height){
        c->tempDstBlock= (uint8_t*)memalign(8, 1024*24);
        c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
        c->deintTemp= (uint8_t*)memalign(8, width+16);
+       c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T));
+       memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T));
 
        c->frameNum=-1;
 
@@ -809,6 +813,7 @@ void freePPContext(void *vc){
        free(c->tempDstBlock);
        free(c->tempSrcBlock);
        free(c->deintTemp);
+       free(c->nonBQPTable);
        
        free(c);
 }
@@ -841,12 +846,14 @@ void revertPPOpt(void *conf, char* opt)
 
 void  postprocess(uint8_t * src[3], int srcStride[3],
                  uint8_t * dst[3], int dstStride[3],
-                 int horizontalSize, int verticalSize,
+                 int width, int height,
                  QP_STORE_T *QP_store,  int QPStride,
-                PPMode *mode,  void *c)
+                PPMode *mode,  void *vc, int pict_type)
 {
-
+       int mbWidth = (width+15)>>4;
+       int mbHeight= (height+15)>>4;
        QP_STORE_T quantArray[2048/8];
+       PPContext *c = (PPContext*)vc;
 
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
        {
@@ -858,6 +865,29 @@ void  postprocess(uint8_t * src[3], int srcStride[3],
                else
                        for(i=0; i<2048/8; i++) quantArray[i]= 1;
        }
+if(0){
+int x,y;
+for(y=0; y<mbHeight; y++){
+       for(x=0; x<mbWidth; x++){
+               printf("%2d ", QP_store[x + y*QPStride]);
+       }
+       printf("\n");
+}
+       printf("\n");
+}
+//printf("pict_type:%d\n", pict_type);
+       if(pict_type!=3)
+       {
+               int x,y;
+               for(y=0; y<mbHeight; y++){
+                       for(x=0; x<mbWidth; x++){
+                               int qscale= QP_store[x + y*QPStride];
+                               if(qscale&~31)
+                                   qscale=31;
+                               c->nonBQPTable[y*mbWidth + x]= qscale;
+                       }
+               }
+       }
 
        if(firstTime2 && verbose)
        {
@@ -866,30 +896,30 @@ void  postprocess(uint8_t * src[3], int srcStride[3],
        }
 
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
-               horizontalSize, verticalSize, QP_store, QPStride, 0, mode, c);
+               width, height, QP_store, QPStride, 0, mode, c);
 
-       horizontalSize = (horizontalSize+1)>> 1;
-       verticalSize   = (verticalSize+1)>>1;
+       width  = (width +1)>>1;
+       height = (height+1)>>1;
 
        if(mode->chromMode)
        {
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
-                       horizontalSize, verticalSize, QP_store, QPStride, 1, mode, c);
+                       width, height, QP_store, QPStride, 1, mode, c);
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
-                       horizontalSize, verticalSize, QP_store, QPStride, 2, mode, c);
+                       width, height, QP_store, QPStride, 2, mode, c);
        }
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
        {
-               memcpy(dst[1], src[1], srcStride[1]*verticalSize);
-               memcpy(dst[2], src[2], srcStride[2]*verticalSize);
+               memcpy(dst[1], src[1], srcStride[1]*height);
+               memcpy(dst[2], src[2], srcStride[2]*height);
        }
        else
        {
                int y;
-               for(y=0; y<verticalSize; y++)
+               for(y=0; y<height; y++)
                {
-                       memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), horizontalSize);
-                       memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), horizontalSize);
+                       memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
+                       memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
                }
        }
 }
index 66cccd1131f67181a061fc8e4dfd28707b9b5558..751d95956d0b7c8909c25619a5188ee7ade98e10 100644 (file)
@@ -54,7 +54,6 @@
 #define TEMP_NOISE_FILTER              0x100000
 #define FORCE_QUANT                    0x200000
 
-
 #define GET_PP_QUALITY_MAX 6
 
 //use if u want a faster postprocessing code
@@ -76,8 +75,8 @@ typedef struct PPMode{
        int maxAllowedY; // for brihtness correction
 
        int maxTmpNoise[3]; // for Temporal Noise Reducing filter (Maximal sum of abs differences)
-       
-       int maxDcDiff; // max abs diff between pixels to be considered flat
+
+       int baseDcDiff;
        int flatnessThreshold;
 
        int forcedQuant; // quantizer if FORCE_QUANT is used
@@ -87,7 +86,7 @@ void  postprocess(uint8_t * src[3], int srcStride[3],
                  uint8_t * dst[3], int dstStride[3],
                  int horizontalSize, int verticalSize,
                  QP_STORE_T *QP_store,  int QP_stride,
-                PPMode *mode, void *ppContext);
+                PPMode *mode, void *ppContext, int pict_type);
 
 // name is the stuff after "-pp" on the command line
 PPMode getPPModeByNameAndQuality(char *name, int quality);
index f24eccf2cecd243d2c09b964975e19d5c97ca52c..beea9604f58f6bd9f8cba6296d45d5e47318d3ad 100644 (file)
@@ -56,8 +56,9 @@ asm volatile(
                "leal (%1, %2), %%eax                           \n\t"
 //     0       1       2       3       4       5       6       7       8       9
 //     %1      eax     eax+%2  eax+2%2 %1+4%2  ecx     ecx+%2  ecx+2%2 %1+8%2  ecx+4%2
-               "movq %3, %%mm7                                 \n\t" // mm7 = 0x7F
-               "movq %4, %%mm6                                 \n\t" // mm6 = 0x7D
+               "movq %3, %%mm7                                 \n\t" 
+               "movq %4, %%mm6                                 \n\t" 
+
                "movq (%1), %%mm0                               \n\t"
                "movq (%%eax), %%mm1                            \n\t"
                "psubb %%mm1, %%mm0                             \n\t" // mm0 = differnece
@@ -119,7 +120,7 @@ asm volatile(
 #endif
                "movd %%mm0, %0                                 \n\t"
                : "=r" (numEq)
-               : "r" (src), "r" (stride), "m" (c->mmxDcOffset), "m" (c->mmxDcThreshold)
+               : "r" (src), "r" (stride), "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP])
                : "%eax"
                );
        numEq= (-numEq) &0xFF;
@@ -150,6 +151,7 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, PPContext *c
                );
        return isOk==0;
 #else
+#if 1
        int x;
        const int QP= c->QP;
        src+= stride*3;
@@ -159,6 +161,24 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, PPContext *c
        }
 
        return 1;
+#else
+       int x;
+       const int QP= c->QP;
+       src+= stride*4;
+       for(x=0; x<BLOCK_SIZE; x++)
+       {
+               int min=255;
+               int max=0;
+               int y;
+               for(y=0; y<8; y++){
+                       int v= src[x + y*stride];
+                       if(v>max) max=v;
+                       if(v<min) min=v;
+               }
+               if(max-min > 2*QP) return 0;
+       }
+       return 1;
+#endif
 #endif
 }
 
@@ -2639,22 +2659,23 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
        int black=0, white=255; // blackest black and whitest white in the picture
        int QPCorrecture= 256*256;
 
-       int copyAhead;
+       int copyAhead, i;
 
        //FIXME remove
        uint64_t * const yHistogram= c.yHistogram;
        uint8_t * const tempSrc= c.tempSrc;
        uint8_t * const tempDst= c.tempDst;
-
-       c.dcOffset= c.ppMode.maxDcDiff;
-       c.dcThreshold= c.ppMode.maxDcDiff*2 + 1;
+       const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
 
 #ifdef HAVE_MMX
-       c.mmxDcOffset= 0x7F - c.dcOffset;
-       c.mmxDcThreshold= 0x7F - c.dcThreshold;
-
-       c.mmxDcOffset*= 0x0101010101010101LL;
-       c.mmxDcThreshold*= 0x0101010101010101LL;
+       for(i=0; i<32; i++){
+               int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
+               int threshold= offset*2 + 1;
+               c.mmxDcOffset[i]= 0x7F - offset;
+               c.mmxDcThreshold[i]= 0x7F - threshold;
+               c.mmxDcOffset[i]*= 0x0101010101010101LL;
+               c.mmxDcThreshold[i]*= 0x0101010101010101LL;
+       }
 #endif
 
        if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
@@ -2814,11 +2835,8 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
                uint8_t *tempBlock1= c.tempBlocks;
                uint8_t *tempBlock2= c.tempBlocks + 8;
 #endif
-#ifdef ARCH_X86
                int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride];
-               int QPDelta= isColor ? (-1) : 1<<31;
-               int QPFrac= 1<<30;
-#endif
+               int *nonBQPptr= isColor ? &c.nonBQPTable[(y>>3)*mbWidth] :&c.nonBQPTable[(y>>4)*mbWidth];
                int QP=0;
                /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
                   if not than use a temporary buffer */
@@ -2855,28 +2873,19 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
 #ifdef HAVE_MMX
                        uint8_t *tmpXchg;
 #endif
-#ifdef ARCH_X86
-                       QP= *QPptr;
-                       asm volatile(
-                               "addl %2, %1            \n\t"
-                               "sbbl %%eax, %%eax      \n\t"
-                               "shll $2, %%eax         \n\t"
-                               "subl %%eax, %0         \n\t"
-                               : "+r" (QPptr), "+m" (QPFrac)
-                               : "r" (QPDelta)
-                               : "%eax"
-                       );
-#else
-                       QP= isColor ?
-                                QPs[(y>>3)*QPStride + (x>>3)]:
-                                QPs[(y>>4)*QPStride + (x>>4)];
-#endif
-                       if(!isColor)
+                       if(isColor)
+                       {
+                               QP= QPptr[x>>3];
+                               c.nonBQP= nonBQPptr[x>>3];
+                       }
+                       else
                        {
+                               QP= QPptr[x>>4];
                                QP= (QP* QPCorrecture + 256*128)>>16;
+                               c.nonBQP= nonBQPptr[x>>4];
+                               c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
                                yHistogram[ srcBlock[srcStride*12 + 4] ]++;
                        }
-//printf("%d ", QP);
                        c.QP= QP;
 #ifdef HAVE_MMX
                        asm volatile(