avcodec/fft_template: improve performance of ff_fft_init in fft_template
author Steven Liu <lq@chinaffmpeg.org>
Wed, 26 Dec 2018 08:09:49 +0000 (16:09 +0800)
committer Steven Liu <lq@chinaffmpeg.org>
Fri, 4 Jan 2019 03:21:02 +0000 (11:21 +0800)
Before patch:
init nbits = 17, get 10000 samples, average cost: 16175 us
After patch:
init nbits = 17, get 10000 samples, average cost: 14989 us

Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Steven Liu <lq@chinaffmpeg.org>
libavcodec/fft_template.c

index 762c014..20a62e4 100644 (file)
@@ -261,17 +261,41 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     if (s->fft_permutation == FF_FFT_PERM_AVX) {
         fft_perm_avx(s);
     } else {
-        for(i=0; i<n; i++) {
-            int k;
-            j = i;
-            if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
-                j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
-            k = -split_radix_permutation(i, n, s->inverse) & (n-1);
-            if (s->revtab)
-                s->revtab[k] = j;
-            if (s->revtab32)
-                s->revtab32[k] = j;
-        }
+#define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\
+    for(i = 0; i < n; i++) {\
+        int k;\
+        j = i;\
+        j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\
+        k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
+        s->revtab##num[k] = j;\
+    } \
+} while(0);
+
+#define PROCESS_FFT_PERM_DEFAULT(num) do {\
+    for(i = 0; i < n; i++) {\
+        int k;\
+        j = i;\
+        k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
+        s->revtab##num[k] = j;\
+    } \
+} while(0);
+
+#define SPLIT_RADIX_PERMUTATION(num) do { \
+    if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\
+        PROCESS_FFT_PERM_SWAP_LSBS(num) \
+    } else {\
+        PROCESS_FFT_PERM_DEFAULT(num) \
+    }\
+} while(0);
+
+    if (s->revtab)
+        SPLIT_RADIX_PERMUTATION()
+    if (s->revtab32)
+        SPLIT_RADIX_PERMUTATION(32)
+
+#undef PROCESS_FFT_PERM_DEFAULT
+#undef PROCESS_FFT_PERM_SWAP_LSBS
+#undef SPLIT_RADIX_PERMUTATION
     }
 
     return 0;