Merge commit '81d7f0bbca837afda1f7e60d3ae52ab1360ab44b'
author: Clément Bœsch <u@pkh.me>
Sat, 1 Apr 2017 09:39:09 +0000 (11:39 +0200)
committer: Clément Bœsch <u@pkh.me>
Sat, 1 Apr 2017 09:54:29 +0000 (11:54 +0200)
* commit '81d7f0bbca837afda1f7e60d3ae52ab1360ab44b':
  checkasm: vp9dsp: Benchmark the dc-only version of idct_idct separately

Merged-by: Clément Bœsch <u@pkh.me>
1  2 
tests/checkasm/vp9dsp.c

@@@ -323,51 -262,46 +323,57 @@@ static void check_itxfm(void
          [ADST_DCT] = "dct_adst", [ADST_ADST] = "adst_adst"
      };
  
 -    ff_vp9dsp_init(&dsp);
 -
 -    for (tx = TX_4X4; tx <= N_TXFM_SIZES /* 4 = lossless */; tx++) {
 -        int sz = 4 << (tx & 3);
 -        int n_txtps = tx < TX_32X32 ? N_TXFM_TYPES : 1;
 -
 -        for (txtp = 0; txtp < n_txtps; txtp++) {
 -            if (check_func(dsp.itxfm_add[tx][txtp], "vp9_inv_%s_%dx%d_add",
 -                           tx == 4 ? "wht_wht" : txtp_types[txtp], sz, sz)) {
 -                randomize_buffers();
 -                ftx(coef, tx, txtp, sz, BIT_DEPTH);
 -
 -                for (sub = (txtp == 0) ? 1 : 2; sub <= sz; sub <<= 1) {
 -                    int eob;
 +    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
 +        ff_vp9dsp_init(&dsp, bit_depth, 0);
 +
 +        for (tx = TX_4X4; tx <= N_TXFM_SIZES /* 4 = lossless */; tx++) {
 +            int sz = 4 << (tx & 3);
 +            int n_txtps = tx < TX_32X32 ? N_TXFM_TYPES : 1;
 +
 +            for (txtp = 0; txtp < n_txtps; txtp++) {
 +                // skip testing sub-IDCTs for WHT or ADST since they don't
 +                // implement it in any of the SIMD functions. If they do,
 +                // consider changing this to ensure we have complete test
 +                // coverage. Test sub=1 for dc-only, then 2, 4, 8, 12, etc,
 +                // since the arm version can distinguish them at that level.
 +                for (sub = (txtp == 0 && tx < 4) ? 1 : sz; sub <= sz;
 +                     sub < 4 ? (sub <<= 1) : (sub += 4)) {
 +                    if (check_func(dsp.itxfm_add[tx][txtp],
 +                                   "vp9_inv_%s_%dx%d_sub%d_add_%d",
 +                                   tx == 4 ? "wht_wht" : txtp_types[txtp],
 +                                   sz, sz, sub, bit_depth)) {
 +                        int eob;
 +
 +                        randomize_buffers();
 +                        ftx(coef, tx, txtp, sz, bit_depth);
 +
 +                        if (sub < sz) {
 +                            eob = copy_subcoefs(subcoef0, coef, tx, txtp,
 +                                                sz, sub, bit_depth);
 +                        } else {
 +                            eob = sz * sz;
 +                            memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
 +                        }
  
 -                    if (sub < sz) {
 -                        eob = copy_subcoefs(subcoef0, coef, tx, txtp,
 -                                            sz, sub, BIT_DEPTH);
 -                    } else {
 -                        eob = sz * sz;
 -                        memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
 +                        memcpy(dst0, dst, sz * sz * SIZEOF_PIXEL);
 +                        memcpy(dst1, dst, sz * sz * SIZEOF_PIXEL);
 +                        memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);
 +                        call_ref(dst0, sz * SIZEOF_PIXEL, subcoef0, eob);
 +                        call_new(dst1, sz * SIZEOF_PIXEL, subcoef1, eob);
 +                        if (memcmp(dst0, dst1, sz * sz * SIZEOF_PIXEL) ||
 +                            !iszero(subcoef0, sz * sz * SIZEOF_COEF) ||
 +                            !iszero(subcoef1, sz * sz * SIZEOF_COEF))
 +                            fail();
 +
 +                        bench_new(dst, sz * SIZEOF_PIXEL, coef, eob);
                      }
 -
 -                    memcpy(dst0, dst, sz * sz * SIZEOF_PIXEL);
 -                    memcpy(dst1, dst, sz * sz * SIZEOF_PIXEL);
 -                    memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);
 -                    call_ref(dst0, sz * SIZEOF_PIXEL, subcoef0, eob);
 -                    call_new(dst1, sz * SIZEOF_PIXEL, subcoef1, eob);
 -                    if (memcmp(dst0, dst1, sz * sz * SIZEOF_PIXEL) ||
 -                        !iszero(subcoef0, sz * sz * SIZEOF_COEF) ||
 -                        !iszero(subcoef1, sz * sz * SIZEOF_COEF))
 -                        fail();
                  }
 -                bench_new(dst, sz * SIZEOF_PIXEL, coef, sz * sz);
 -            }
 -            if (txtp == 0 && tx != 4) {
 -                if (check_func(dsp.itxfm_add[tx][txtp], "vp9_inv_%s_%dx%d_dc_add",
 -                               txtp_types[txtp], sz, sz)) {
 -                    bench_new(dst, sz * SIZEOF_PIXEL, coef, 1);
++                if (txtp == 0 && tx != 4) {
++                    if (check_func(dsp.itxfm_add[tx][txtp], "vp9_inv_%s_%dx%d_dc_add_%d",
++                                   txtp_types[txtp], sz, sz, bit_depth)) {
++                        bench_new(dst, sz * SIZEOF_PIXEL, coef, 1);
++                    }
+                 }
              }
          }
      }