This makes the vp9_mc_func argument order (dst, dst_stride, ref,
ref_stride) match the pattern already used for the VP8 MC functions.
This also makes the signature match FFmpeg's version of these
functions, easing porting of code in both directions.
Signed-off-by: Martin Storsjö <martin@martin.st>
uint8_t partition[4][4][3];
} ProbContext;
-typedef void (*vp9_mc_func)(uint8_t *dst, const uint8_t *ref,
- ptrdiff_t dst_stride,
- ptrdiff_t ref_stride,
+typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
+ const uint8_t *ref, ptrdiff_t ref_stride,
int h, int mx, int my);
typedef struct VP9DSPContext {
ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
ref_stride = 80;
}
- mc[!!mx][!!my](dst, ref, dst_stride, ref_stride, bh, mx << 1, my << 1);
+ mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
}
static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
- mc[!!mx][!!my](dst_u, ref_u, dst_stride, 80, bh, mx, my);
+ mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_v - !!my * 3 * src_stride_v - !!mx * 3,
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
- mc[!!mx][!!my](dst_v, ref_v, dst_stride, 80, bh, mx, my);
+ mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
} else {
- mc[!!mx][!!my](dst_u, ref_u, dst_stride, src_stride_u, bh, mx, my);
- mc[!!mx][!!my](dst_v, ref_v, dst_stride, src_stride_v, bh, mx, my);
+ mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
+ mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
}
}
av_assert2(n <= 4);
if (w & bw) {
s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o,
- s->tmp_y + o,
f->linesize[0],
+ s->tmp_y + o,
64, h, 0, 0);
o += bw;
}
av_assert2(n <= 4);
if (w & bw) {
s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o,
- s->tmp_uv[0] + o,
f->linesize[1],
+ s->tmp_uv[0] + o,
32, h, 0, 0);
s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o,
- s->tmp_uv[1] + o,
f->linesize[2],
+ s->tmp_uv[1] + o,
32, h, 0, 0);
o += bw;
}
dsp->loop_filter_mix2[1][1][1] = loop_filter_v_88_16_c;
}
-static av_always_inline void copy_c(uint8_t *dst, const uint8_t *src,
- ptrdiff_t dst_stride,
- ptrdiff_t src_stride,
+static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
+ const uint8_t *src, ptrdiff_t src_stride,
int w, int h)
{
do {
} while (--h);
}
-static av_always_inline void avg_c(uint8_t *dst, const uint8_t *src,
- ptrdiff_t dst_stride,
- ptrdiff_t src_stride,
+static av_always_inline void avg_c(uint8_t *dst, ptrdiff_t dst_stride,
+ const uint8_t *src, ptrdiff_t src_stride,
int w, int h)
{
do {
} while (--h);
}
-#define fpel_fn(type, sz) \
-static void type ## sz ## _c(uint8_t *dst, const uint8_t *src, \
- ptrdiff_t dst_stride, \
- ptrdiff_t src_stride, \
- int h, int mx, int my) \
-{ \
- type ## _c(dst, src, dst_stride, src_stride, sz, h); \
+#define fpel_fn(type, sz) \
+static void type ## sz ## _c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, int mx, int my) \
+{ \
+ type ## _c(dst, dst_stride, src, src_stride, sz, h); \
}
#define copy_avg_fn(sz) \
F[6] * src[x + +3 * stride] + \
F[7] * src[x + +4 * stride] + 64) >> 7)
-static av_always_inline void do_8tap_1d_c(uint8_t *dst, const uint8_t *src,
- ptrdiff_t dst_stride,
- ptrdiff_t src_stride,
+static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride,
+ const uint8_t *src, ptrdiff_t src_stride,
int w, int h, ptrdiff_t ds,
const int8_t *filter, int avg)
{
#define filter_8tap_1d_fn(opn, opa, dir, ds) \
static av_noinline void opn ## _8tap_1d_ ## dir ## _c(uint8_t *dst, \
- const uint8_t *src, \
ptrdiff_t dst_stride, \
+ const uint8_t *src, \
ptrdiff_t src_stride, \
int w, int h, \
const int8_t *filter) \
{ \
- do_8tap_1d_c(dst, src, dst_stride, src_stride, w, h, ds, filter, opa); \
+ do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
}
filter_8tap_1d_fn(put, 0, v, src_stride)
#undef filter_8tap_1d_fn
-static av_always_inline void do_8tap_2d_c(uint8_t *dst, const uint8_t *src,
- ptrdiff_t dst_stride,
- ptrdiff_t src_stride,
+static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride,
+ const uint8_t *src, ptrdiff_t src_stride,
int w, int h, const int8_t *filterx,
const int8_t *filtery, int avg)
{
#define filter_8tap_2d_fn(opn, opa) \
static av_noinline void opn ## _8tap_2d_hv_c(uint8_t *dst, \
- const uint8_t *src, \
ptrdiff_t dst_stride, \
+ const uint8_t *src, \
ptrdiff_t src_stride, \
int w, int h, \
const int8_t *filterx, \
const int8_t *filtery) \
{ \
- do_8tap_2d_c(dst, src, dst_stride, src_stride, \
+ do_8tap_2d_c(dst, dst_stride, src, src_stride, \
w, h, filterx, filtery, opa); \
}
#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \
static void \
avg ## _8tap_ ## type ## _ ## sz ## dir ## _c(uint8_t *dst, \
- const uint8_t *src, \
ptrdiff_t dst_stride, \
+ const uint8_t *src, \
ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
- avg ## _8tap_1d_ ## dir ## _c(dst, src, dst_stride, src_stride, sz, h, \
+ avg ## _8tap_1d_ ## dir ## _c(dst, dst_stride, src, src_stride, sz, h, \
vp9_subpel_filters[type_idx][dir_m - 1]); \
}
#define filter_fn_2d(sz, type, type_idx, avg) \
static void avg ## _8tap_ ## type ## _ ## sz ## hv_c(uint8_t *dst, \
- const uint8_t *src, \
ptrdiff_t dst_stride, \
+ const uint8_t *src, \
ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
- avg ## _8tap_2d_hv_c(dst, src, dst_stride, src_stride, sz, h, \
+ avg ## _8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
vp9_subpel_filters[type_idx][mx - 1], \
vp9_subpel_filters[type_idx][my - 1]); \
}
(src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
static av_always_inline void do_bilin_1d_c(uint8_t *dst,
- const uint8_t *src,
ptrdiff_t dst_stride,
+ const uint8_t *src,
ptrdiff_t src_stride,
int w, int h, ptrdiff_t ds,
int mxy, int avg)
#define bilin_1d_fn(opn, opa, dir, ds) \
static av_noinline void opn ## _bilin_1d_ ## dir ## _c(uint8_t *dst, \
- const uint8_t *src, \
ptrdiff_t dst_stride, \
+ const uint8_t *src, \
ptrdiff_t src_stride, \
int w, int h, int mxy) \
{ \
- do_bilin_1d_c(dst, src, dst_stride, src_stride, w, h, ds, mxy, opa); \
+ do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
}
bilin_1d_fn(put, 0, v, src_stride)
#undef bilin_1d_fn
static av_always_inline void do_bilin_2d_c(uint8_t *dst,
- const uint8_t *src,
ptrdiff_t dst_stride,
+ const uint8_t *src,
ptrdiff_t src_stride,
int w, int h, int mx, int my,
int avg)
#define bilin_2d_fn(opn, opa) \
static av_noinline void opn ## _bilin_2d_hv_c(uint8_t *dst, \
- const uint8_t *src, \
ptrdiff_t dst_stride, \
+ const uint8_t *src, \
ptrdiff_t src_stride, \
int w, int h, \
int mx, int my) \
{ \
- do_bilin_2d_c(dst, src, dst_stride, src_stride, w, h, mx, my, opa); \
+ do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \
}
bilin_2d_fn(put, 0)
#define bilinf_fn_1d(sz, dir, dir_m, avg) \
static void avg ## _bilin_ ## sz ## dir ## _c(uint8_t *dst, \
- const uint8_t *src, \
ptrdiff_t dst_stride, \
+ const uint8_t *src, \
ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
- avg ## _bilin_1d_ ## dir ## _c(dst, src, dst_stride, src_stride, \
+ avg ## _bilin_1d_ ## dir ## _c(dst, dst_stride, src, src_stride, \
sz, h, dir_m); \
}
#define bilinf_fn_2d(sz, avg) \
static void avg ## _bilin_ ## sz ## hv_c(uint8_t *dst, \
- const uint8_t *src, \
ptrdiff_t dst_stride, \
+ const uint8_t *src, \
ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
- avg ## _bilin_2d_hv_c(dst, src, dst_stride, src_stride, \
+ avg ## _bilin_2d_hv_c(dst, dst_stride, src, src_stride, \
sz, h, mx, my); \
}
#if HAVE_YASM
-#define fpel_func(avg, sz, opt) \
-void ff_vp9_ ## avg ## sz ## _ ## opt(uint8_t *dst, const uint8_t *src, \
- ptrdiff_t dst_stride, \
- ptrdiff_t src_stride, \
+#define fpel_func(avg, sz, opt) \
+void ff_vp9_ ## avg ## sz ## _ ## opt(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my)
fpel_func(put, 4, mmx);
#define mc_func(avg, sz, dir, opt, type, f_sz) \
void \
ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \
- const uint8_t *src, \
ptrdiff_t dst_stride, \
+ const uint8_t *src, \
ptrdiff_t src_stride, \
int h, \
const type (*filter)[f_sz])
#define mc_rep_func(avg, sz, hsz, dir, opt, type, f_sz) \
static av_always_inline void \
ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \
- const uint8_t *src, \
ptrdiff_t dst_stride, \
+ const uint8_t *src, \
ptrdiff_t src_stride, \
int h, \
const type (*filter)[f_sz]) \
{ \
- ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst, src, \
+ ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst, \
dst_stride, \
+ src, \
src_stride, \
h, \
filter); \
ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst + hsz, \
- src + hsz, \
dst_stride, \
+ src + hsz, \
src_stride, \
h, filter); \
}
#define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, opt) \
static void \
op ## _8tap_ ## fname ## _ ## sz ## hv_ ## opt(uint8_t *dst, \
- const uint8_t *src, \
ptrdiff_t dst_stride, \
+ const uint8_t *src, \
ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
LOCAL_ALIGNED_ ## align(uint8_t, temp, [71 * 64]); \
- ff_vp9_put_8tap_1d_h_ ## sz ## _ ## opt(temp, src - 3 * src_stride, \
- 64, src_stride, \
- h + 7, \
+ ff_vp9_put_8tap_1d_h_ ## sz ## _ ## opt(temp, 64, \
+ src - 3 * src_stride, \
+ src_stride, h + 7, \
ff_filters_ ## f_opt[f][mx - 1]); \
- ff_vp9_ ## op ## _8tap_1d_v_ ## sz ## _ ## opt(dst, temp + 3 * 64, \
- dst_stride, 64, \
- h, \
+ ff_vp9_ ## op ## _8tap_1d_v_ ## sz ## _ ## opt(dst, dst_stride, \
+ temp + 3 * 64, 64, h, \
ff_filters_ ## f_opt[f][my - 1]); \
}
#define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, opt) \
static void \
op ## _8tap_ ## fname ## _ ## sz ## dir ## _ ## opt(uint8_t *dst, \
- const uint8_t *src, \
ptrdiff_t dst_stride, \
+ const uint8_t *src, \
ptrdiff_t src_stride, \
int h, int mx, \
int my) \
{ \
- ff_vp9_ ## op ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(dst, src, \
+ ff_vp9_ ## op ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(dst, \
dst_stride, \
+ src, \
src_stride, h,\
ff_filters_ ## f_opt[f][dvar - 1]); \
}
%macro filter_sse2_h_fn 1
%assign %%px mmsize/2
-cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 15, dst, src, dstride, sstride, h, filtery
+cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 15, dst, dstride, src, sstride, h, filtery
pxor m5, m5
mova m6, [pw_64]
mova m7, [filteryq+ 0]
%macro filter_h_fn 1
%assign %%px mmsize/2
-cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, src, dstride, sstride, h, filtery
+cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, dstride, src, sstride, h, filtery
mova m6, [pw_256]
mova m7, [filteryq+ 0]
%if ARCH_X86_64 && mmsize > 8
%if ARCH_X86_64
%macro filter_hx2_fn 1
%assign %%px mmsize
-cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, src, dstride, sstride, h, filtery
+cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, dstride, src, sstride, h, filtery
mova m13, [pw_256]
mova m8, [filteryq+ 0]
mova m9, [filteryq+32]
%macro filter_sse2_v_fn 1
%assign %%px mmsize/2
%if ARCH_X86_64
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 15, dst, src, dstride, sstride, h, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 15, dst, dstride, src, sstride, h, filtery, src4, sstride3
%else
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 15, dst, src, dstride, sstride, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 15, dst, dstride, src, sstride, filtery, src4, sstride3
mov filteryq, r5mp
%define hd r4mp
%endif
%macro filter_v_fn 1
%assign %%px mmsize/2
%if ARCH_X86_64
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, src, dstride, sstride, h, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, dstride, src, sstride, h, filtery, src4, sstride3
%else
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, src, dstride, sstride, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, dstride, src, sstride, filtery, src4, sstride3
mov filteryq, r5mp
%define hd r4mp
%endif
%macro filter_vx2_fn 1
%assign %%px mmsize
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, src, dstride, sstride, h, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, dstride, src, sstride, h, filtery, src4, sstride3
mova m13, [pw_256]
lea sstride3q, [sstrideq*3]
lea src4q, [srcq+sstrideq]
%endif
%if %2 <= mmsize
-cglobal vp9_%1%2, 5, 7, 4, dst, src, dstride, sstride, h, dstride3, sstride3
+cglobal vp9_%1%2, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
lea sstride3q, [sstrideq*3]
lea dstride3q, [dstrideq*3]
%else
-cglobal vp9_%1%2, 5, 5, 4, dst, src, dstride, sstride, h
+cglobal vp9_%1%2, 5, 5, 4, dst, dstride, src, sstride, h
%endif
.loop:
%%srcfn m0, [srcq]
int op, hsize, filter, dx, dy;
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
- void, uint8_t *dst, const uint8_t *ref,
- ptrdiff_t dst_stride, ptrdiff_t ref_stride,
+ void, uint8_t *dst, ptrdiff_t dst_stride,
+ const uint8_t *ref, ptrdiff_t ref_stride,
int h, int mx, int my);
for (op = 0; op < 2; op++) {
int mx = dx ? 1 + (rnd() % 14) : 0;
int my = dy ? 1 + (rnd() % 14) : 0;
randomize_buffers();
- call_ref(dst0, src,
- size * SIZEOF_PIXEL,
- SRC_BUF_STRIDE * SIZEOF_PIXEL,
+ call_ref(dst0, size * SIZEOF_PIXEL,
+ src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
size, mx, my);
- call_new(dst1, src,
- size * SIZEOF_PIXEL,
- SRC_BUF_STRIDE * SIZEOF_PIXEL,
+ call_new(dst1, size * SIZEOF_PIXEL,
+ src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
size, mx, my);
if (memcmp(dst0, dst1, DST_BUF_SIZE))
fail();
// functions are identical
if (filter >= 1 && filter <= 2) continue;
- bench_new(dst1, src, size * SIZEOF_PIXEL,
- SRC_BUF_STRIDE * SIZEOF_PIXEL,
+ bench_new(dst1, size * SIZEOF_PIXEL,
+ src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
size, mx, my);
}
}