Merge commit 'ba479f3daafc7e4359ec1212164569ebe59f0bb7'
author James Almer <jamrial@gmail.com>
Tue, 21 Mar 2017 18:49:09 +0000 (15:49 -0300)
committer James Almer <jamrial@gmail.com>
Tue, 21 Mar 2017 18:49:09 +0000 (15:49 -0300)
* commit 'ba479f3daafc7e4359ec1212164569ebe59f0bb7':
  hevc: Change type of array stride parameters to ptrdiff_t

Merged-by: James Almer <jamrial@gmail.com>
1  2 
libavcodec/hevc.c
libavcodec/hevc_filter.c
libavcodec/hevcdsp_template.c

@@@ -1280,20 -1433,18 +1280,20 @@@ do 
  
  static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
  {
 -    //TODO: non-4:2:0 support
 -    HEVCLocalContext *lc = &s->HEVClc;
 +    HEVCLocalContext *lc = s->HEVClc;
      GetBitContext gb;
      int cb_size   = 1 << log2_cb_size;
-     int stride0   = s->frame->linesize[0];
+     ptrdiff_t stride0 = s->frame->linesize[0];
+     ptrdiff_t stride1 = s->frame->linesize[1];
+     ptrdiff_t stride2 = s->frame->linesize[2];
      uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
-     int   stride1 = s->frame->linesize[1];
      uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
-     int   stride2 = s->frame->linesize[2];
      uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
  
 -    int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma;
 +    int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
 +                         (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
 +                          ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
 +                          s->ps.sps->pcm.bit_depth_chroma;
      const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
      int ret;
  
@@@ -1354,12 -1517,13 +1354,12 @@@ static void luma_mc_uni(HEVCContext *s
      y_off += mv->y >> 2;
      src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
  
 -    if (x_off < extra_left || y_off < extra_top ||
 -        x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
 -        y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
 +    if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
 +        x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
 +        y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
-         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
+         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
 -        int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift);
 -        int buf_offset = extra_top *
 -                         edge_emu_stride + (extra_left << s->ps.sps->pixel_shift);
 +        int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
 +        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
  
          s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
                                   edge_emu_stride, srcstride,
          src = lc->edge_emu_buffer + buf_offset;
          srcstride = edge_emu_stride;
      }
 -    s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride,
 -                                                   block_h, mx, my, lc->mc_buffer);
 +
 +    if (!weight_flag)
 +        s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
 +                                                      block_h, mx, my, block_w);
 +    else
 +        s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
 +                                                        block_h, s->sh.luma_log2_weight_denom,
 +                                                        luma_weight, luma_offset, mx, my, block_w);
 +}
 +
 +/**
 + * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
 + *
 + * @param s HEVC decoding context
 + * @param dst target buffer for block data at block position
 + * @param dststride stride of the dst buffer
 + * @param ref0 reference picture0 buffer at origin (0, 0)
 + * @param mv0 motion vector0 (relative to block position) to get pixel data from
 + * @param x_off horizontal position of block from origin (0, 0)
 + * @param y_off vertical position of block from origin (0, 0)
 + * @param block_w width of block
 + * @param block_h height of block
 + * @param ref1 reference picture1 buffer at origin (0, 0)
 + * @param mv1 motion vector1 (relative to block position) to get pixel data from
 + * @param current_mv current motion vector structure
 + */
 + static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
 +                       AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
 +                       int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
 +{
 +    HEVCLocalContext *lc = s->HEVClc;
 +    ptrdiff_t src0stride  = ref0->linesize[0];
 +    ptrdiff_t src1stride  = ref1->linesize[0];
 +    int pic_width        = s->ps.sps->width;
 +    int pic_height       = s->ps.sps->height;
 +    int mx0              = mv0->x & 3;
 +    int my0              = mv0->y & 3;
 +    int mx1              = mv1->x & 3;
 +    int my1              = mv1->y & 3;
 +    int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
 +                           (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
 +    int x_off0           = x_off + (mv0->x >> 2);
 +    int y_off0           = y_off + (mv0->y >> 2);
 +    int x_off1           = x_off + (mv1->x >> 2);
 +    int y_off1           = y_off + (mv1->y >> 2);
 +    int idx              = ff_hevc_pel_weight[block_w];
 +
 +    uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
 +    uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
 +
 +    if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
 +        x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
 +        y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
-         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
++        const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
 +        int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
 +        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
 +
 +        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
 +                                 edge_emu_stride, src0stride,
 +                                 block_w + QPEL_EXTRA,
 +                                 block_h + QPEL_EXTRA,
 +                                 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
 +                                 pic_width, pic_height);
 +        src0 = lc->edge_emu_buffer + buf_offset;
 +        src0stride = edge_emu_stride;
 +    }
 +
 +    if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
 +        x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
 +        y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
-         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
++        const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
 +        int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
 +        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
 +
 +        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
 +                                 edge_emu_stride, src1stride,
 +                                 block_w + QPEL_EXTRA,
 +                                 block_h + QPEL_EXTRA,
 +                                 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
 +                                 pic_width, pic_height);
 +        src1 = lc->edge_emu_buffer2 + buf_offset;
 +        src1stride = edge_emu_stride;
 +    }
 +
 +    s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
 +                                                block_h, mx0, my0, block_w);
 +    if (!weight_flag)
 +        s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
 +                                                       block_h, mx1, my1, block_w);
 +    else
 +        s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
 +                                                         block_h, s->sh.luma_log2_weight_denom,
 +                                                         s->sh.luma_weight_l0[current_mv->ref_idx[0]],
 +                                                         s->sh.luma_weight_l1[current_mv->ref_idx[1]],
 +                                                         s->sh.luma_offset_l0[current_mv->ref_idx[0]],
 +                                                         s->sh.luma_offset_l1[current_mv->ref_idx[1]],
 +                                                         mx1, my1, block_w);
 +
  }
  
  /**
@@@ -138,106 -171,15 +138,106 @@@ static int get_qPy(HEVCContext *s, int 
      return s->qp_y_tab[x + y * s->ps.sps->min_cb_width];
  }
  
 -static void copy_CTB(uint8_t *dst, uint8_t *src,
 -                     int width, int height, ptrdiff_t stride)
 +static void copy_CTB(uint8_t *dst, const uint8_t *src, int width, int height,
-                      intptr_t stride_dst, intptr_t stride_src)
++                     ptrdiff_t stride_dst, ptrdiff_t stride_src)
 +{
 +int i, j;
 +
 +    if (((intptr_t)dst | (intptr_t)src | stride_dst | stride_src) & 15) {
 +        for (i = 0; i < height; i++) {
 +            for (j = 0; j < width; j+=8)
 +                AV_COPY64U(dst+j, src+j);
 +            dst += stride_dst;
 +            src += stride_src;
 +        }
 +    } else {
 +        for (i = 0; i < height; i++) {
 +            for (j = 0; j < width; j+=16)
 +                AV_COPY128(dst+j, src+j);
 +            dst += stride_dst;
 +            src += stride_src;
 +        }
 +    }
 +}
 +
 +static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift)
 +{
 +    if (pixel_shift)
 +        *(uint16_t *)dst = *(uint16_t *)src;
 +    else
 +        *dst = *src;
 +}
 +
 +static void copy_vert(uint8_t *dst, const uint8_t *src,
 +                      int pixel_shift, int height,
-                       int stride_dst, int stride_src)
++                      ptrdiff_t stride_dst, ptrdiff_t stride_src)
  {
      int i;
-                            int stride_src, int x, int y, int width, int height,
 +    if (pixel_shift == 0) {
 +        for (i = 0; i < height; i++) {
 +            *dst = *src;
 +            dst += stride_dst;
 +            src += stride_src;
 +        }
 +    } else {
 +        for (i = 0; i < height; i++) {
 +            *(uint16_t *)dst = *(uint16_t *)src;
 +            dst += stride_dst;
 +            src += stride_src;
 +        }
 +    }
 +}
 +
 +static void copy_CTB_to_hv(HEVCContext *s, const uint8_t *src,
++                           ptrdiff_t stride_src, int x, int y, int width, int height,
 +                           int c_idx, int x_ctb, int y_ctb)
 +{
 +    int sh = s->ps.sps->pixel_shift;
 +    int w = s->ps.sps->width >> s->ps.sps->hshift[c_idx];
 +    int h = s->ps.sps->height >> s->ps.sps->vshift[c_idx];
  
 -    for (i = 0; i < height; i++) {
 -        memcpy(dst, src, width);
 -        dst += stride;
 -        src += stride;
 +    /* copy horizontal edges */
 +    memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb) * w + x) << sh),
 +        src, width << sh);
 +    memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 1) * w + x) << sh),
 +        src + stride_src * (height - 1), width << sh);
 +
 +    /* copy vertical edges */
 +    copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb) * h + y) << sh), src, sh, height, 1 << sh, stride_src);
 +
 +    copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 1) * h + y) << sh), src + ((width - 1) << sh), sh, height, 1 << sh, stride_src);
 +}
 +
 +static void restore_tqb_pixels(HEVCContext *s,
 +                               uint8_t *src1, const uint8_t *dst1,
 +                               ptrdiff_t stride_src, ptrdiff_t stride_dst,
 +                               int x0, int y0, int width, int height, int c_idx)
 +{
 +    if ( s->ps.pps->transquant_bypass_enable_flag ||
 +            (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) {
 +        int x, y;
 +        int min_pu_size  = 1 << s->ps.sps->log2_min_pu_size;
 +        int hshift       = s->ps.sps->hshift[c_idx];
 +        int vshift       = s->ps.sps->vshift[c_idx];
 +        int x_min        = ((x0         ) >> s->ps.sps->log2_min_pu_size);
 +        int y_min        = ((y0         ) >> s->ps.sps->log2_min_pu_size);
 +        int x_max        = ((x0 + width ) >> s->ps.sps->log2_min_pu_size);
 +        int y_max        = ((y0 + height) >> s->ps.sps->log2_min_pu_size);
 +        int len          = (min_pu_size >> hshift) << s->ps.sps->pixel_shift;
 +        for (y = y_min; y < y_max; y++) {
 +            for (x = x_min; x < x_max; x++) {
 +                if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
 +                    int n;
 +                    uint8_t *src = src1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_src + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << s->ps.sps->pixel_shift);
 +                    const uint8_t *dst = dst1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_dst + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << s->ps.sps->pixel_shift);
 +                    for (n = 0; n < (min_pu_size >> vshift); n++) {
 +                        memcpy(src, dst, len);
 +                        src += stride_src;
 +                        dst += stride_dst;
 +                    }
 +                }
 +            }
 +        }
      }
  }
  
@@@ -303,151 -269,46 +303,151 @@@ static void sao_filter_CTB(HEVCContext 
          }
      }
  
 -    for (c_idx = 0; c_idx < 3; c_idx++) {
 -        int chroma = c_idx ? 1 : 0;
 -        int x0 = x >> chroma;
 -        int y0 = y >> chroma;
 -        ptrdiff_t stride = s->frame->linesize[c_idx];
 -        int ctb_size = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->hshift[c_idx];
 -        int width = FFMIN(ctb_size,
 -                          (s->ps.sps->width >> s->ps.sps->hshift[c_idx]) - x0);
 -        int height = FFMIN(ctb_size,
 -                           (s->ps.sps->height >> s->ps.sps->vshift[c_idx]) - y0);
 -
 -        uint8_t *src = &s->frame->data[c_idx][y0 * stride + (x0 << s->ps.sps->pixel_shift)];
 -        uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride + (x0 << s->ps.sps->pixel_shift)];
 -        int offset = (y_shift >> chroma) * stride + ((x_shift >> chroma) << s->ps.sps->pixel_shift);
 -
 -        copy_CTB(dst - offset, src - offset,
 -                 (edges[2] ? width  + (x_shift >> chroma) : width)  << s->ps.sps->pixel_shift,
 -                 (edges[3] ? height + (y_shift >> chroma) : height), stride);
 -
 -        for (class_index = 0; class_index < class; class_index++) {
 -
 -            switch (sao[class_index]->type_idx[c_idx]) {
 -            case SAO_BAND:
 -                s->hevcdsp.sao_band_filter[classes[class_index]](dst, src,
 -                                                                 stride,
 -                                                                 sao[class_index],
 -                                                                 edges, width,
 -                                                                 height, c_idx);
 -                break;
 -            case SAO_EDGE:
 -                s->hevcdsp.sao_edge_filter[classes[class_index]](dst, src,
 -                                                                 stride,
 -                                                                 sao[class_index],
 -                                                                 edges, width,
 -                                                                 height, c_idx,
 -                                                                 vert_edge[classes[class_index]],
 -                                                                 horiz_edge[classes[class_index]],
 -                                                                 diag_edge[classes[class_index]]);
 -                break;
 +    for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
 +        int x0       = x >> s->ps.sps->hshift[c_idx];
 +        int y0       = y >> s->ps.sps->vshift[c_idx];
-         int stride_src = s->frame->linesize[c_idx];
++        ptrdiff_t stride_src = s->frame->linesize[c_idx];
 +        int ctb_size_h = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->hshift[c_idx];
 +        int ctb_size_v = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->vshift[c_idx];
 +        int width    = FFMIN(ctb_size_h, (s->ps.sps->width  >> s->ps.sps->hshift[c_idx]) - x0);
 +        int height   = FFMIN(ctb_size_v, (s->ps.sps->height >> s->ps.sps->vshift[c_idx]) - y0);
 +        int tab      = sao_tab[(FFALIGN(width, 8) >> 3) - 1];
 +        uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->ps.sps->pixel_shift)];
-         int stride_dst;
++        ptrdiff_t stride_dst;
 +        uint8_t *dst;
 +
 +        switch (sao->type_idx[c_idx]) {
 +        case SAO_BAND:
 +            copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
 +                           x_ctb, y_ctb);
 +            if (s->ps.pps->transquant_bypass_enable_flag ||
 +                (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) {
 +            dst = lc->edge_emu_buffer;
 +            stride_dst = 2*MAX_PB_SIZE;
 +            copy_CTB(dst, src, width << s->ps.sps->pixel_shift, height, stride_dst, stride_src);
 +            s->hevcdsp.sao_band_filter[tab](src, dst, stride_src, stride_dst,
 +                                            sao->offset_val[c_idx], sao->band_position[c_idx],
 +                                            width, height);
 +            restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
 +                               x, y, width, height, c_idx);
 +            } else {
 +            s->hevcdsp.sao_band_filter[tab](src, src, stride_src, stride_src,
 +                                            sao->offset_val[c_idx], sao->band_position[c_idx],
 +                                            width, height);
 +            }
 +            sao->type_idx[c_idx] = SAO_APPLIED;
 +            break;
 +        case SAO_EDGE:
 +        {
 +            int w = s->ps.sps->width >> s->ps.sps->hshift[c_idx];
 +            int h = s->ps.sps->height >> s->ps.sps->vshift[c_idx];
 +            int left_edge = edges[0];
 +            int top_edge = edges[1];
 +            int right_edge = edges[2];
 +            int bottom_edge = edges[3];
 +            int sh = s->ps.sps->pixel_shift;
 +            int left_pixels, right_pixels;
 +
 +            stride_dst = 2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE;
 +            dst = lc->edge_emu_buffer + stride_dst + AV_INPUT_BUFFER_PADDING_SIZE;
 +
 +            if (!top_edge) {
 +                int left = 1 - left_edge;
 +                int right = 1 - right_edge;
 +                const uint8_t *src1[2];
 +                uint8_t *dst1;
 +                int src_idx, pos;
 +
 +                dst1 = dst - stride_dst - (left << sh);
 +                src1[0] = src - stride_src - (left << sh);
 +                src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb - 1) * w + x0 - left) << sh);
 +                pos = 0;
 +                if (left) {
 +                    src_idx = (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] ==
 +                               SAO_APPLIED);
 +                    copy_pixel(dst1, src1[src_idx], sh);
 +                    pos += (1 << sh);
 +                }
 +                src_idx = (CTB(s->sao, x_ctb, y_ctb-1).type_idx[c_idx] ==
 +                           SAO_APPLIED);
 +                memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
 +                if (right) {
 +                    pos += width << sh;
 +                    src_idx = (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] ==
 +                               SAO_APPLIED);
 +                    copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
 +                }
              }
 +            if (!bottom_edge) {
 +                int left = 1 - left_edge;
 +                int right = 1 - right_edge;
 +                const uint8_t *src1[2];
 +                uint8_t *dst1;
 +                int src_idx, pos;
 +
 +                dst1 = dst + height * stride_dst - (left << sh);
 +                src1[0] = src + height * stride_src - (left << sh);
 +                src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 2) * w + x0 - left) << sh);
 +                pos = 0;
 +                if (left) {
 +                    src_idx = (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] ==
 +                               SAO_APPLIED);
 +                    copy_pixel(dst1, src1[src_idx], sh);
 +                    pos += (1 << sh);
 +                }
 +                src_idx = (CTB(s->sao, x_ctb, y_ctb+1).type_idx[c_idx] ==
 +                           SAO_APPLIED);
 +                memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
 +                if (right) {
 +                    pos += width << sh;
 +                    src_idx = (CTB(s->sao, x_ctb+1, y_ctb+1).type_idx[c_idx] ==
 +                               SAO_APPLIED);
 +                    copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
 +                }
 +            }
 +            left_pixels = 0;
 +            if (!left_edge) {
 +                if (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
 +                    copy_vert(dst - (1 << sh),
 +                              s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb - 1) * h + y0) << sh),
 +                              sh, height, stride_dst, 1 << sh);
 +                } else {
 +                    left_pixels = 1;
 +                }
 +            }
 +            right_pixels = 0;
 +            if (!right_edge) {
 +                if (CTB(s->sao, x_ctb+1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
 +                    copy_vert(dst + (width << sh),
 +                              s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 2) * h + y0) << sh),
 +                              sh, height, stride_dst, 1 << sh);
 +                } else {
 +                    right_pixels = 1;
 +                }
 +            }
 +
 +            copy_CTB(dst - (left_pixels << sh),
 +                     src - (left_pixels << sh),
 +                     (width + left_pixels + right_pixels) << sh,
 +                     height, stride_dst, stride_src);
 +
 +            copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
 +                           x_ctb, y_ctb);
 +            s->hevcdsp.sao_edge_filter[tab](src, dst, stride_src, sao->offset_val[c_idx],
 +                                            sao->eo_class[c_idx], width, height);
 +            s->hevcdsp.sao_edge_restore[restore](src, dst,
 +                                                stride_src, stride_dst,
 +                                                sao,
 +                                                edges, width,
 +                                                height, c_idx,
 +                                                vert_edge,
 +                                                horiz_edge,
 +                                                diag_edge);
 +            restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
 +                               x, y, width, height, c_idx);
 +            sao->type_idx[c_idx] = SAO_APPLIED;
 +            break;
 +        }
          }
      }
  }
@@@ -390,13 -418,13 +390,13 @@@ static void FUNC(sao_edge_restore_0)(ui
              int offset_val = sao_offset_val[0];
              for (x = init_x; x < width; x++)
                  dst[x] = av_clip_pixel(src[x] + offset_val);
 -            init_y = 1;
          }
          if (borders[3]) {
 -            int offset_val = sao_offset_val[0];
 -            ptrdiff_t y_stride = stride * (height - 1);
 +            int offset_val   = sao_offset_val[0];
-             int y_stride_dst = stride_dst * (height - 1);
-             int y_stride_src = stride_src * (height - 1);
++            ptrdiff_t y_stride_dst = stride_dst * (height - 1);
++            ptrdiff_t y_stride_src = stride_src * (height - 1);
              for (x = init_x; x < width; x++)
 -                dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
 +                dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
              height--;
          }
      }
@@@ -443,11 -600,10 +443,11 @@@ static void FUNC(sao_edge_restore_1)(ui
              init_y = 1;
          }
          if (borders[3]) {
 -            int offset_val = sao_offset_val[0];
 -            ptrdiff_t y_stride = stride * (height - 1);
 +            int offset_val   = sao_offset_val[0];
-             int y_stride_dst = stride_dst * (height - 1);
-             int y_stride_src = stride_src * (height - 1);
++            ptrdiff_t y_stride_dst = stride_dst * (height - 1);
++            ptrdiff_t y_stride_src = stride_src * (height - 1);
              for (x = init_x; x < width; x++)
 -                dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
 +                dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
              height--;
          }
      }