avcodec/nvdec: Add support for decoding HEVC 4:4:4 content
authorPhilip Langdale <philipl@overt.org>
Sun, 7 Oct 2018 01:11:52 +0000 (18:11 -0700)
committerPhilip Langdale <philipl@overt.org>
Sat, 16 Feb 2019 16:47:36 +0000 (08:47 -0800)
The latest generation video decoder on the Turing chips supports
decoding HEVC 4:4:4. Supporting this is relatively straight-forward;
we need to account for the different chroma format and pick the
right output and sw formats at the right times.

There was one bug which was the hard-coded assumption that the
first chroma plane would be half-height; I fixed this to use the
actual shift value on the plane.

We also need to pass the SPS and PPS range extension flags.

libavcodec/hevcdec.c
libavcodec/nvdec.c
libavcodec/nvdec_hevc.c

index b2a87d5..967f8f1 100644 (file)
@@ -409,6 +409,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 #endif
         break;
     case AV_PIX_FMT_YUV420P12:
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUV444P10:
+    case AV_PIX_FMT_YUV444P12:
 #if CONFIG_HEVC_NVDEC_HWACCEL
         *fmt++ = AV_PIX_FMT_CUDA;
 #endif
index c7d5379..72201a1 100644 (file)
 #include "nvdec.h"
 #include "internal.h"
 
+#if !NVDECAPI_CHECK_VERSION(9, 0)
+#define cudaVideoSurfaceFormat_YUV444 2
+#define cudaVideoSurfaceFormat_YUV444_16Bit 3
+#endif
+
 typedef struct NVDECDecoder {
     CUvideodecoder decoder;
 
@@ -274,7 +279,8 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
 
     CUVIDDECODECREATEINFO params = { 0 };
 
-    int cuvid_codec_type, cuvid_chroma_format;
+    cudaVideoSurfaceFormat output_format;
+    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
     int ret = 0;
 
     sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
@@ -292,6 +298,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
         return AVERROR(ENOSYS);
     }
+    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
 
     if (!avctx->hw_frames_ctx) {
         ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
@@ -299,6 +306,21 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
             return ret;
     }
 
+    switch (sw_desc->comp[0].depth) {
+    case 8:
+        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444 :
+                                     cudaVideoSurfaceFormat_NV12;
+        break;
+    case 10:
+    case 12:
+        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444_16Bit :
+                                     cudaVideoSurfaceFormat_P016;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
+        return AVERROR(ENOSYS);
+    }
+
     frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
 
     params.ulWidth             = avctx->coded_width;
@@ -306,8 +328,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
     params.ulTargetWidth       = avctx->coded_width;
     params.ulTargetHeight      = avctx->coded_height;
     params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
-    params.OutputFormat        = params.bitDepthMinus8 ?
-                                 cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
+    params.OutputFormat        = output_format;
     params.CodecType           = cuvid_codec_type;
     params.ChromaFormat        = cuvid_chroma_format;
     params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
@@ -386,6 +407,8 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
     NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
     NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
 
+    AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
+
     CUVIDPROCPARAMS vpp = { 0 };
     NVDECFrame *unmap_data = NULL;
 
@@ -394,6 +417,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
 
     unsigned int pitch, i;
     unsigned int offset = 0;
+    int shift_h = 0, shift_v = 0;
     int ret = 0;
 
     vpp.progressive_frame = 1;
@@ -427,10 +451,11 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
     unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
     unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
 
+    av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
     for (i = 0; frame->linesize[i]; i++) {
         frame->data[i] = (uint8_t*)(devptr + offset);
         frame->linesize[i] = pitch;
-        offset += pitch * (frame->height >> (i ? 1 : 0));
+        offset += pitch * (frame->height >> (i ? shift_v : 0));
     }
 
     goto finish;
@@ -566,7 +591,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
 {
     AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
     const AVPixFmtDescriptor *sw_desc;
-    int cuvid_codec_type, cuvid_chroma_format;
+    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
 
     sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
     if (!sw_desc)
@@ -583,6 +608,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
         return AVERROR(EINVAL);
     }
+    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
 
     frames_ctx->format            = AV_PIX_FMT_CUDA;
     frames_ctx->width             = (avctx->coded_width + 1) & ~1;
@@ -601,13 +627,13 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
 
     switch (sw_desc->comp[0].depth) {
     case 8:
-        frames_ctx->sw_format = AV_PIX_FMT_NV12;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
         break;
     case 10:
-        frames_ctx->sw_format = AV_PIX_FMT_P010;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
         break;
     case 12:
-        frames_ctx->sw_format = AV_PIX_FMT_P016;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
         break;
     default:
         return AVERROR(EINVAL);
index e04a701..d11b5e8 100644 (file)
@@ -131,6 +131,17 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx,
             .IdrPicFlag                                   = IS_IDR(s),
             .bit_depth_luma_minus8                        = sps->bit_depth - 8,
             .bit_depth_chroma_minus8                      = sps->bit_depth - 8,
+#if NVDECAPI_CHECK_VERSION(9, 0)
+            .sps_range_extension_flag                     = sps->sps_range_extension_flag,
+            .transform_skip_rotation_enabled_flag         = sps->transform_skip_rotation_enabled_flag,
+            .transform_skip_context_enabled_flag          = sps->transform_skip_context_enabled_flag,
+            .implicit_rdpcm_enabled_flag                  = sps->implicit_rdpcm_enabled_flag,
+            .explicit_rdpcm_enabled_flag                  = sps->explicit_rdpcm_enabled_flag,
+            .extended_precision_processing_flag           = sps->extended_precision_processing_flag,
+            .intra_smoothing_disabled_flag                = sps->intra_smoothing_disabled_flag,
+            .persistent_rice_adaptation_enabled_flag      = sps->persistent_rice_adaptation_enabled_flag,
+            .cabac_bypass_alignment_enabled_flag          = sps->cabac_bypass_alignment_enabled_flag,
+#endif
 
             .dependent_slice_segments_enabled_flag        = pps->dependent_slice_segments_enabled_flag,
             .slice_segment_header_extension_present_flag  = pps->slice_header_extension_present_flag,
@@ -164,6 +175,13 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx,
             .uniform_spacing_flag                         = pps->uniform_spacing_flag,
             .num_tile_columns_minus1                      = pps->num_tile_columns - 1,
             .num_tile_rows_minus1                         = pps->num_tile_rows - 1,
+#if NVDECAPI_CHECK_VERSION(9, 0)
+            .pps_range_extension_flag                     = pps->pps_range_extensions_flag,
+            .cross_component_prediction_enabled_flag      = pps->cross_component_prediction_enabled_flag,
+            .chroma_qp_offset_list_enabled_flag           = pps->chroma_qp_offset_list_enabled_flag,
+            .diff_cu_chroma_qp_offset_depth               = pps->diff_cu_chroma_qp_offset_depth,
+            .chroma_qp_offset_list_len_minus1             = pps->chroma_qp_offset_list_len_minus1,
+#endif
 
             .NumBitsForShortTermRPSInSlice                = s->sh.short_term_rps ? s->sh.short_term_ref_pic_set_size : 0,
             .NumDeltaPocsOfRefRpsIdx                      = s->sh.short_term_rps ? s->sh.short_term_rps->rps_idx_num_delta_pocs : 0,
@@ -185,6 +203,18 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx,
     for (i = 0; i < pps->num_tile_rows; i++)
         ppc->row_height_minus1[i] = pps->row_height[i] - 1;
 
+#if NVDECAPI_CHECK_VERSION(9, 0)
+    if (pps->chroma_qp_offset_list_len_minus1 > FF_ARRAY_ELEMS(ppc->cb_qp_offset_list) ||
+        pps->chroma_qp_offset_list_len_minus1 > FF_ARRAY_ELEMS(ppc->cr_qp_offset_list)) {
+        av_log(avctx, AV_LOG_ERROR, "Too many chroma_qp_offsets\n");
+        return AVERROR(ENOSYS);
+    }
+    for (i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) {
+        ppc->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i];
+        ppc->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i];
+    }
+#endif
+
     if (s->rps[LT_CURR].nb_refs     > FF_ARRAY_ELEMS(ppc->RefPicSetLtCurr)       ||
         s->rps[ST_CURR_BEF].nb_refs > FF_ARRAY_ELEMS(ppc->RefPicSetStCurrBefore) ||
         s->rps[ST_CURR_AFT].nb_refs > FF_ARRAY_ELEMS(ppc->RefPicSetStCurrAfter)) {