Merge commit 'ad884d100259e55cb51a4239cd8a4fd5154c2073'
author Derek Buitenhuis <derek.buitenhuis@gmail.com>
Wed, 24 Feb 2016 15:23:16 +0000 (15:23 +0000)
committer Derek Buitenhuis <derek.buitenhuis@gmail.com>
Wed, 24 Feb 2016 15:23:16 +0000 (15:23 +0000)
* commit 'ad884d100259e55cb51a4239cd8a4fd5154c2073':
  hwcontext: add a CUDA implementation

Merged-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
1  2 
doc/APIchanges
libavutil/Makefile
libavutil/hwcontext.c
libavutil/hwcontext.h
libavutil/hwcontext_cuda.c
libavutil/hwcontext_cuda.h
libavutil/hwcontext_internal.h

diff --cc doc/APIchanges
Simple merge
@@@ -29,9 -22,9 +29,10 @@@ HEADERS = adler32.
            fifo.h                                                        \
            file.h                                                        \
            frame.h                                                       \
 +          hash.h                                                        \
            hmac.h                                                        \
            hwcontext.h                                                   \
+           hwcontext_cuda.h                                              \
            hwcontext_vdpau.h                                             \
            imgutils.h                                                    \
            intfloat.h                                                    \
@@@ -127,30 -95,19 +128,31 @@@ OBJS = adler32.
         opt.o                                                            \
         parseutils.o                                                     \
         pixdesc.o                                                        \
 +       pixelutils.o                                                     \
         random_seed.o                                                    \
         rational.o                                                       \
 +       reverse.o                                                        \
         rc4.o                                                            \
 +       ripemd.o                                                         \
         samplefmt.o                                                      \
         sha.o                                                            \
 +       sha512.o                                                         \
         stereo3d.o                                                       \
 +       threadmessage.o                                                  \
         time.o                                                           \
 +       timecode.o                                                       \
         tree.o                                                           \
 +       twofish.o                                                        \
         utils.o                                                          \
 +       xga_font_data.o                                                  \
         xtea.o                                                           \
 +       tea.o                                                            \
 +
 +OBJS-$(!HAVE_ATOMICS_NATIVE)            += atomic.o                     \
  
  OBJS-$(CONFIG_LZO)                      += lzo.o
 +OBJS-$(CONFIG_OPENCL)                   += opencl.o opencl_internal.o
+ OBJS-$(CONFIG_CUDA)                     += hwcontext_cuda.o
  OBJS-$(CONFIG_VDPAU)                    += hwcontext_vdpau.o
  
  OBJS += $(COMPAT_OBJS:%=../compat/%)
Simple merge
Simple merge
index 0000000,6b87b61..2c5980d
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,270 +1,270 @@@
 - * This file is part of Libav.
+ /*
 - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ #include "buffer.h"
+ #include "common.h"
+ #include "hwcontext.h"
+ #include "hwcontext_internal.h"
+ #include "hwcontext_cuda.h"
+ #include "mem.h"
+ #include "pixdesc.h"
+ #include "pixfmt.h"
+ /*
+  * Private per-frames-context state, filled in by cuda_frames_init() from
+  * av_pix_fmt_get_chroma_sub_sample() for the context's sw_format.
+  */
+ typedef struct CUDAFramesContext {
+     /* horizontal chroma shift; stored but not read by the code in this file */
+     int shift_width;
+     /* vertical chroma shift; the transfer functions shift the frame height
+      * right by this amount for every plane after the first */
+     int shift_height;
+ } CUDAFramesContext;
+ /*
+  * Software pixel formats accepted as AVHWFramesContext.sw_format.
+  * cuda_frames_init() rejects any format not listed here; each entry also
+  * has a matching case in the size computation of cuda_frames_init() and in
+  * the plane layout of cuda_get_buffer().
+  */
+ static const enum AVPixelFormat supported_formats[] = {
+     AV_PIX_FMT_NV12,
+     AV_PIX_FMT_YUV420P,
+     AV_PIX_FMT_YUV444P,
+ };
+ /*
+  * AVBuffer free callback for device memory allocated by cuda_pool_alloc().
+  *
+  * opaque: the AVHWFramesContext that was passed to av_buffer_create()
+  * data:   the CUdeviceptr to release, carried as a byte pointer
+  *
+  * The callback cannot report failure, so errors are only logged.
+  */
+ static void cuda_buffer_free(void *opaque, uint8_t *data)
+ {
+     AVHWFramesContext *ctx = opaque;
+     AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
+     CUcontext dummy;
+     CUresult push_err, err;
+     push_err = cuCtxPushCurrent(hwctx->cuda_ctx);
+     if (push_err != CUDA_SUCCESS)
+         av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
+     /* Best effort: still try to release the memory even if the push failed,
+      * as the previous code did, rather than leaking it unconditionally. */
+     err = cuMemFree((CUdeviceptr)data);
+     if (err != CUDA_SUCCESS)
+         av_log(ctx, AV_LOG_ERROR, "Error freeing CUDA device memory\n");
+     /* Only pop what was actually pushed; popping after a failed push would
+      * remove a context belonging to the caller from the stack. */
+     if (push_err == CUDA_SUCCESS)
+         cuCtxPopCurrent(&dummy);
+ }
+ /*
+  * Buffer pool allocation callback: allocates 'size' bytes of device memory
+  * with cuMemAlloc() under the device's CUDA context and wraps it in an
+  * AVBufferRef whose free callback is cuda_buffer_free().
+  *
+  * opaque: the AVHWFramesContext owning the pool
+  * size:   number of bytes to allocate
+  *
+  * Returns the new buffer reference, or NULL if the context could not be
+  * made current, the device allocation failed, or the AVBufferRef could not
+  * be created (the device memory is released again in that last case).
+  */
+ static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
+ {
+     AVHWFramesContext   *frames_ctx = opaque;
+     AVCUDADeviceContext *device     = frames_ctx->device_ctx->hwctx;
+     AVBufferRef *buf = NULL;
+     CUcontext popped = NULL;
+     CUdeviceptr devptr;
+     if (cuCtxPushCurrent(device->cuda_ctx) != CUDA_SUCCESS) {
+         av_log(frames_ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
+         return NULL;
+     }
+     if (cuMemAlloc(&devptr, size) == CUDA_SUCCESS) {
+         buf = av_buffer_create((uint8_t*)devptr, size, cuda_buffer_free, frames_ctx, 0);
+         /* no wrapper means nobody would ever free the device memory */
+         if (!buf)
+             cuMemFree(devptr);
+     }
+     /* every path past the successful push leaves through this pop */
+     cuCtxPopCurrent(&popped);
+     return buf;
+ }
+ /*
+  * Frames-context init callback.
+  *
+  * Checks that ctx->sw_format is one of supported_formats, records its
+  * chroma shifts in the private context and, when the caller supplied no
+  * pool, creates an internal buffer pool whose buffers hold one whole frame
+  * (all planes tightly packed) and are allocated by cuda_pool_alloc().
+  *
+  * Returns 0 on success, AVERROR(ENOSYS) for an unsupported sw_format,
+  * AVERROR(ENOMEM) if the pool cannot be created, or AVERROR_BUG if a
+  * supported format has no size rule below.
+  */
+ static int cuda_frames_init(AVHWFramesContext *ctx)
+ {
+     CUDAFramesContext *priv = ctx->internal->priv;
+     int i;
+     for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
+         if (ctx->sw_format == supported_formats[i])
+             break;
+     }
+     if (i == FF_ARRAY_ELEMS(supported_formats)) {
+         av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
+                av_get_pix_fmt_name(ctx->sw_format));
+         return AVERROR(ENOSYS);
+     }
+     av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
+     if (!ctx->pool) {
+         int size;
+         switch (ctx->sw_format) {
+         case AV_PIX_FMT_NV12:
+         case AV_PIX_FMT_YUV420P:
+             /* full-size first plane plus half as many bytes of chroma */
+             size = ctx->width * ctx->height * 3 / 2;
+             break;
+         case AV_PIX_FMT_YUV444P:
+             /* three full-size planes */
+             size = ctx->width * ctx->height * 3;
+             break;
+         default:
+             /* A format was added to supported_formats without a size rule
+              * here; fail instead of reading 'size' uninitialized. */
+             return AVERROR_BUG;
+         }
+         ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
+         if (!ctx->internal->pool_internal)
+             return AVERROR(ENOMEM);
+     }
+     return 0;
+ }
+ /*
+  * Frame allocation callback: takes one buffer from ctx->pool and points the
+  * frame's plane pointers and linesizes into it according to ctx->sw_format.
+  * Planes are tightly packed, with linesize derived from ctx->width.
+  *
+  * Returns 0 on success, AVERROR(ENOMEM) if the pool yields no buffer, or
+  * AVERROR_BUG (after unreferencing the frame) for a format with no layout.
+  *
+  * NOTE(review): chroma linesizes and offsets use truncating divisions, so
+  * odd widths/heights look under-provisioned for 4:2:0 - confirm whether
+  * callers guarantee even dimensions.
+  */
+ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
+ {
+     uint8_t *base;
+     int luma_size;
+     frame->buf[0] = av_buffer_pool_get(ctx->pool);
+     if (!frame->buf[0])
+         return AVERROR(ENOMEM);
+     base      = frame->buf[0]->data;
+     luma_size = ctx->width * ctx->height;
+     if (ctx->sw_format == AV_PIX_FMT_NV12) {
+         /* two planes, both with a full-width linesize */
+         frame->data[0]     = base;
+         frame->data[1]     = frame->data[0] + luma_size;
+         frame->linesize[0] = ctx->width;
+         frame->linesize[1] = ctx->width;
+     } else if (ctx->sw_format == AV_PIX_FMT_YUV420P) {
+         /* data[2] is placed directly after plane 0 and data[1] after it,
+          * i.e. the last two planes sit in swapped order in memory */
+         frame->data[0]     = base;
+         frame->data[2]     = frame->data[0] + luma_size;
+         frame->data[1]     = frame->data[2] + luma_size / 4;
+         frame->linesize[0] = ctx->width;
+         frame->linesize[1] = ctx->width / 2;
+         frame->linesize[2] = ctx->width / 2;
+     } else if (ctx->sw_format == AV_PIX_FMT_YUV444P) {
+         /* three equally sized planes, stored in index order */
+         frame->data[0]     = base;
+         frame->data[1]     = frame->data[0] + luma_size;
+         frame->data[2]     = frame->data[1] + luma_size;
+         frame->linesize[0] = ctx->width;
+         frame->linesize[1] = ctx->width;
+         frame->linesize[2] = ctx->width;
+     } else {
+         /* drop the buffer reference taken above before failing */
+         av_frame_unref(frame);
+         return AVERROR_BUG;
+     }
+     frame->format = AV_PIX_FMT_CUDA;
+     frame->width  = ctx->width;
+     frame->height = ctx->height;
+     return 0;
+ }
+ /*
+  * Reports the formats usable for transfers to/from this frames context:
+  * a newly allocated list holding ctx->sw_format followed by the
+  * AV_PIX_FMT_NONE terminator. 'dir' is not consulted; the same list is
+  * returned for both directions.
+  *
+  * Returns 0 and stores the list in *formats, or AVERROR(ENOMEM) (leaving
+  * *formats untouched) if the list cannot be allocated.
+  */
+ static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
+                                      enum AVHWFrameTransferDirection dir,
+                                      enum AVPixelFormat **formats)
+ {
+     enum AVPixelFormat *list = av_malloc_array(2, sizeof(*list));
+     if (list) {
+         list[0]  = ctx->sw_format;
+         list[1]  = AV_PIX_FMT_NONE;
+         *formats = list;
+         return 0;
+     }
+     return AVERROR(ENOMEM);
+ }
+ /*
+  * Downloads a CUDA frame into a host frame, one cuMemcpy2D() per plane.
+  *
+  * ctx: frames context the source frame belongs to
+  * dst: destination frame in host memory
+  * src: source frame whose data[] pointers are device pointers
+  *
+  * Planes are copied until the first NULL src->data[] entry. Each copy is
+  * as wide as the smaller of the two linesizes; planes after the first have
+  * their height reduced by the format's vertical chroma shift.
+  *
+  * Returns 0 on success or AVERROR_UNKNOWN if the CUDA context cannot be
+  * made current or a copy fails.
+  */
+ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
+                                    const AVFrame *src)
+ {
+     CUDAFramesContext           *priv = ctx->internal->priv;
+     AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+     CUcontext dummy;
+     CUresult err;
+     int ret = 0;
+     int i;
+     err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+     if (err != CUDA_SUCCESS)
+         return AVERROR_UNKNOWN;
+     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
+         CUDA_MEMCPY2D cpy = {
+             .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+             .dstMemoryType = CU_MEMORYTYPE_HOST,
+             .srcDevice     = (CUdeviceptr)src->data[i],
+             .dstHost       = dst->data[i],
+             .srcPitch      = src->linesize[i],
+             .dstPitch      = dst->linesize[i],
+             .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
+             .Height        = src->height >> (i ? priv->shift_height : 0),
+         };
+         err = cuMemcpy2D(&cpy);
+         if (err != CUDA_SUCCESS) {
+             av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
+             /* leave through the common exit so the context pushed above
+              * is popped again instead of staying on the thread's stack */
+             ret = AVERROR_UNKNOWN;
+             break;
+         }
+     }
+     cuCtxPopCurrent(&dummy);
+     return ret;
+ }
+ /*
+  * Uploads a host frame into a CUDA frame, one cuMemcpy2D() per plane.
+  *
+  * ctx: frames context the destination frame belongs to
+  * dst: destination frame whose data[] pointers are device pointers
+  * src: source frame in host memory
+  *
+  * Planes are copied until the first NULL src->data[] entry. Each copy is
+  * as wide as the smaller of the two linesizes; planes after the first have
+  * their height reduced by the format's vertical chroma shift.
+  *
+  * Returns 0 on success or AVERROR_UNKNOWN if the CUDA context cannot be
+  * made current or a copy fails.
+  */
+ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
+                                  const AVFrame *src)
+ {
+     CUDAFramesContext           *priv = ctx->internal->priv;
+     AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+     CUcontext dummy;
+     CUresult err;
+     int ret = 0;
+     int i;
+     err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+     if (err != CUDA_SUCCESS)
+         return AVERROR_UNKNOWN;
+     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
+         CUDA_MEMCPY2D cpy = {
+             .srcMemoryType = CU_MEMORYTYPE_HOST,
+             .dstMemoryType = CU_MEMORYTYPE_DEVICE,
+             .srcHost       = src->data[i],
+             .dstDevice     = (CUdeviceptr)dst->data[i],
+             .srcPitch      = src->linesize[i],
+             .dstPitch      = dst->linesize[i],
+             .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
+             .Height        = src->height >> (i ? priv->shift_height : 0),
+         };
+         err = cuMemcpy2D(&cpy);
+         if (err != CUDA_SUCCESS) {
+             /* this is the upload direction, so report it as such */
+             av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
+             /* leave through the common exit so the context pushed above
+              * is popped again instead of staying on the thread's stack */
+             ret = AVERROR_UNKNOWN;
+             break;
+         }
+     }
+     cuCtxPopCurrent(&dummy);
+     return ret;
+ }
+ /*
+  * Descriptor for the CUDA hwcontext backend: identifies the device type,
+  * gives the sizes of the public device context and the private frames
+  * context defined in this file, and wires up the callbacks implemented
+  * above. AV_PIX_FMT_CUDA is the only hardware pixel format listed.
+  */
+ const HWContextType ff_hwcontext_type_cuda = {
+     .type                 = AV_HWDEVICE_TYPE_CUDA,
+     .name                 = "CUDA",
+     .device_hwctx_size    = sizeof(AVCUDADeviceContext),
+     .frames_priv_size     = sizeof(CUDAFramesContext),
+     .frames_init          = cuda_frames_init,
+     .frames_get_buffer    = cuda_get_buffer,
+     .transfer_get_formats = cuda_transfer_get_formats,
+     .transfer_data_to     = cuda_transfer_data_to,
+     .transfer_data_from   = cuda_transfer_data_from,
+     .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
+ };
index 0000000,7f067c7..23a77ce
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,46 +1,46 @@@
 - * This file is part of Libav.
+ /*
 - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ #ifndef AVUTIL_HWCONTEXT_CUDA_H
+ #define AVUTIL_HWCONTEXT_CUDA_H
+ #include <cuda.h>
+ #include "pixfmt.h"
+ /**
+  * @file
+  * An API-specific header for AV_HWDEVICE_TYPE_CUDA.
+  *
+  * This API supports dynamic frame pools. AVHWFramesContext.pool must return
+  * AVBufferRefs whose data pointer is a CUdeviceptr.
+  */
+ /**
+  * This struct is allocated as AVHWDeviceContext.hwctx
+  */
+ typedef struct AVCUDADeviceContext {
+     /* CUDA context that hwcontext_cuda.c pushes as current around its
+      * driver calls (allocation, free and frame transfers) */
+     CUcontext cuda_ctx;
+ } AVCUDADeviceContext;
+ /**
+  * AVHWFramesContext.hwctx is currently not used
+  */
+ #endif /* AVUTIL_HWCONTEXT_CUDA_H */
Simple merge