lavfi/vulkan: use all enabled queues in the queue family
authorLynne <dev@lynne.ee>
Wed, 13 May 2020 23:37:21 +0000 (00:37 +0100)
committerLynne <dev@lynne.ee>
Sat, 23 May 2020 18:07:50 +0000 (19:07 +0100)
This should significantly improve the performance with certain
filterchains.

libavfilter/vf_avgblur_vulkan.c
libavfilter/vf_chromaber_vulkan.c
libavfilter/vf_overlay_vulkan.c
libavfilter/vf_scale_vulkan.c
libavfilter/vulkan.c
libavfilter/vulkan.h

index 105d753..12d57e0 100644 (file)
@@ -97,6 +97,10 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     if (!sampler)
         return AVERROR_EXTERNAL;
 
+    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
+    s->vkctx.queue_count = GET_QUEUE_COUNT(s->vkctx.hwctx, 0, 1, 0);
+    s->vkctx.cur_queue_idx = rand() % s->vkctx.queue_count;
+
     { /* Create shader for the horizontal pass */
         desc_i[0].updater = s->input_images;
         desc_i[1].updater = s->tmp_images;
@@ -184,8 +188,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     }
 
     /* Execution context */
-    RET(ff_vk_create_exec_ctx(ctx, &s->exec,
-                              s->vkctx.hwctx->queue_family_comp_index));
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec));
 
     s->initialized = 1;
 
@@ -198,22 +201,30 @@ fail:
 static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f, AVFrame *in_f)
 {
     int err;
+    VkCommandBuffer cmd_buf;
     AvgBlurVulkanContext *s = avctx->priv;
     AVVkFrame *in = (AVVkFrame *)in_f->data[0];
     AVVkFrame *tmp = (AVVkFrame *)tmp_f->data[0];
     AVVkFrame *out = (AVVkFrame *)out_f->data[0];
     int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
 
+    /* Update descriptors and init the exec context */
+    ff_vk_start_exec_recording(avctx, s->exec);
+    cmd_buf = ff_vk_get_exec_buf(avctx, s->exec);
+
     for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->input_images[i].imageView,
+                                   in->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.input_format)[i],
                                    ff_comp_identity_map));
 
-        RET(ff_vk_create_imageview(avctx, &s->tmp_images[i].imageView, tmp->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->tmp_images[i].imageView,
+                                   tmp->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.output_format)[i],
                                    ff_comp_identity_map));
 
-        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->output_images[i].imageView,
+                                   out->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.output_format)[i],
                                    ff_comp_identity_map));
 
@@ -225,8 +236,6 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f
     ff_vk_update_descriptor_set(avctx, s->pl_hor, 0);
     ff_vk_update_descriptor_set(avctx, s->pl_ver, 0);
 
-    ff_vk_start_exec_recording(avctx, s->exec);
-
     for (int i = 0; i < planes; i++) {
         VkImageMemoryBarrier bar[] = {
             {
@@ -270,7 +279,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f
             },
         };
 
-        vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+        vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                              VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
                              0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
 
@@ -286,12 +295,12 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f
 
     ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl_hor);
 
-    vkCmdDispatch(s->exec->buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
+    vkCmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
                   s->vkctx.output_height, 1);
 
     ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl_ver);
 
-    vkCmdDispatch(s->exec->buf, s->vkctx.output_width,
+    vkCmdDispatch(cmd_buf, s->vkctx.output_width,
                   FFALIGN(s->vkctx.output_height, CGS)/CGS, 1);
 
     ff_vk_add_exec_dep(avctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
@@ -301,14 +310,10 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f
     if (err)
         return err;
 
-fail:
-
-    for (int i = 0; i < planes; i++) {
-        ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView);
-        ff_vk_destroy_imageview(avctx, &s->tmp_images[i].imageView);
-        ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView);
-    }
+    return err;
 
+fail:
+    ff_vk_discard_exec_deps(avctx, s->exec);
     return err;
 }
 
index 673b3a7..1bee5e1 100644 (file)
@@ -73,6 +73,10 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     if (!sampler)
         return AVERROR_EXTERNAL;
 
+    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
+    s->vkctx.queue_count = GET_QUEUE_COUNT(s->vkctx.hwctx, 0, 1, 0);
+    s->vkctx.cur_queue_idx = rand() % s->vkctx.queue_count;
+
     s->pl = ff_vk_create_pipeline(ctx);
     if (!s->pl)
         return AVERROR(ENOMEM);
@@ -154,8 +158,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     RET(ff_vk_init_compute_pipeline(ctx, s->pl));
 
     /* Execution context */
-    RET(ff_vk_create_exec_ctx(ctx, &s->exec,
-                              s->vkctx.hwctx->queue_family_comp_index));
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec));
 
     s->initialized = 1;
 
@@ -168,17 +171,24 @@ fail:
 static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
 {
     int err = 0;
+    VkCommandBuffer cmd_buf;
     ChromaticAberrationVulkanContext *s = avctx->priv;
     AVVkFrame *in = (AVVkFrame *)in_f->data[0];
     AVVkFrame *out = (AVVkFrame *)out_f->data[0];
     int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
 
+    /* Update descriptors and init the exec context */
+    ff_vk_start_exec_recording(avctx, s->exec);
+    cmd_buf = ff_vk_get_exec_buf(avctx, s->exec);
+
     for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->input_images[i].imageView,
+                                   in->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.input_format)[i],
                                    ff_comp_identity_map));
 
-        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->output_images[i].imageView,
+                                   out->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.output_format)[i],
                                    ff_comp_identity_map));
 
@@ -188,8 +198,6 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
 
     ff_vk_update_descriptor_set(avctx, s->pl, 0);
 
-    ff_vk_start_exec_recording(avctx, s->exec);
-
     for (int i = 0; i < planes; i++) {
         VkImageMemoryBarrier bar[2] = {
             {
@@ -220,7 +228,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
             },
         };
 
-        vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+        vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                              VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
                              0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
 
@@ -236,7 +244,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
     ff_vk_update_push_exec(avctx, s->exec, VK_SHADER_STAGE_COMPUTE_BIT,
                            0, sizeof(s->opts), &s->opts);
 
-    vkCmdDispatch(s->exec->buf,
+    vkCmdDispatch(cmd_buf,
                   FFALIGN(s->vkctx.output_width,  CGROUPS[0])/CGROUPS[0],
                   FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
 
@@ -247,12 +255,10 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
     if (err)
         return err;
 
-    for (int i = 0; i < planes; i++) {
-        ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView);
-        ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView);
-    }
+    return err;
 
 fail:
+    ff_vk_discard_exec_deps(avctx, s->exec);
     return err;
 }
 
index 83cfae4..60a7356 100644 (file)
@@ -87,6 +87,10 @@ static av_cold int init_filter(AVFilterContext *ctx)
     if (!s->pl)
         return AVERROR(ENOMEM);
 
+    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
+    s->vkctx.queue_count = GET_QUEUE_COUNT(s->vkctx.hwctx, 0, 1, 0);
+    s->vkctx.cur_queue_idx = rand() % s->vkctx.queue_count;
+
     { /* Create the shader */
         const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
         const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
@@ -211,8 +215,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
     }
 
     /* Execution context */
-    RET(ff_vk_create_exec_ctx(ctx, &s->exec,
-                              s->vkctx.hwctx->queue_family_comp_index));
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec));
 
     s->initialized = 1;
 
@@ -226,6 +229,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
                           AVFrame *main_f, AVFrame *overlay_f)
 {
     int err;
+    VkCommandBuffer cmd_buf;
     OverlayVulkanContext *s = avctx->priv;
     int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
 
@@ -236,16 +240,23 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
     AVHWFramesContext *main_fc = (AVHWFramesContext*)main_f->hw_frames_ctx->data;
     AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data;
 
+    /* Update descriptors and init the exec context */
+    ff_vk_start_exec_recording(avctx, s->exec);
+    cmd_buf = ff_vk_get_exec_buf(avctx, s->exec);
+
     for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(avctx, &s->main_images[i].imageView, main->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->main_images[i].imageView,
+                                   main->img[i],
                                    av_vkfmt_from_pixfmt(main_fc->sw_format)[i],
                                    ff_comp_identity_map));
 
-        RET(ff_vk_create_imageview(avctx, &s->overlay_images[i].imageView, overlay->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->overlay_images[i].imageView,
+                                   overlay->img[i],
                                    av_vkfmt_from_pixfmt(overlay_fc->sw_format)[i],
                                    ff_comp_identity_map));
 
-        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->output_images[i].imageView,
+                                   out->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.output_format)[i],
                                    ff_comp_identity_map));
 
@@ -256,8 +267,6 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
 
     ff_vk_update_descriptor_set(avctx, s->pl, 0);
 
-    ff_vk_start_exec_recording(avctx, s->exec);
-
     for (int i = 0; i < planes; i++) {
         VkImageMemoryBarrier bar[3] = {
             {
@@ -301,7 +310,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
             },
         };
 
-        vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+        vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                              VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
                              0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
 
@@ -317,7 +326,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
 
     ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl);
 
-    vkCmdDispatch(s->exec->buf,
+    vkCmdDispatch(cmd_buf,
                   FFALIGN(s->vkctx.output_width,  CGROUPS[0])/CGROUPS[0],
                   FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
 
@@ -329,14 +338,10 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
     if (err)
         return err;
 
-fail:
-
-    for (int i = 0; i < planes; i++) {
-        ff_vk_destroy_imageview(avctx, &s->main_images[i].imageView);
-        ff_vk_destroy_imageview(avctx, &s->overlay_images[i].imageView);
-        ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView);
-    }
+    return err;
 
+fail:
+    ff_vk_discard_exec_deps(avctx, s->exec);
     return err;
 }
 
index 328e6bc..9b2e5b9 100644 (file)
@@ -115,6 +115,10 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     int crop_w = in->width - (in->crop_left + in->crop_right);
     int crop_h = in->height - (in->crop_top + in->crop_bottom);
 
+    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
+    s->vkctx.queue_count = GET_QUEUE_COUNT(s->vkctx.hwctx, 0, 1, 0);
+    s->vkctx.cur_queue_idx = rand() % s->vkctx.queue_count;
+
     switch (s->scaler) {
     case F_NEAREST:
         sampler_mode = VK_FILTER_NEAREST;
@@ -276,8 +280,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     }
 
     /* Execution context */
-    RET(ff_vk_create_exec_ctx(ctx, &s->exec,
-                              s->vkctx.hwctx->queue_family_comp_index));
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec));
 
     s->initialized = 1;
 
@@ -290,14 +293,20 @@ fail:
 static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
 {
     int err = 0;
+    VkCommandBuffer cmd_buf;
     ScaleVulkanContext *s = avctx->priv;
     AVVkFrame *in = (AVVkFrame *)in_f->data[0];
     AVVkFrame *out = (AVVkFrame *)out_f->data[0];
     VkImageMemoryBarrier barriers[AV_NUM_DATA_POINTERS*2];
     int barrier_count = 0;
 
+    /* Update descriptors and init the exec context */
+    ff_vk_start_exec_recording(avctx, s->exec);
+    cmd_buf = ff_vk_get_exec_buf(avctx, s->exec);
+
     for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.input_format); i++) {
-        RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->input_images[i].imageView,
+                                   in->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.input_format)[i],
                                    ff_comp_identity_map));
 
@@ -305,7 +314,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
     }
 
     for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) {
-        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->output_images[i].imageView,
+                                   out->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.output_format)[i],
                                    ff_comp_identity_map));
 
@@ -314,8 +324,6 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
 
     ff_vk_update_descriptor_set(avctx, s->pl, 0);
 
-    ff_vk_start_exec_recording(avctx, s->exec);
-
     for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.input_format); i++) {
         VkImageMemoryBarrier bar = {
             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
@@ -358,13 +366,13 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
         out->access[i] = bar.dstAccessMask;
     }
 
-    vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+    vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                          VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
                          0, NULL, 0, NULL, barrier_count, barriers);
 
     ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl);
 
-    vkCmdDispatch(s->exec->buf,
+    vkCmdDispatch(cmd_buf,
                   FFALIGN(s->vkctx.output_width,  CGROUPS[0])/CGROUPS[0],
                   FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
 
@@ -375,12 +383,10 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
     if (err)
         return err;
 
-    for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.input_format); i++)
-        ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView);
-    for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++)
-        ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView);
+    return err;
 
 fail:
+    ff_vk_discard_exec_deps(avctx, s->exec);
     return err;
 }
 
index ccf71cb..301ee43 100644 (file)
@@ -311,72 +311,116 @@ int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
 }
 
 FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
-int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, int queue)
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
 {
     VkResult ret;
     FFVkExecContext *e;
     VulkanFilterContext *s = avctx->priv;
 
+    int queue_family = s->queue_family_idx;
+    int nb_queues = s->queue_count;
+
     VkCommandPoolCreateInfo cqueue_create = {
         .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
         .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
-        .queueFamilyIndex   = queue,
+        .queueFamilyIndex   = queue_family,
     };
     VkCommandBufferAllocateInfo cbuf_create = {
         .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
         .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
-        .commandBufferCount = 1,
+        .commandBufferCount = nb_queues,
     };
-    VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
 
     e = create_exec_ctx(s);
     if (!e)
         return AVERROR(ENOMEM);
 
+    e->queues = av_mallocz(nb_queues * sizeof(*e->queues));
+    if (!e->queues)
+        return AVERROR(ENOMEM);
+
+    e->bufs = av_mallocz(nb_queues * sizeof(*e->bufs));
+    if (!e->bufs)
+        return AVERROR(ENOMEM);
+
+    /* Create command pool */
     ret = vkCreateCommandPool(s->hwctx->act_dev, &cqueue_create,
                               s->hwctx->alloc, &e->pool);
     if (ret != VK_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
                ff_vk_ret2str(ret));
-        return 1;
+        return AVERROR_EXTERNAL;
     }
 
     cbuf_create.commandPool = e->pool;
 
-    ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, &e->buf);
+    /* Allocate command buffer */
+    ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
     if (ret != VK_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
                ff_vk_ret2str(ret));
-        return 1;
+        return AVERROR_EXTERNAL;
     }
 
-    ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn,
-                        s->hwctx->alloc, &e->fence);
-    if (ret != VK_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
-               ff_vk_ret2str(ret));
-        return 1;
+    for (int i = 0; i < nb_queues; i++) {
+        FFVkQueueCtx *q = &e->queues[i];
+        vkGetDeviceQueue(s->hwctx->act_dev, queue_family, i, &q->queue);
     }
 
-    vkGetDeviceQueue(s->hwctx->act_dev, queue, 0, &e->queue);
-
     *ctx = e;
 
     return 0;
 }
 
+void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e)
+{
+    VulkanFilterContext *s = avctx->priv;
+    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
+
+    for (int j = 0; j < q->nb_buf_deps; j++)
+        av_buffer_unref(&q->buf_deps[j]);
+    q->nb_buf_deps = 0;
+
+    for (int j = 0; j < q->nb_frame_deps; j++)
+        av_frame_free(&q->frame_deps[j]);
+    q->nb_frame_deps = 0;
+
+    e->sem_wait_cnt = 0;
+    e->sem_sig_cnt = 0;
+}
+
 int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
 {
     VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
+
     VkCommandBufferBeginInfo cmd_start = {
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
         .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
     };
 
-    e->sem_wait_cnt = 0;
-    e->sem_sig_cnt = 0;
+    /* Create the fence and don't wait for it initially */
+    if (!q->fence) {
+        VkFenceCreateInfo fence_spawn = {
+            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+        };
+        ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
+                            &q->fence);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    } else {
+        vkWaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+        vkResetFences(s->hwctx->act_dev, 1, &q->fence);
+    }
 
-    ret = vkBeginCommandBuffer(e->buf, &cmd_start);
+    /* Discard queue dependencies */
+    ff_vk_discard_exec_deps(avctx, e);
+
+    ret = vkBeginCommandBuffer(e->bufs[s->cur_queue_idx], &cmd_start);
     if (ret != VK_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
                ff_vk_ret2str(ret));
@@ -386,28 +430,43 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
     return 0;
 }
 
+VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e)
+{
+    VulkanFilterContext *s = avctx->priv;
+    return e->bufs[s->cur_queue_idx];
+}
+
 int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
                        AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
 {
+    AVFrame **dst;
+    VulkanFilterContext *s = avctx->priv;
     AVVkFrame *f = (AVVkFrame *)frame->data[0];
+    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
     AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
     int planes = av_pix_fmt_count_planes(fc->sw_format);
 
     for (int i = 0; i < planes; i++) {
         e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
                                       (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
-        if (!e->sem_wait)
+        if (!e->sem_wait) {
+            ff_vk_discard_exec_deps(avctx, e);
             return AVERROR(ENOMEM);
+        }
 
         e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
                                           (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
-        if (!e->sem_wait_dst)
+        if (!e->sem_wait_dst) {
+            ff_vk_discard_exec_deps(avctx, e);
             return AVERROR(ENOMEM);
+        }
 
         e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
                                      (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
-        if (!e->sem_sig)
+        if (!e->sem_sig) {
+            ff_vk_discard_exec_deps(avctx, e);
             return AVERROR(ENOMEM);
+        }
 
         e->sem_wait[e->sem_wait_cnt] = f->sem[i];
         e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
@@ -417,6 +476,21 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
         e->sem_sig_cnt++;
     }
 
+    dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
+                          (q->nb_frame_deps + 1) * sizeof(*dst));
+    if (!dst) {
+        ff_vk_discard_exec_deps(avctx, e);
+        return AVERROR(ENOMEM);
+    }
+
+    q->frame_deps = dst;
+    q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
+    if (!q->frame_deps[q->nb_frame_deps]) {
+        ff_vk_discard_exec_deps(avctx, e);
+        return AVERROR(ENOMEM);
+    }
+    q->nb_frame_deps++;
+
     return 0;
 }
 
@@ -424,11 +498,12 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
 {
     VkResult ret;
     VulkanFilterContext *s = avctx->priv;
+    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
 
     VkSubmitInfo s_info = {
         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
         .commandBufferCount   = 1,
-        .pCommandBuffers      = &e->buf,
+        .pCommandBuffers      = &e->bufs[s->cur_queue_idx],
 
         .pWaitSemaphores      = e->sem_wait,
         .pWaitDstStageMask    = e->sem_wait_dst,
@@ -438,21 +513,57 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
         .signalSemaphoreCount = e->sem_sig_cnt,
     };
 
-    vkEndCommandBuffer(e->buf);
+    ret = vkEndCommandBuffer(e->bufs[s->cur_queue_idx]);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
 
-    ret = vkQueueSubmit(e->queue, 1, &s_info, e->fence);
+    ret = vkQueueSubmit(q->queue, 1, &s_info, q->fence);
     if (ret != VK_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
                ff_vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
 
-    vkWaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
-    vkResetFences(s->hwctx->act_dev, 1, &e->fence);
+    /* Rotate queues */
+    s->cur_queue_idx = (s->cur_queue_idx + 1) % s->queue_count;
 
     return 0;
 }
 
+int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e,
+                           AVBufferRef **deps, int nb_deps)
+{
+    AVBufferRef **dst;
+    VulkanFilterContext *s = avctx->priv;
+    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
+
+    if (!deps || !nb_deps)
+        return 0;
+
+    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
+                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
+    if (!dst)
+        goto err;
+
+    q->buf_deps = dst;
+
+    for (int i = 0; i < nb_deps; i++) {
+        q->buf_deps[q->nb_buf_deps] = deps[i];
+        if (!q->buf_deps[q->nb_buf_deps])
+            goto err;
+        q->nb_buf_deps++;
+    }
+
+    return 0;
+
+err:
+    ff_vk_discard_exec_deps(avctx, e);
+    return AVERROR(ENOMEM);
+}
+
 int ff_vk_filter_query_formats(AVFilterContext *avctx)
 {
     static const enum AVPixelFormat pixel_formats[] = {
@@ -685,9 +796,24 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
     return high ? "rgba16f" : "rgba8";
 }
 
-int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img,
-                           VkFormat fmt, const VkComponentMapping map)
+typedef struct ImageViewCtx {
+    VkImageView view;
+} ImageViewCtx;
+
+static void destroy_imageview(void *opaque, uint8_t *data)
+{
+    VulkanFilterContext *s = opaque;
+    ImageViewCtx *iv = (ImageViewCtx *)data;
+    vkDestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
+    av_free(iv);
+}
+
+int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
+                           VkImageView *v, VkImage img, VkFormat fmt,
+                           const VkComponentMapping map)
 {
+    int err;
+    AVBufferRef *buf;
     VulkanFilterContext *s = avctx->priv;
     VkImageViewCreateInfo imgview_spawn = {
         .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@@ -705,24 +831,32 @@ int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img,
         },
     };
 
+    ImageViewCtx *iv = av_mallocz(sizeof(*iv));
+
     VkResult ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn,
-                                     s->hwctx->alloc, v);
+                                     s->hwctx->alloc, &iv->view);
     if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+        av_log(avctx, AV_LOG_ERROR, "Failed to create imageview: %s\n",
                ff_vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
 
-    return 0;
-}
+    buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
+    if (!buf) {
+        destroy_imageview(s, (uint8_t *)iv);
+        return AVERROR(ENOMEM);
+    }
 
-void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView *v)
-{
-    VulkanFilterContext *s = avctx->priv;
-    if (v && *v) {
-        vkDestroyImageView(s->hwctx->act_dev, *v, s->hwctx->alloc);
-        *v = NULL;
+    /* Add to queue dependencies */
+    err = ff_vk_add_dep_exec_ctx(avctx, e, &buf, 1);
+    if (err) {
+        av_buffer_unref(&buf);
+        return err;
     }
+
+    *v = iv->view;
+
+    return 0;
 }
 
 FN_CREATING(VulkanPipeline, SPIRVShader, shader, shaders, shaders_num)
@@ -870,11 +1004,11 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
         goto print;
 
     pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
-                                       pl->descriptor_sets_num + 1);
+                                       pl->desc_layout_num + 1);
     if (!pl->desc_layout)
         return AVERROR(ENOMEM);
 
-    layout = &pl->desc_layout[pl->descriptor_sets_num];
+    layout = &pl->desc_layout[pl->desc_layout_num];
     memset(layout, 0, sizeof(*layout));
 
     { /* Create descriptor set layout descriptions */
@@ -946,11 +1080,11 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
 
         pl->desc_template_info = av_realloc_array(pl->desc_template_info,
                                                   sizeof(*pl->desc_template_info),
-                                                  pl->descriptor_sets_num + 1);
+                                                  pl->desc_layout_num + 1);
         if (!pl->desc_template_info)
             return AVERROR(ENOMEM);
 
-        dt = &pl->desc_template_info[pl->descriptor_sets_num];
+        dt = &pl->desc_template_info[pl->desc_layout_num];
         memset(dt, 0, sizeof(*dt));
 
         dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
@@ -960,13 +1094,13 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
         dt->descriptorUpdateEntryCount = num;
     }
 
-    pl->descriptor_sets_num++;
+    pl->desc_layout_num++;
 
 print:
     /* Write shader info */
     for (int i = 0; i < num; i++) {
         const struct descriptor_props *prop = &descriptor_props[desc[i].type];
-        GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);
+        GLSLA("layout (set = %i, binding = %i", pl->desc_layout_num - 1, i);
 
         if (desc[i].mem_layout)
             GLSLA(", %s", desc[i].mem_layout);
@@ -1004,15 +1138,17 @@ void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
     VulkanFilterContext *s = avctx->priv;
 
     vkUpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
-                                      pl->desc_set[set_id],
-                                      pl->desc_template[set_id], s);
+                                      pl->desc_set[set_id * s->cur_queue_idx],
+                                      pl->desc_template[set_id],
+                                      s);
 }
 
 void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
                             VkShaderStageFlagBits stage, int offset,
                             size_t size, void *src)
 {
-    vkCmdPushConstants(e->buf, e->bound_pl->pipeline_layout,
+    VulkanFilterContext *s = avctx->priv;
+    vkCmdPushConstants(e->bufs[s->cur_queue_idx], e->bound_pl->pipeline_layout,
                        stage, offset, size, src);
 }
 
@@ -1021,6 +1157,10 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
     VkResult ret;
     VulkanFilterContext *s = avctx->priv;
 
+    int queues_count = 1;
+
+    pl->descriptor_sets_num = pl->desc_layout_num * queues_count;
+
     { /* Init descriptor set pool */
         VkDescriptorPoolCreateInfo pool_create_info = {
             .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
@@ -1063,7 +1203,7 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
     { /* Finally create the pipeline layout */
         VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
             .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
-            .setLayoutCount         = pl->descriptor_sets_num,
+            .setLayoutCount         = pl->desc_layout_num,
             .pSetLayouts            = pl->desc_layout,
             .pushConstantRangeCount = pl->push_consts_num,
             .pPushConstantRanges    = pl->push_consts,
@@ -1089,7 +1229,7 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
 
         /* Create update templates for the descriptor sets */
         for (int i = 0; i < pl->descriptor_sets_num; i++) {
-            desc_template_info = &pl->desc_template_info[i];
+            desc_template_info = &pl->desc_template_info[i % pl->desc_layout_num];
             desc_template_info->pipelineLayout = pl->pipeline_layout;
             ret = vkCreateDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                    desc_template_info,
@@ -1153,27 +1293,53 @@ int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
 void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
                               VulkanPipeline *pl)
 {
-    vkCmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
+    VulkanFilterContext *s = avctx->priv;
+
+    vkCmdBindPipeline(e->bufs[s->cur_queue_idx], pl->bind_point, pl->pipeline);
 
-    vkCmdBindDescriptorSets(e->buf, pl->bind_point, pl->pipeline_layout, 0,
-                            pl->descriptor_sets_num, pl->desc_set, 0, 0);
+    vkCmdBindDescriptorSets(e->bufs[s->cur_queue_idx], pl->bind_point,
+                            pl->pipeline_layout, 0, pl->descriptor_sets_num,
+                            pl->desc_set, 0, 0);
 
     e->bound_pl = pl;
 }
 
 static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
 {
-    vkDestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);
+    /* Make sure all queues have finished executing */
+    for (int i = 0; i < s->queue_count; i++) {
+        FFVkQueueCtx *q = &e->queues[i];
 
-    if (e->buf   != VK_NULL_HANDLE)
-        vkFreeCommandBuffers(s->hwctx->act_dev, e->pool, 1, &e->buf);
-    if (e->pool  != VK_NULL_HANDLE)
-        vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
+        if (q->fence) {
+            vkWaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+            vkResetFences(s->hwctx->act_dev, 1, &q->fence);
+        }
+
+        /* Free the fence */
+        if (q->fence)
+            vkDestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
 
-    av_free(e->sem_wait);
-    av_free(e->sem_wait_dst);
-    av_free(e->sem_sig);
+        /* Free buffer dependencies */
+        for (int j = 0; j < q->nb_buf_deps; j++)
+            av_buffer_unref(&q->buf_deps[j]);
+        av_free(q->buf_deps);
 
+        /* Free frame dependencies */
+        for (int j = 0; j < q->nb_frame_deps; j++)
+            av_frame_free(&q->frame_deps[j]);
+        av_free(q->frame_deps);
+    }
+
+    if (e->bufs)
+        vkFreeCommandBuffers(s->hwctx->act_dev, e->pool, s->queue_count, e->bufs);
+    if (e->pool)
+        vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
+
+    av_freep(&e->bufs);
+    av_freep(&e->queues);
+    av_freep(&e->sem_sig);
+    av_freep(&e->sem_wait);
+    av_freep(&e->sem_wait_dst);
     av_free(e);
 }
 
@@ -1191,7 +1357,7 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
     vkDestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
                             s->hwctx->alloc);
 
-    for (int i = 0; i < pl->descriptor_sets_num; i++) {
+    for (int i = 0; i < pl->desc_layout_num; i++) {
         if (pl->desc_template && pl->desc_template[i])
             vkDestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
                                               s->hwctx->alloc);
@@ -1229,6 +1395,10 @@ void ff_vk_filter_uninit(AVFilterContext *avctx)
 
     glslang_uninit();
 
+    for (int i = 0; i < s->exec_ctx_num; i++)
+        free_exec_ctx(s, s->exec_ctx[i]);
+    av_freep(&s->exec_ctx);
+
     for (int i = 0; i < s->samplers_num; i++) {
         vkDestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc);
         av_free(s->samplers[i]);
@@ -1239,10 +1409,6 @@ void ff_vk_filter_uninit(AVFilterContext *avctx)
         free_pipeline(s, s->pipelines[i]);
     av_freep(&s->pipelines);
 
-    for (int i = 0; i < s->exec_ctx_num; i++)
-        free_exec_ctx(s, s->exec_ctx[i]);
-    av_freep(&s->exec_ctx);
-
     av_freep(&s->scratch);
     s->scratch_size = 0;
 
index 30a64ce..f9a4dc5 100644 (file)
             goto fail;                                                         \
     } while (0)
 
+/* Gets the queues count for a single queue family */
+#define GET_QUEUE_COUNT(hwctx, graph, comp, tx) (                   \
+    graph ?  hwctx->nb_graphics_queues :                            \
+    comp  ? (hwctx->nb_comp_queues ?                                \
+             hwctx->nb_comp_queues : hwctx->nb_graphics_queues) :   \
+    tx    ? (hwctx->nb_tx_queues ? hwctx->nb_tx_queues :            \
+             (hwctx->nb_comp_queues ?                               \
+              hwctx->nb_comp_queues : hwctx->nb_graphics_queues)) : \
+    0                                                               \
+)
+
 /* Useful for attaching immutable samplers to arrays */
 #define DUP_SAMPLER_ARRAY4(x) (VkSampler []){ x, x, x, x, }
 
@@ -98,6 +109,7 @@ typedef struct VulkanPipeline {
     VkDescriptorPool            desc_pool;
     VkDescriptorSet            *desc_set;
     VkDescriptorUpdateTemplate *desc_template;
+    int                         desc_layout_num;
     int                         descriptor_sets_num;
     int                         pool_size_desc_num;
 
@@ -106,11 +118,29 @@ typedef struct VulkanPipeline {
     VkDescriptorPoolSize *pool_size_desc;
 } VulkanPipeline;
 
+typedef struct FFVkQueueCtx {
+    VkFence fence;
+    VkQueue queue;
+
+    /* Buffer dependencies */
+    AVBufferRef **buf_deps;
+    int nb_buf_deps;
+    int buf_deps_alloc_size;
+
+    /* Frame dependencies */
+    AVFrame **frame_deps;
+    int nb_frame_deps;
+    int frame_deps_alloc_size;
+} FFVkQueueCtx;
+
 typedef struct FFVkExecContext {
     VkCommandPool pool;
-    VkCommandBuffer buf;
-    VkQueue queue;
-    VkFence fence;
+    VkCommandBuffer *bufs;
+    FFVkQueueCtx *queues;
+
+    AVBufferRef ***deps;
+    int *nb_deps;
+    int *dep_alloc_size;
 
     VulkanPipeline *bound_pl;
 
@@ -134,6 +164,11 @@ typedef struct VulkanFilterContext {
     AVHWDeviceContext     *device;
     AVVulkanDeviceContext *hwctx;
 
+    /* State - mirrored with the exec ctx */
+    int cur_queue_idx;
+    int queue_family_idx;
+    int queue_count;
+
     /* Properties */
     int                 output_width;
     int                output_height;
@@ -192,15 +227,12 @@ VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
 
 /**
  * Create an imageview.
+ * Guaranteed to remain alive until the queue submission has finished executing,
+ * and will be destroyed after that.
  */
-int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img,
-                           VkFormat fmt, const VkComponentMapping map);
-
-/**
- * Destroy an imageview. Command buffer must have completed executing, which
- * ff_vk_submit_exec_queue() will ensure
- */
-void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView *v);
+int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
+                           VkImageView *v, VkImage img, VkFormat fmt,
+                           const VkComponentMapping map);
 
 /**
  * Define a push constant for a given stage into a pipeline.
@@ -264,7 +296,7 @@ void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
  * Init an execution context for command recording and queue submission.
  * WIll be auto-freed on uninit.
  */
-int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, int queue);
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx);
 
 /**
  * Begin recording to the command buffer. Previous execution must have been
@@ -288,7 +320,23 @@ void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
                             size_t size, void *src);
 
 /**
- * Adds a frame as a queue dependency. This manages semaphore signalling.
+ * Gets the command buffer to use for this submission from the exe context.
+ */
+VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e);
+
+/**
+ * Adds a generic AVBufferRef as a queue depenency.
+ */
+int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e,
+                           AVBufferRef **deps, int nb_deps);
+
+/**
+ * Discards all queue dependencies
+ */
+void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e);
+
+/**
+ * Adds a frame as a queue dependency. This also manages semaphore signalling.
  * Must be called before submission.
  */
 int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,