swscale: Add support for NV24 and NV42
author Philip Langdale <philipl@overt.org>
Fri, 10 May 2019 04:02:09 +0000 (21:02 -0700)
committer Philip Langdale <philipl@overt.org>
Sun, 12 May 2019 14:51:02 +0000 (07:51 -0700)
The implementation is pretty straightforward. Most of the existing
NV12 codepaths work regardless of subsampling and are re-used as is.
Where necessary, I wrote slightly different NV24 versions.

Finally, the one thing that confused me for a long time was the
asm-specific x86 path that did an explicit exclusion check for NV12.
I replaced that with a semi-planar check and also updated the
equivalent PPC code, which Lauri kindly checked.

20 files changed:
libswscale/input.c
libswscale/output.c
libswscale/ppc/swscale_altivec.c
libswscale/ppc/swscale_vsx.c
libswscale/swscale_unscaled.c
libswscale/utils.c
libswscale/version.h
libswscale/x86/swscale_template.c
tests/ref/fate/filter-pixfmts-copy
tests/ref/fate/filter-pixfmts-crop
tests/ref/fate/filter-pixfmts-field
tests/ref/fate/filter-pixfmts-fieldorder
tests/ref/fate/filter-pixfmts-hflip
tests/ref/fate/filter-pixfmts-il
tests/ref/fate/filter-pixfmts-null
tests/ref/fate/filter-pixfmts-pad
tests/ref/fate/filter-pixfmts-scale
tests/ref/fate/filter-pixfmts-transpose
tests/ref/fate/filter-pixfmts-vflip
tests/ref/fate/sws-pixdesc-query

index c2dc356..064f8da 100644 (file)
@@ -1020,9 +1020,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
         c->chrToYV12 = uyvyToUV_c;
         break;
     case AV_PIX_FMT_NV12:
+    case AV_PIX_FMT_NV24:
         c->chrToYV12 = nv12ToUV_c;
         break;
     case AV_PIX_FMT_NV21:
+    case AV_PIX_FMT_NV42:
         c->chrToYV12 = nv21ToUV_c;
         break;
     case AV_PIX_FMT_RGB8:
index d3401f0..26b0ff3 100644 (file)
@@ -410,7 +410,8 @@ static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterS
     const uint8_t *chrDither = c->chrDither8;
     int i;
 
-    if (dstFormat == AV_PIX_FMT_NV12)
+    if (dstFormat == AV_PIX_FMT_NV12 ||
+        dstFormat == AV_PIX_FMT_NV24)
         for (i=0; i<chrDstW; i++) {
             int u = chrDither[i & 7] << 12;
             int v = chrDither[(i + 3) & 7] << 12;
@@ -2496,7 +2497,8 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
     } else {
         *yuv2plane1 = yuv2plane1_8_c;
         *yuv2planeX = yuv2planeX_8_c;
-        if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)
+        if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21 ||
+            dstFormat == AV_PIX_FMT_NV24 || dstFormat == AV_PIX_FMT_NV42)
             *yuv2nv12cX = yuv2nv12cX_c;
     }
 
index 3cd9782..6b8cc2c 100644 (file)
@@ -247,8 +247,7 @@ av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
     if (c->srcBpc == 8 && c->dstBpc <= 14) {
         c->hyScale = c->hcScale = hScale_real_altivec;
     }
-    if (!is16BPS(dstFormat) && !isNBPS(dstFormat) &&
-        dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
+    if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat) &&
         dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE &&
         !c->needAlpha) {
         c->yuv2planeX = yuv2planeX_altivec;
index a617f76..75dee5e 100644 (file)
@@ -2096,8 +2096,7 @@ av_cold void ff_sws_init_swscale_vsx(SwsContext *c)
                                                      : hScale16To15_vsx;
         }
     }
-    if (!is16BPS(dstFormat) && !isNBPS(dstFormat) &&
-        dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
+    if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat) &&
         dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE &&
         !c->needAlpha) {
         c->yuv2planeX = yuv2planeX_vsx;
index be04a23..e0b9e99 100644 (file)
@@ -180,6 +180,47 @@ static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[],
     return srcSliceH;
 }
 
+static int planarToNv24Wrapper(SwsContext *c, const uint8_t *src[],
+                               int srcStride[], int srcSliceY,
+                               int srcSliceH, uint8_t *dstParam[],
+                               int dstStride[])
+{
+    uint8_t *dst = dstParam[1] + dstStride[1] * srcSliceY;
+
+    copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
+              dstParam[0], dstStride[0]);
+
+    if (c->dstFormat == AV_PIX_FMT_NV24)
+        interleaveBytes(src[1], src[2], dst, c->chrSrcW, srcSliceH,
+                        srcStride[1], srcStride[2], dstStride[1]);
+    else
+        interleaveBytes(src[2], src[1], dst, c->chrSrcW, srcSliceH,
+                        srcStride[2], srcStride[1], dstStride[1]);
+
+    return srcSliceH;
+}
+
+static int nv24ToPlanarWrapper(SwsContext *c, const uint8_t *src[],
+                               int srcStride[], int srcSliceY,
+                               int srcSliceH, uint8_t *dstParam[],
+                               int dstStride[])
+{
+    uint8_t *dst1 = dstParam[1] + dstStride[1] * srcSliceY;
+    uint8_t *dst2 = dstParam[2] + dstStride[2] * srcSliceY;
+
+    copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
+              dstParam[0], dstStride[0]);
+
+    if (c->srcFormat == AV_PIX_FMT_NV24)
+        deinterleaveBytes(src[1], dst1, dst2, c->chrSrcW, srcSliceH,
+                          srcStride[1], dstStride[1], dstStride[2]);
+    else
+        deinterleaveBytes(src[1], dst2, dst1, c->chrSrcW, srcSliceH,
+                          srcStride[1], dstStride[2], dstStride[1]);
+
+    return srcSliceH;
+}
+
 static int planarToP01xWrapper(SwsContext *c, const uint8_t *src8[],
                                int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t *dstParam8[],
@@ -1872,11 +1913,21 @@ void ff_get_unscaled_swscale(SwsContext *c)
         (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)) {
         c->swscale = planarToNv12Wrapper;
     }
+    /* yv24_to_nv24 */
+    if ((srcFormat == AV_PIX_FMT_YUV444P || srcFormat == AV_PIX_FMT_YUVA444P) &&
+        (dstFormat == AV_PIX_FMT_NV24 || dstFormat == AV_PIX_FMT_NV42)) {
+        c->swscale = planarToNv24Wrapper;
+    }
     /* nv12_to_yv12 */
     if (dstFormat == AV_PIX_FMT_YUV420P &&
         (srcFormat == AV_PIX_FMT_NV12 || srcFormat == AV_PIX_FMT_NV21)) {
         c->swscale = nv12ToPlanarWrapper;
     }
+    /* nv24_to_yv24 */
+    if (dstFormat == AV_PIX_FMT_YUV444P &&
+        (srcFormat == AV_PIX_FMT_NV24 || srcFormat == AV_PIX_FMT_NV42)) {
+        c->swscale = nv24ToPlanarWrapper;
+    }
     /* yuv2bgr */
     if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUV422P ||
          srcFormat == AV_PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) &&
index df68bcc..1b1f779 100644 (file)
@@ -264,6 +264,8 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
     [AV_PIX_FMT_YUVA422P12LE] = { 1, 1 },
     [AV_PIX_FMT_YUVA444P12BE] = { 1, 1 },
     [AV_PIX_FMT_YUVA444P12LE] = { 1, 1 },
+    [AV_PIX_FMT_NV24]        = { 1, 1 },
+    [AV_PIX_FMT_NV42]        = { 1, 1 },
 };
 
 int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
index 0e28a76..891c76d 100644 (file)
@@ -28,7 +28,7 @@
 
 #define LIBSWSCALE_VERSION_MAJOR   5
 #define LIBSWSCALE_VERSION_MINOR   4
-#define LIBSWSCALE_VERSION_MICRO 100
+#define LIBSWSCALE_VERSION_MICRO 101
 
 #define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
                                                LIBSWSCALE_VERSION_MINOR, \
index 7c30470..823056c 100644 (file)
@@ -1499,8 +1499,8 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
     enum AVPixelFormat dstFormat = c->dstFormat;
 
     c->use_mmx_vfilter= 0;
-    if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && dstFormat != AV_PIX_FMT_NV12
-        && dstFormat != AV_PIX_FMT_NV21 && dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE
+    if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat)
+        && dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE
         && !(c->flags & SWS_BITEXACT)) {
             if (c->flags & SWS_ACCURATE_RND) {
                 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
index 0609344..4675b6e 100644 (file)
@@ -53,6 +53,8 @@ monob               8b04f859fee6a0be856be184acd7a0b5
 monow               54d16d2c01abfd72ecdb5e51e283937c
 nv12                8e24feb2c544dc26a20047a71e4c27aa
 nv21                335d85c9af6110f26ae9e187a82ed2cf
+nv24                f30fc8d0ac40af69e119ea919a314572
+nv42                29a212f70f8780fe0eb99abcae81894d
 p010be              7f9842d6015026136bad60d03c035cc3
 p010le              c453421b9f726bdaf2bacf59a492c43b
 p016be              7f9842d6015026136bad60d03c035cc3
index 4e4b6e4..4b9f67c 100644 (file)
@@ -51,6 +51,8 @@ grayf32be           cf40ec06a8abe54852b7f85a00549eec
 grayf32le           b672526c9da9c8959ab881f242f6890a
 nv12                92cda427f794374731ec0321ee00caac
 nv21                1bcfc197f4fb95de85ba58182d8d2f69
+nv24                514c8f12082f0737e558778cbe7de258
+nv42                ece9baae1c5de579dac2c66a89e08ef3
 p010be              8b2de2eb6b099bbf355bfc55a0694ddc
 p010le              373b50c766dfd0a8e79c9a73246d803a
 p016be              8b2de2eb6b099bbf355bfc55a0694ddc
index d59c982..059347e 100644 (file)
@@ -53,6 +53,8 @@ monob               2129cc72a484d7e10a44de9117aa9f80
 monow               03d783611d265cae78293f88ea126ea1
 nv12                16f7a46708ef25ebd0b72e47920cc11e
 nv21                7294574037cc7f9373ef5695d8ebe809
+nv24                3b100fb527b64ee2b2d7120da573faf5
+nv42                1841ce853152d86b27c130f319ea0db2
 p010be              a0311a09bba7383553267d2b3b9c075e
 p010le              ee09a18aefa3ebe97715b3a7312cb8ff
 p016be              a0311a09bba7383553267d2b3b9c075e
index 1996649..066b944 100644 (file)
@@ -49,6 +49,8 @@ gray9be             ec877f5bcf0ea275a6f36c12cc9adf11
 gray9le             fba944fde7923d5089f4f52d12988b9e
 grayf32be           1aa7960131f880c54fe3c77f13448674
 grayf32le           4029ac9d197f255794c1b9e416520fc7
+nv24                4fdbef26042c77f012df114e666efdb2
+nv42                59608290fece913e6b7d61edf581a529
 rgb0                2e3d8c91c7a83d451593dfd06607ff39
 rgb24               b82577f8215d3dc2681be60f1da247af
 rgb444be            1c3afc3a0c53c51139c76504f59bb1f4
index f171a95..100dd70 100644 (file)
@@ -51,6 +51,8 @@ grayf32be           a69add7bbf892a71fe81b3b75982dbe2
 grayf32le           4563e176a35dc8a8a07e0829fad5eb88
 nv12                801e58f1be5fd0b5bc4bf007c604b0b4
 nv21                9f10dfff8963dc327d3395af21f0554f
+nv24                f0c5b2f42970f8d4003621d8857a872f
+nv42                4dcf9aec82b110712b396a8b365dcb13
 p010be              744b13e44d39e1ff7588983fa03e0101
 p010le              a50b160346ab94f55a425065b57006f0
 p016be              744b13e44d39e1ff7588983fa03e0101
index 0839a77..979eb0c 100644 (file)
@@ -53,6 +53,8 @@ monob               faba75df28033ba7ce3d82ff2a99ee68
 monow               6e9cfb8d3a344c5f0c3e1d5e1297e580
 nv12                3c3ba9b1b4c4dfff09c26f71b51dd146
 nv21                ab586d8781246b5a32d8760a61db9797
+nv24                554153c71d142e3fd8e40b7dcaaec229
+nv42                d699724c8deaeb4f87faf2766512eec3
 p010be              3df51286ef66b53e3e283dbbab582263
 p010le              eadcd8241e97e35b2b47d5eb2eaea6cd
 p016be              3df51286ef66b53e3e283dbbab582263
index 0609344..4675b6e 100644 (file)
@@ -53,6 +53,8 @@ monob               8b04f859fee6a0be856be184acd7a0b5
 monow               54d16d2c01abfd72ecdb5e51e283937c
 nv12                8e24feb2c544dc26a20047a71e4c27aa
 nv21                335d85c9af6110f26ae9e187a82ed2cf
+nv24                f30fc8d0ac40af69e119ea919a314572
+nv42                29a212f70f8780fe0eb99abcae81894d
 p010be              7f9842d6015026136bad60d03c035cc3
 p010le              c453421b9f726bdaf2bacf59a492c43b
 p016be              7f9842d6015026136bad60d03c035cc3
index c863d54..41ccec8 100644 (file)
@@ -23,6 +23,8 @@ gray16le            468bda6155bdc7a7a20c34d6e599fd16
 gray9le             f8f3dfe31ca5fcba828285bceefdab9a
 nv12                381574979cb04be10c9168540310afad
 nv21                0fdeb2cdd56cf5a7147dc273456fa217
+nv24                193b9eadcc06ad5081609f76249b3e47
+nv42                1738ad3c31c6c16e17679f5b09ce4677
 rgb0                78d500c8361ab6423a4826a00268c908
 rgb24               17f9e2e0c609009acaf2175c42d4a2a5
 rgba                b157c90191463d34fb3ce77b36c96386
index 3226e8b..2f38241 100644 (file)
@@ -53,6 +53,8 @@ monob               f01cb0b623357387827902d9d0963435
 monow               35c68b86c226d6990b2dcb573a05ff6b
 nv12                b118d24a3653fe66e5d9e079033aef79
 nv21                c74bb1c10dbbdee8a1f682b194486c4d
+nv24                2aa6e805bf6d4179ed8d7dea37d75db3
+nv42                80714d1eb2d8bcaeab3abc3124df1abd
 p010be              1d6726d94bf1385996a9a9840dd0e878
 p010le              4b316f2b9e18972299beb73511278fa8
 p016be              31e204018cbb53f8988c4e1174ea8ce9
index 7bcb88c..b2ab3b7 100644 (file)
@@ -51,6 +51,8 @@ grayf32be           823288e1ec497bb1f22c070e502e5272
 grayf32le           6e9ec0e1cac3617f3041e681afd2c575
 nv12                1965e3826144686748f2f6b516fca5ba
 nv21                292adaf5271c5c8516b71640458c01f4
+nv24                ea9de8b47faed722ee40182f89489beb
+nv42                636af6cd6a4f3ac5edc0fc3ce3c56d63
 p010be              ad0de2cc9bff81688b182a870fcf7000
 p010le              e7ff5143595021246733ce6bd0a769e8
 p016be              ad0de2cc9bff81688b182a870fcf7000
index 933ea0c..e4d58f9 100644 (file)
@@ -53,6 +53,8 @@ monob               7810c4857822ccfc844d78f5e803269a
 monow               90a947bfcd5f2261e83b577f48ec57b1
 nv12                261ebe585ae2aa4e70d39a10c1679294
 nv21                2909feacd27bebb080c8e0fa41795269
+nv24                334420b9d3df84499d2ca16bb66eed2b
+nv42                ba4063e2795c17fea3c8a646b01fd1f5
 p010be              06e9354b6e0e38ba41736352cedc0bd5
 p010le              fd18d322bffbf5816902c13102872e22
 p016be              06e9354b6e0e38ba41736352cedc0bd5
index 6c41a86..bc8147e 100644 (file)
@@ -178,6 +178,8 @@ isYUV:
   nv20be
   nv20le
   nv21
+  nv24
+  nv42
   p010be
   p010le
   p016be
@@ -268,6 +270,8 @@ isPlanarYUV:
   nv20be
   nv20le
   nv21
+  nv24
+  nv42
   p010be
   p010le
   p016be
@@ -703,6 +707,8 @@ Planar:
   nv20be
   nv20le
   nv21
+  nv24
+  nv42
   p010be
   p010le
   p016be