avutil/md5: fix misaligned reads
author: James Almer <jamrial@gmail.com>
Fri, 3 Mar 2017 03:25:54 +0000 (00:25 -0300)
committer: James Almer <jamrial@gmail.com>
Fri, 3 Mar 2017 16:36:49 +0000 (13:36 -0300)
This makes ubsan happy and also considerably increases performance on
big endian systems.

Tested on an IBM POWER7 at 3.55 GHz.

Before:

2.24user 0.14system 0:02.39elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
2.26user 0.11system 0:02.38elapsed 99%CPU (0avgtext+0avgdata 2688maxresident)k
2.23user 0.15system 0:02.38elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
2.25user 0.12system 0:02.38elapsed 100%CPU (0avgtext+0avgdata 2624maxresident)k
2.20user 0.15system 0:02.36elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k

After:

1.86user 0.13system 0:02.00elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
1.89user 0.11system 0:02.01elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
1.85user 0.14system 0:02.00elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
1.84user 0.15system 0:01.99elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
1.89user 0.13system 0:02.02elapsed 99%CPU (0avgtext+0avgdata 2688maxresident)k

Tested-by: Nicolas George <george@nsup.org>
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: James Almer <jamrial@gmail.com>
libavutil/md5.c

index 8c36aa8..d3698dc 100644 (file)
@@ -86,14 +86,14 @@ static const uint32_t T[64] = { // T[i]= fabs(sin(i+1)<<32)
                                                                         \
         if (i < 32) {                                                   \
             if (i < 16)                                                 \
-                a += (d ^ (b & (c ^ d)))  + X[       i  & 15];          \
+                a += (d ^ (b & (c ^ d)))  + AV_RL32(X+(       i  & 15));\
             else                                                        \
-                a += ((d & b) | (~d & c)) + X[(1 + 5*i) & 15];          \
+                a += ((d & b) | (~d & c)) + AV_RL32(X+((1 + 5*i) & 15));\
         } else {                                                        \
             if (i < 48)                                                 \
-                a += (b ^ c ^ d)          + X[(5 + 3*i) & 15];          \
+                a += (b ^ c ^ d)          + AV_RL32(X+((5 + 3*i) & 15));\
             else                                                        \
-                a += (c ^ (b | ~d))       + X[(    7*i) & 15];          \
+                a += (c ^ (b | ~d))       + AV_RL32(X+((    7*i) & 15));\
         }                                                               \
         a = b + (a << t | a >> (32 - t));                               \
     } while (0)
@@ -112,11 +112,6 @@ static void body(uint32_t ABCD[4], uint32_t *src, int nblocks)
 
         X = src + n * 16;
 
-#if HAVE_BIGENDIAN
-        for (i = 0; i < 16; i++)
-            X[i] = av_bswap32(X[i]);
-#endif
-
 #if CONFIG_SMALL
         for (i = 0; i < 64; i++) {
             CORE(i, a, b, c, d);
@@ -173,7 +168,7 @@ void av_md5_update(AVMD5 *ctx, const uint8_t *src, int len)
     }
 
     end = src + (len & ~63);
-    if (HAVE_BIGENDIAN || (!HAVE_FAST_UNALIGNED && ((intptr_t)src & 3))) {
+    if (!HAVE_FAST_UNALIGNED && ((intptr_t)src & 3)) {
        while (src < end) {
            memcpy(ctx->block, src, 64);
            body(ctx->ABCD, (uint32_t *) ctx->block, 1);