+#if !COMPILE_TEMPLATE_MMX2
+/**
+ * Expand eight 8-bit dither coefficients into eight 32-bit values, each
+ * shifted left by 12 bits, and store them into the context.
+ *
+ * @param c         context; 32 bytes are written at displacement DITHER32
+ *                  relative to &c->redDither (DITHER32 is a string macro
+ *                  defined outside this block)
+ * @param srcDither the 8 dither bytes, loaded with a single 64-bit movq
+ * @param rot       if non-zero, the 8 bytes are rotated by three positions
+ *                  before expansion, so output i comes from
+ *                  srcDither[(i + 3) & 7]
+ *
+ * Uses mm0 and mm4-mm7 as scratch; no emms is issued here.
+ */
+static av_always_inline void
+dither_8to32(SwsContext *c, const uint8_t *srcDither, int rot)
+{
+    if (rot) {
+        __asm__ volatile("pxor      %%mm0, %%mm0\n\t"
+                         "movq       (%0), %%mm4\n\t"
+                         /* rotate the qword right by 24 bits (3 bytes) */
+                         "movq      %%mm4, %%mm5\n\t"
+                         "psrlq       $24, %%mm4\n\t"
+                         "psllq       $40, %%mm5\n\t"
+                         "por       %%mm5, %%mm4\n\t"
+                         /* bytes -> words: mm4 = elements 0-3, mm6 = 4-7 */
+                         "movq      %%mm4, %%mm6\n\t"
+                         "punpcklbw %%mm0, %%mm4\n\t"
+                         "punpckhbw %%mm0, %%mm6\n\t"
+                         /* words -> dwords: mm4 = 0-1, mm5 = 2-3,
+                          *                  mm6 = 4-5, mm7 = 6-7 */
+                         "movq      %%mm4, %%mm5\n\t"
+                         "movq      %%mm6, %%mm7\n\t"
+                         "punpcklwd %%mm0, %%mm4\n\t"
+                         "punpckhwd %%mm0, %%mm5\n\t"
+                         "punpcklwd %%mm0, %%mm6\n\t"
+                         "punpckhwd %%mm0, %%mm7\n\t"
+                         /* the lanes are 32 bits wide now, so the shift must
+                          * be a dword shift; a word shift would drop the high
+                          * bits of every coefficient >= 16 */
+                         "pslld       $12, %%mm4\n\t"
+                         "pslld       $12, %%mm5\n\t"
+                         "pslld       $12, %%mm6\n\t"
+                         "pslld       $12, %%mm7\n\t"
+                         /* one register per pair of outputs, in order */
+                         "movq      %%mm4, "DITHER32"+0(%1)\n\t"
+                         "movq      %%mm5, "DITHER32"+8(%1)\n\t"
+                         "movq      %%mm6, "DITHER32"+16(%1)\n\t"
+                         "movq      %%mm7, "DITHER32"+24(%1)\n\t"
+                         :: "r"(srcDither), "r"(&c->redDither)
+                         /* the asm reads *srcDither and writes into *c */
+                         : "memory"
+                         );
+    } else {
+        __asm__ volatile("pxor      %%mm0, %%mm0\n\t"
+                         "movq       (%0), %%mm4\n\t"
+                         /* bytes -> words: mm4 = elements 0-3, mm6 = 4-7 */
+                         "movq      %%mm4, %%mm6\n\t"
+                         "punpcklbw %%mm0, %%mm4\n\t"
+                         "punpckhbw %%mm0, %%mm6\n\t"
+                         /* words -> dwords: mm4 = 0-1, mm5 = 2-3,
+                          *                  mm6 = 4-5, mm7 = 6-7 */
+                         "movq      %%mm4, %%mm5\n\t"
+                         "movq      %%mm6, %%mm7\n\t"
+                         "punpcklwd %%mm0, %%mm4\n\t"
+                         "punpckhwd %%mm0, %%mm5\n\t"
+                         "punpcklwd %%mm0, %%mm6\n\t"
+                         "punpckhwd %%mm0, %%mm7\n\t"
+                         /* dword shift: the lanes are 32 bits wide here */
+                         "pslld       $12, %%mm4\n\t"
+                         "pslld       $12, %%mm5\n\t"
+                         "pslld       $12, %%mm6\n\t"
+                         "pslld       $12, %%mm7\n\t"
+                         /* one register per pair of outputs, in order */
+                         "movq      %%mm4, "DITHER32"+0(%1)\n\t"
+                         "movq      %%mm5, "DITHER32"+8(%1)\n\t"
+                         "movq      %%mm6, "DITHER32"+16(%1)\n\t"
+                         "movq      %%mm7, "DITHER32"+24(%1)\n\t"
+                         :: "r"(srcDither), "r"(&c->redDither)
+                         /* the asm reads *srcDither and writes into *c */
+                         : "memory"
+                         );
+    }
+}
+#endif
+