- : "r" (blk1 - len), "r" (blk2 - len), "r" ((x86_reg) stride));
-}
-
-static inline void sad8_1_mmxext(uint8_t *blk1, uint8_t *blk2,
- int stride, int h)
-{
- __asm__ volatile (
- ".p2align 4 \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "psadbw (%2), %%mm0 \n\t"
- "psadbw (%2, %3), %%mm1 \n\t"
- "paddw %%mm0, %%mm6 \n\t"
- "paddw %%mm1, %%mm6 \n\t"
- "lea (%1,%3,2), %1 \n\t"
- "lea (%2,%3,2), %2 \n\t"
- "sub $2, %0 \n\t"
- " jg 1b \n\t"
- : "+r" (h), "+r" (blk1), "+r" (blk2)
- : "r" ((x86_reg) stride));
-}
-
-static int sad16_sse2(MpegEncContext *v, uint8_t *blk2, uint8_t *blk1,
- int stride, int h)
-{
- int ret;
- __asm__ volatile (
- "pxor %%xmm2, %%xmm2 \n\t"
- ".p2align 4 \n\t"
- "1: \n\t"
- "movdqu (%1), %%xmm0 \n\t"
- "movdqu (%1, %4), %%xmm1 \n\t"
- "psadbw (%2), %%xmm0 \n\t"
- "psadbw (%2, %4), %%xmm1 \n\t"
- "paddw %%xmm0, %%xmm2 \n\t"
- "paddw %%xmm1, %%xmm2 \n\t"
- "lea (%1,%4,2), %1 \n\t"
- "lea (%2,%4,2), %2 \n\t"
- "sub $2, %0 \n\t"
- " jg 1b \n\t"
- "movhlps %%xmm2, %%xmm0 \n\t"
- "paddw %%xmm0, %%xmm2 \n\t"
- "movd %%xmm2, %3 \n\t"
- : "+r" (h), "+r" (blk1), "+r" (blk2), "=r" (ret)
- : "r" ((x86_reg) stride));
- return ret;
-}
-
-static inline void sad8_x2a_mmxext(uint8_t *blk1, uint8_t *blk2,
- int stride, int h)
-{
- __asm__ volatile (
- ".p2align 4 \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "pavgb 1(%1), %%mm0 \n\t"
- "pavgb 1(%1, %3), %%mm1 \n\t"
- "psadbw (%2), %%mm0 \n\t"
- "psadbw (%2, %3), %%mm1 \n\t"
- "paddw %%mm0, %%mm6 \n\t"
- "paddw %%mm1, %%mm6 \n\t"
- "lea (%1,%3,2), %1 \n\t"
- "lea (%2,%3,2), %2 \n\t"
- "sub $2, %0 \n\t"
- " jg 1b \n\t"
- : "+r" (h), "+r" (blk1), "+r" (blk2)
- : "r" ((x86_reg) stride));
-}
-
-static inline void sad8_y2a_mmxext(uint8_t *blk1, uint8_t *blk2,
- int stride, int h)
-{
- __asm__ volatile (
- "movq (%1), %%mm0 \n\t"
- "add %3, %1 \n\t"
- ".p2align 4 \n\t"
- "1: \n\t"
- "movq (%1), %%mm1 \n\t"
- "movq (%1, %3), %%mm2 \n\t"
- "pavgb %%mm1, %%mm0 \n\t"
- "pavgb %%mm2, %%mm1 \n\t"
- "psadbw (%2), %%mm0 \n\t"
- "psadbw (%2, %3), %%mm1 \n\t"
- "paddw %%mm0, %%mm6 \n\t"
- "paddw %%mm1, %%mm6 \n\t"
- "movq %%mm2, %%mm0 \n\t"
- "lea (%1,%3,2), %1 \n\t"
- "lea (%2,%3,2), %2 \n\t"
- "sub $2, %0 \n\t"
- " jg 1b \n\t"
- : "+r" (h), "+r" (blk1), "+r" (blk2)
- : "r" ((x86_reg) stride));
-}
-
-static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
- int stride, int h)
-{
- __asm__ volatile (
- "movq "MANGLE(bone)", %%mm5 \n\t"
- "movq (%1), %%mm0 \n\t"
- "pavgb 1(%1), %%mm0 \n\t"
- "add %3, %1 \n\t"
- ".p2align 4 \n\t"
- "1: \n\t"
- "movq (%1), %%mm1 \n\t"
- "movq (%1,%3), %%mm2 \n\t"
- "pavgb 1(%1), %%mm1 \n\t"
- "pavgb 1(%1,%3), %%mm2 \n\t"
- "psubusb %%mm5, %%mm1 \n\t"
- "pavgb %%mm1, %%mm0 \n\t"
- "pavgb %%mm2, %%mm1 \n\t"
- "psadbw (%2), %%mm0 \n\t"
- "psadbw (%2,%3), %%mm1 \n\t"
- "paddw %%mm0, %%mm6 \n\t"
- "paddw %%mm1, %%mm6 \n\t"
- "movq %%mm2, %%mm0 \n\t"
- "lea (%1,%3,2), %1 \n\t"
- "lea (%2,%3,2), %2 \n\t"
- "sub $2, %0 \n\t"
- " jg 1b \n\t"
- : "+r" (h), "+r" (blk1), "+r" (blk2)
- : "r" ((x86_reg) stride)
- NAMED_CONSTRAINTS_ADD(bone));