X-Git-Url: https://git.piment-noir.org/?p=deb_ffmpeg.git;a=blobdiff_plain;f=ffmpeg%2Flibavcodec%2Fx86%2Fmpegvideoencdsp.asm;h=aec73f82dcd00acb5b38cb382e74afa522de7be3;hp=4fe6cfe5a6ed73d7ddf87b5884d1c90bb923c915;hb=f6fa7814ccfe3e76514b36cf04f5cd3cb657c8cf;hpb=2ba45a602cbfa7b771effba9b11bb4245c21bc00 diff --git a/ffmpeg/libavcodec/x86/mpegvideoencdsp.asm b/ffmpeg/libavcodec/x86/mpegvideoencdsp.asm index 4fe6cfe..aec73f8 100644 --- a/ffmpeg/libavcodec/x86/mpegvideoencdsp.asm +++ b/ffmpeg/libavcodec/x86/mpegvideoencdsp.asm @@ -29,16 +29,16 @@ cextern pw_1 SECTION .text ; int ff_pix_sum16_mmx(uint8_t *pix, int line_size) -; %1 = number of xmm registers used -; %2 = number of loops -; %3 = number of GPRs used -%macro PIX_SUM16 4 -cglobal pix_sum16, 2, %3, %1 +; %1 = number of loops +; %2 = number of GPRs used +%macro PIX_SUM16 3 +cglobal pix_sum16, 2, %2, 6 movsxdifnidn r1, r1d - mov r2, %2 -%if cpuflag(xop) + mov r2, %1 +%if mmsize == 16 lea r3, [r1*3] -%else +%endif +%if notcpuflag(xop) pxor m5, m5 %endif pxor m4, m4 @@ -52,42 +52,59 @@ cglobal pix_sum16, 2, %3, %1 mova m0, [r0] %if mmsize == 8 mova m1, [r0+8] -%else +%if cpuflag(mmxext) + mova m2, [r0+r1] + mova m3, [r0+r1+8] +%endif +%else ; sse2 mova m1, [r0+r1] + mova m2, [r0+r1*2] + mova m3, [r0+r3] %endif +%if cpuflag(mmxext) + psadbw m0, m5 + psadbw m1, m5 + psadbw m2, m5 + psadbw m3, m5 +%else ; mmx punpckhbw m2, m0, m5 punpcklbw m0, m5 punpckhbw m3, m1, m5 punpcklbw m1, m5 +%endif ; cpuflag(mmxext) %endif ; cpuflag(xop) paddw m1, m0 paddw m3, m2 paddw m3, m1 paddw m4, m3 -%if mmsize == 8 - add r0, r1 +%if cpuflag(mmxext) + lea r0, [r0+r1*%3] %else - lea r0, [r0+r1*%4] + add r0, r1 %endif dec r2 jne .loop -%if cpuflag(xop) +%if mmsize == 16 pshufd m0, m4, q0032 paddd m4, m0 -%else +%elif notcpuflag(mmxext) HADDW m4, m5 %endif movd eax, m4 RET %endmacro +%if ARCH_X86_32 INIT_MMX mmx -PIX_SUM16 0, 16, 3, 0 +PIX_SUM16 16, 3, 0 +INIT_MMX mmxext +PIX_SUM16 8, 4, 2 +%endif INIT_XMM sse2 -PIX_SUM16 6, 8, 3, 2 +PIX_SUM16 4, 4, 4 %if HAVE_XOP_EXTERNAL INIT_XMM xop -PIX_SUM16 5, 4, 4, 4 +PIX_SUM16 4, 4, 4 %endif ; int ff_pix_norm1_mmx(uint8_t *pix, int line_size)