diff --git a/ffmpeg/libswscale/x86/swscale.c b/ffmpeg/libswscale/x86/swscale.c
index c4c0e28..fe5c4ea 100644
--- a/ffmpeg/libswscale/x86/swscale.c
+++ b/ffmpeg/libswscale/x86/swscale.c
@@ -205,36 +205,20 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
         yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
         return;
     }
-    if (offset) {
-        __asm__ volatile("movq       (%0), %%xmm3\n\t"
-                         "movdqa    %%xmm3, %%xmm4\n\t"
-                         "psrlq        $24, %%xmm3\n\t"
-                         "psllq        $40, %%xmm4\n\t"
-                         "por       %%xmm4, %%xmm3\n\t"
-                         :: "r"(dither)
-                         );
-    } else {
-        __asm__ volatile("movq       (%0), %%xmm3\n\t"
-                         :: "r"(dither)
-                         );
-    }
     filterSize--;
-    __asm__ volatile(
-        "pxor      %%xmm0, %%xmm0\n\t"
-        "punpcklbw %%xmm0, %%xmm3\n\t"
-        "movd          %0, %%xmm1\n\t"
-        "punpcklwd %%xmm1, %%xmm1\n\t"
-        "punpckldq %%xmm1, %%xmm1\n\t"
-        "punpcklqdq %%xmm1, %%xmm1\n\t"
-        "psllw         $3, %%xmm1\n\t"
-        "paddw     %%xmm1, %%xmm3\n\t"
-        "psraw         $4, %%xmm3\n\t"
-        ::"m"(filterSize)
-        );
-    __asm__ volatile(
-        "movdqa    %%xmm3, %%xmm4\n\t"
-        "movdqa    %%xmm3, %%xmm7\n\t"
-        "movl %3, %%ecx\n\t"
+#define MAIN_FUNCTION \
+        "pxor       %%xmm0, %%xmm0 \n\t" \
+        "punpcklbw  %%xmm0, %%xmm3 \n\t" \
+        "movd           %4, %%xmm1 \n\t" \
+        "punpcklwd  %%xmm1, %%xmm1 \n\t" \
+        "punpckldq  %%xmm1, %%xmm1 \n\t" \
+        "punpcklqdq %%xmm1, %%xmm1 \n\t" \
+        "psllw          $3, %%xmm1 \n\t" \
+        "paddw      %%xmm1, %%xmm3 \n\t" \
+        "psraw          $4, %%xmm3 \n\t" \
+        "movdqa     %%xmm3, %%xmm4 \n\t" \
+        "movdqa     %%xmm3, %%xmm7 \n\t" \
+        "movl           %3, %%ecx  \n\t" \
         "mov                                 %0, %%"REG_d"        \n\t"\
         "mov                        (%%"REG_d"), %%"REG_S"        \n\t"\
         ".p2align                             4             \n\t" /* FIXME Unroll? */\
@@ -252,20 +236,41 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
         " jnz                                1b             \n\t"\
         "psraw                               $3, %%xmm3      \n\t"\
         "psraw                               $3, %%xmm4      \n\t"\
-        "packuswb                         %%xmm4, %%xmm3      \n\t"
-        "movntdq                          %%xmm3, (%1, %%"REG_c")\n\t"
+        "packuswb                         %%xmm4, %%xmm3      \n\t"\
+        "movntdq                          %%xmm3, (%1, %%"REG_c")\n\t"\
         "add                                $16, %%"REG_c"        \n\t"\
         "cmp                                 %2, %%"REG_c"        \n\t"\
-        "movdqa                   %%xmm7, %%xmm3\n\t"
-        "movdqa                   %%xmm7, %%xmm4\n\t"
+        "movdqa                   %%xmm7, %%xmm3            \n\t" \
+        "movdqa                   %%xmm7, %%xmm4            \n\t" \
         "mov                                 %0, %%"REG_d"        \n\t"\
         "mov                        (%%"REG_d"), %%"REG_S"        \n\t"\
-        "jb                                  1b             \n\t"\
-        :: "g" (filter),
-           "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
-        : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
-          "%"REG_d, "%"REG_S, "%"REG_c
-        );
+        "jb                                  1b             \n\t"
+
+    if (offset) {
+        __asm__ volatile(
+            "movq          %5, %%xmm3  \n\t"
+            "movdqa    %%xmm3, %%xmm4  \n\t"
+            "psrlq        $24, %%xmm3  \n\t"
+            "psllq        $40, %%xmm4  \n\t"
+            "por       %%xmm4, %%xmm3  \n\t"
+            MAIN_FUNCTION
+              :: "g" (filter),
+              "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
+              "m"(filterSize), "m"(((uint64_t *) dither)[0])
+              : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
+                "%"REG_d, "%"REG_S, "%"REG_c
+              );
+    } else {
+        __asm__ volatile(
+            "movq          %5, %%xmm3   \n\t"
+            MAIN_FUNCTION
+              :: "g" (filter),
+              "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
+              "m"(filterSize), "m"(((uint64_t *) dither)[0])
+              : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
+                "%"REG_d, "%"REG_S, "%"REG_c
+              );
+    }
 }
 #endif
 
@@ -425,7 +430,7 @@ switch(c->dstBpc){ \
     case 16:                          do_16_case;                          break; \
     case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
     case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_  ## opt; break; \
-    default: if (condition_8bit)    /*vscalefn = ff_yuv2planeX_8_  ## opt;*/ break; \
+    case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
     }
 #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
 switch(c->dstBpc){ \
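Note: the first two hunks fold what used to be three separate __asm__ statements into a single statement per branch, pasting the shared instruction body in via the MAIN_FUNCTION string macro. Because each statement now supplies the complete operand list, the shared body can read filterSize ("%4") and the 64-bit dither word ("%5") as proper operands; presumably the motivation is that the old split version relied on xmm3/xmm7 surviving between separate asm statements, which the compiler never guarantees. The standalone sketch below (x86 GNU C only; SHARED_BODY and add_with_bias are illustrative names, not from the patch) shows the pattern of one string-literal macro expanded inside two asm statements that bind the same operand numbers:

    #include <stdio.h>

    /* Shared instruction body: %0/%1 resolve against whichever
     * asm statement pastes the macro, exactly as MAIN_FUNCTION's
     * %0..%5 resolve against each branch's operand list. */
    #define SHARED_BODY \
        "addl   %1, %0 \n\t"

    static int add_with_bias(int x, int y, int biased)
    {
        if (biased) {
            __asm__ volatile(
                "addl   $1, %0 \n\t"   /* branch-specific prologue */
                SHARED_BODY
                : "+r"(x) : "r"(y));
        } else {
            __asm__ volatile(
                SHARED_BODY
                : "+r"(x) : "r"(y));
        }
        return x;
    }

    int main(void)
    {
        printf("%d %d\n", add_with_bias(2, 3, 0), add_with_bias(2, 3, 1)); /* 5 6 */
        return 0;
    }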
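Note: in the offset branch, the movq/psrlq/psllq/por prologue rotates the 8 dither bytes right by 24 bits (3 byte positions), apparently so the dither pattern keeps its phase when the destination pointer is offset; MAIN_FUNCTION then widens the bytes to words and adds the 8*filterSize bias before the arithmetic shift. A rough SSE2-intrinsics rendering of that setup (setup_dither is a hypothetical helper, and the dither values in main are just an example; the patch itself stays in inline asm, where filterSize has already been decremented):

    #include <stdint.h>
    #include <stdio.h>
    #include <emmintrin.h>

    static __m128i setup_dither(const uint8_t dither[8], int offset, int filterSize)
    {
        __m128i d = _mm_loadl_epi64((const __m128i *)dither);   /* movq %5, %%xmm3 */
        if (offset)                                             /* psrlq $24 / psllq $40 / por:
                                                                   rotate the qword right by 24 bits */
            d = _mm_or_si128(_mm_srli_epi64(d, 24), _mm_slli_epi64(d, 40));
        d = _mm_unpacklo_epi8(d, _mm_setzero_si128());          /* punpcklbw: bytes -> words */
        /* movd %4 + punpck* broadcast filterSize; psllw $3 scales it to 8*filterSize */
        __m128i bias = _mm_set1_epi16((short)(filterSize << 3));
        return _mm_srai_epi16(_mm_add_epi16(d, bias), 4);       /* paddw, psraw $4 */
    }

    int main(void)
    {
        static const uint8_t dither[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
        int16_t out[8];
        _mm_storeu_si128((__m128i *)out, setup_dither(dither, 1, 6));
        for (int i = 0; i < 8; i++)
            printf("%d ", out[i]);
        printf("\n");
        return 0;
    }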