Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavfilter / x86 / vf_gradfun.asm
1 ;******************************************************************************
2 ;* x86-optimized functions for gradfun filter
3 ;*
4 ;* This file is part of FFmpeg.
5 ;*
6 ;* FFmpeg is free software; you can redistribute it and/or
7 ;* modify it under the terms of the GNU Lesser General Public
8 ;* License as published by the Free Software Foundation; either
9 ;* version 2.1 of the License, or (at your option) any later version.
10 ;*
11 ;* FFmpeg is distributed in the hope that it will be useful,
12 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;* Lesser General Public License for more details.
15 ;*
16 ;* You should have received a copy of the GNU Lesser General Public
17 ;* License along with FFmpeg; if not, write to the Free Software
18 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 ;******************************************************************************
20
21 %include "libavutil/x86/x86util.asm"
22
; Read-only word constants referenced by the routines in this file.
23 SECTION_RODATA
24
25 pw_7f: times 8 dw 0x7F ; eight 16-bit words of 0x007F; loaded into m6 by both gradfun_filter_line variants
26 pw_ff: times 8 dw 0xFF ; eight 16-bit words of 0x00FF; loaded into m7 by BLUR_LINE as a low-byte mask
27
28 SECTION .text
29
; FILTER_LINE <offset_reg>
; Filters one group of pixels at byte offset r0: reads bytes from [r2+r0] and
; words from [r3+r0], and stores the resulting bytes to [r1+r0].
; %1 is a register of words that is added to the scaled r2 pixels before the
; final shift (both callers pass a register loaded from [r5]).
; Expected on entry (set up by both gradfun_filter_line variants in this file):
;   m5 = low word of r4d replicated into every word lane
;   m6 = pw_7f (0x7F in every word)
;   m7 = 0
; Overwrites m0, m1 and m2; r0 is not modified here.
; movh and PABSW are not defined in this file; they presumably come from the
; included x86util.asm (half-register move / per-word absolute value) --
; TODO confirm there.
30 %macro FILTER_LINE 1
31 movh m0, [r2+r0] ; m0 = bytes from the r2 buffer
32 movh m1, [r3+r0] ; m1 = words from the r3 buffer
33 punpcklbw m0, m7 ; interleave with zero (m7): widen the r2 bytes to words
34 punpcklwd m1, m1 ; interleave with itself: each r3 word now fills two adjacent lanes
35 psllw m0, 7 ; m0 = pix << 7
36 psubw m1, m0 ; m1 = delta = r3 word - (pix << 7)
37 PABSW m2, m1 ; presumably m2 = |delta| per word (macro defined elsewhere)
38 pmulhuw m2, m5 ; m2 = high 16 bits of the unsigned product m2 * m5
39 psubw m2, m6 ; m2 -= 0x7F
40 pminsw m2, m7 ; m2 = min(m2, 0) signed: positive lanes become 0
41 pmullw m2, m2 ; m2 = low 16 bits of m2 * m2
42 psllw m1, 2 ; delta <<= 2
43 paddw m0, %1 ; add the caller-supplied per-lane word offsets
44 pmulhw m1, m2 ; m1 = high 16 bits of the signed product delta * m2
45 paddw m0, m1 ; add the weighted correction to the scaled pixels
46 psraw m0, 7 ; arithmetic shift right by 7, undoing the earlier << 7
47 packuswb m0, m0 ; pack words to bytes with unsigned saturation
48 movh [r1+r0], m0 ; store the result bytes to the r1 buffer
49 %endmacro
50
; gradfun_filter_line, MMXEXT variant (declared through cglobal with 6, 6).
; Register use visible in this body: r0 = running byte offset, r1 = buffer
; written by FILTER_LINE, r2/r3 = buffers read by FILTER_LINE, r4d = value
; replicated into m5, r5 = pointer to the word offsets loaded into m3/m4.
; The loop body is unrolled twice: the first FILTER_LINE adds m3 = [r5], the
; second adds m4 = [r5+8], and r0 advances by 4 after each.
; The first FILTER_LINE always runs once; iteration continues only while r0 is
; still negative after an add, so r0 is presumably a negative offset counting
; up to zero -- TODO confirm against the C caller.
51 INIT_MMX mmxext
52 cglobal gradfun_filter_line, 6, 6
53 movh m5, r4d ; m5 = r4d
54 pxor m7, m7 ; m7 = 0 (zero operand for punpcklbw / pminsw in FILTER_LINE)
55 pshufw m5, m5,0 ; replicate word 0 of m5 into all four words
56 mova m6, [pw_7f] ; m6 = 0x7F in every word
57 mova m3, [r5] ; offsets used by the first half of the loop body
58 mova m4, [r5+8] ; offsets used by the second half, 8 bytes further on
59 .loop:
60 FILTER_LINE m3
61 add r0, 4
62 jge .end ; leave between the two halves once r0 >= 0
63 FILTER_LINE m4
64 add r0, 4
65 jl .loop ; repeat while r0 is still negative
66 .end:
67 REP_RET
68
; gradfun_filter_line, SSSE3 variant (declared through cglobal with 6, 6, 8).
; Same register roles as seen in the body: r0 = running byte offset, r1 =
; buffer written by FILTER_LINE, r2/r3 = buffers read by FILTER_LINE, r4d =
; value replicated into m5, r5 = pointer to the word offsets loaded into m4.
; Each iteration runs FILTER_LINE once with m4 = [r5] and advances r0 by 8.
; The body always runs at least once and repeats while r0 is still negative
; after the add.
69 INIT_XMM ssse3
70 cglobal gradfun_filter_line, 6, 6, 8
71 movd m5, r4d ; m5 = r4d in the low dword
72 pxor m7, m7 ; m7 = 0 (zero operand for punpcklbw / pminsw in FILTER_LINE)
73 pshuflw m5, m5, 0 ; replicate word 0 across the low four words
74 mova m6, [pw_7f] ; m6 = 0x7F in every word
75 punpcklqdq m5, m5 ; copy the low qword into the high qword: all eight words equal
76 mova m4, [r5] ; offsets added inside FILTER_LINE
77 .loop:
78 FILTER_LINE m4
79 add r0, 8
80 jl .loop ; repeat while r0 is still negative
81 REP_RET
82
; BLUR_LINE <load_insn>
; Emits the function gradfun_blur_line_<load_insn>. %1 is the instruction used
; for the two loads from [r4+r0] and [r5+r0]; every other memory access uses
; mova or a direct paddw memory operand regardless of %1.
; For each 16-byte step at byte offset r0, per 16-bit lane:
;   sum = both bytes of the r4 word + both bytes of the r5 word + word at [r2+r0]
;   old = word at [r1+r0]
;   [r1+r0] = sum
;   [r3+r0] = sum - old
; The body always runs at least once and repeats while r0 is still negative
; after adding 16.
; NOTE(review): the buffer roles of r1..r5 are not visible in this file --
; confirm them against the C prototype of the blur helper.
83 %macro BLUR_LINE 1
84 cglobal gradfun_blur_line_%1, 6, 6, 8
85 mova m7, [pw_ff] ; m7 = 0x00FF in every word (low-byte mask)
86 .loop:
87 %1 m0, [r4+r0] ; 16 bytes from the r4 buffer
88 %1 m1, [r5+r0] ; 16 bytes from the r5 buffer
89 mova m2, m0 ; keep copies so both bytes of each word can be extracted
90 mova m3, m1
91 psrlw m0, 8 ; m0 = high byte of each r4 word
92 psrlw m1, 8 ; m1 = high byte of each r5 word
93 pand m2, m7 ; m2 = low byte of each r4 word
94 pand m3, m7 ; m3 = low byte of each r5 word
95 paddw m0, m1 ; high bytes summed
96 paddw m2, m3 ; low bytes summed
97 paddw m0, m2 ; m0 = sum of the four bytes per lane
98 paddw m0, [r2+r0] ; add the words from the r2 buffer
99 mova m1, [r1+r0] ; m1 = previous contents of the r1 buffer
100 mova [r1+r0], m0 ; overwrite the r1 buffer with the new sums
101 psubw m0, m1 ; m0 = new sum - previous value
102 mova [r3+r0], m0 ; store the difference to the r3 buffer
103 add r0, 16
104 jl .loop ; repeat while r0 is still negative
105 REP_RET
106 %endmacro
107
; Instantiate BLUR_LINE twice under INIT_XMM sse2, once per load instruction
; used for the [r4+r0] / [r5+r0] loads. The macro's cglobal line names the
; results gradfun_blur_line_movdqa and gradfun_blur_line_movdqu.
108 INIT_XMM sse2
109 BLUR_LINE movdqa ; variant using aligned 16-byte loads
110 BLUR_LINE movdqu ; variant using unaligned 16-byte loads