Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / x86 / h263_loopfilter.asm
CommitLineData
2ba45a60
DM
1;******************************************************************************
2;* MMX-optimized H.263 loop filter
3;* Copyright (c) 2003-2013 Michael Niedermayer
4;* Copyright (c) 2013 Daniel Kang
5;*
6;* This file is part of FFmpeg.
7;*
8;* FFmpeg is free software; you can redistribute it and/or
9;* modify it under the terms of the GNU Lesser General Public
10;* License as published by the Free Software Foundation; either
11;* version 2.1 of the License, or (at your option) any later version.
12;*
13;* FFmpeg is distributed in the hope that it will be useful,
14;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16;* Lesser General Public License for more details.
17;*
18;* You should have received a copy of the GNU Lesser General Public
19;* License along with FFmpeg; if not, write to the Free Software
20;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21;******************************************************************************
22
23%include "libavutil/x86/x86util.asm"
24
25SECTION_RODATA
; Both symbols are defined outside this file.
26cextern pb_FC                        ; used below as a per-byte AND mask (pand m1, [pb_FC]); presumably 0xFC in every byte - confirm at its definition
27cextern h263_loop_filter_strength    ; byte table indexed by qscale (read with movzx r3d, BYTE [r4+r2] in both filters)
28
29SECTION_TEXT
30
;------------------------------------------------------------------------------
; H263_LOOP_FILTER %1, %2, %3, %4, %5
;
; Filters four rows of bytes A=[%1], B=[%2], C=[%3], D=[%4], lane by lane.
;   %1..%4  memory addresses of the four rows (each is read with mova)
;   %5      operand loaded with movd; only its low byte is used, replicated
;           into 8 byte lanes as the limit L (both callers in this file pass
;           2 * h263_loop_filter_strength[qscale])
;
; Per byte lane, as computed by the code below:
;   s   = (A - D) + 4*(C - B)                   (16-bit arithmetic)
;   |d| = |s| >> 3, sign(d) = sign(s)
;   t   = min(|d|, max(L - |d|, 0))             (unsigned saturating bytes)
;   B'  = clamp(B + sign(d)*t, 0, 255)
;   C'  = clamp(C - sign(d)*t, 0, 255)
;   e   = A - D saturated to a signed byte
;   u   = min(|e|, min(2*t, 255)) >> 2, with the sign of e re-applied
;   A'  = A - u,  D' = D + u                    (wrapping byte add/sub)
;
; Results are left in registers, nothing is stored:
;   m5 = A' (row %1), m3 = B' (row %2), m4 = C' (row %3), m6 = D' (row %4)
; Clobbers m0-m7.
;------------------------------------------------------------------------------
31%macro H263_LOOP_FILTER 5
32 pxor m7, m7                 ; m7 = 0, used as the zero half for byte->word unpacking
33 mova m0, [%1]
34 mova m1, [%1]
35 mova m2, [%4]
36 mova m3, [%4]
37 punpcklbw m0, m7            ; m0 = A, low half, zero-extended to words
38 punpckhbw m1, m7            ; m1 = A, high half
39 punpcklbw m2, m7            ; m2 = D, low half
40 punpckhbw m3, m7            ; m3 = D, high half
41 psubw m0, m2                ; m0 = A - D (low half), kept for the outer-row correction
42 psubw m1, m3                ; m1 = A - D (high half)
43 mova m2, [%2]
44 mova m3, [%2]
45 mova m4, [%3]
46 mova m5, [%3]
47 punpcklbw m2, m7            ; m2/m3 = B as words (low/high)
48 punpckhbw m3, m7
49 punpcklbw m4, m7            ; m4/m5 = C as words (low/high)
50 punpckhbw m5, m7
51 psubw m4, m2                ; C - B
52 psubw m5, m3
53 psllw m4, 2                 ; 4*(C - B)
54 psllw m5, 2
55 paddw m4, m0                ; s = (A - D) + 4*(C - B)
56 paddw m5, m1
57 pxor m6, m6
58 pcmpgtw m6, m4              ; m6 = 0xFFFF where s < 0 (low half)
59 pcmpgtw m7, m5              ; m7 = 0xFFFF where s < 0 (high half); m7 was 0 before
60 pxor m4, m6                 ; xor with mask then subtract mask = two's complement negate
61 pxor m5, m7                 ;   where the mask is set, i.e. m4/m5 = |s|
62 psubw m4, m6
63 psubw m5, m7
64 psrlw m4, 3                 ; |d| = |s| >> 3 (shift applied to the magnitude)
65 psrlw m5, 3
66 packuswb m4, m5             ; m4 = |d| as 8 unsigned bytes
67 packsswb m6, m7             ; m6 = sign mask of d as bytes (0xFF where negative)
68 pxor m7, m7                 ; m7 = 0 again
69 movd m2, %5
70 punpcklbw m2, m2            ; three self-interleaves replicate the low byte of %5
71 punpcklbw m2, m2            ;   into all 8 byte lanes: m2 = L
72 punpcklbw m2, m2
73 psubusb m2, m4              ; m2 = max(L - |d|, 0)
74 mova m3, m2
75 psubusb m3, m4              ; m3 = max(m2 - |d|, 0)
76 psubb m2, m3                ; m2 = t = min(|d|, max(L - |d|, 0))
77 mova m3, [%2]               ; reload B and C as bytes
78 mova m4, [%3]
79 pxor m3, m6                 ; complement lanes where d < 0 so that one saturating
80 pxor m4, m6                 ;   add/sub handles both signs
81 paddusb m3, m2              ; B +/- t with unsigned saturation
82 psubusb m4, m2              ; C -/+ t with unsigned saturation
83 pxor m3, m6                 ; undo the complement: m3 = B'
84 pxor m4, m6                 ; m4 = C'
85 paddusb m2, m2              ; m2 = 2*t (saturating)
86 packsswb m0, m1             ; m0 = e = A - D saturated to signed bytes
87 pcmpgtb m7, m0              ; m7 = 0xFF where e < 0
88 pxor m0, m7                 ; m0 = |e| (same xor/sub negate trick, on bytes)
89 psubb m0, m7
90 mova m1, m0
91 psubusb m0, m2              ; m0 = max(|e| - 2t, 0)
92 psubb m1, m0                ; m1 = min(|e|, 2t)
93 pand m1, [pb_FC]            ; mask applied before the word shift; presumably clears the low 2 bits
94 psrlw m1, 2                 ;   of each byte so no bits cross lanes, giving a per-byte >> 2
95 pxor m1, m7                 ; re-apply the sign of e: m1 = u
96 psubb m1, m7
97 mova m5, [%1]
98 mova m6, [%4]
99 psubb m5, m1                ; m5 = A' = A - u
100 paddb m6, m1               ; m6 = D' = D + u
101%endmacro
102
103INIT_MMX mmx
104; void ff_h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale)
;
; Applies H263_LOOP_FILTER to the four rows src-2*stride, src-stride, src and
; src+stride, then writes the four filtered rows back in place (one mova per
; row; 8 bytes each if the m registers are 64-bit MMX registers).
; Following the prototype above: r0 = src, r1 = stride, r2 = qscale.
; r3 and r4 are scratch.
; NOTE(review): "3,5" presumably declares 3 arguments and 5 general-purpose
; registers to x86inc's cglobal - confirm against x86inc.asm.
105cglobal h263_v_loop_filter, 3,5
106 movsxdifnidn r1, r1d       ; x86inc helper (not defined here); presumably sign-extends the int args
107 movsxdifnidn r2, r2d       ;   to full register width where needed - confirm
108
109 lea r4, [h263_loop_filter_strength]
110 movzx r3d, BYTE [r4+r2]    ; r3d = h263_loop_filter_strength[qscale]
111 movsx r2, r3b              ; r2 = that byte, sign-extended
112 shl r2, 1                  ; r2 = 2 * strength, the limit passed as %5
113
114 mov r3, r0
115 sub r3, r1                 ; r3 = src - stride
116 mov r4, r3
117 sub r4, r1                 ; r4 = src - 2*stride
118 H263_LOOP_FILTER r4, r3, r0, r0+r1, r2d
119
120 mova [r3], m3              ; macro outputs: m3 -> row %2 (src - stride)
121 mova [r0], m4              ;                m4 -> row %3 (src)
122 mova [r4], m5              ;                m5 -> row %1 (src - 2*stride)
123 mova [r0+r1], m6           ;                m6 -> row %4 (src + stride)
124 RET
125
;------------------------------------------------------------------------------
; TRANSPOSE4X4 %1, %2
;
; Reads a 4x4 block of bytes (4 bytes from each of 4 rows) and stores it
; transposed: input column k is written as 4 bytes at [%2 + 8*k].
;   %1  address of the first input row
;   %2  base address of the output
; Implicit inputs: r1 = row stride, r3 = 3 * row stride (the caller sets it).
; Clobbers m0-m3.
;------------------------------------------------------------------------------
126%macro TRANSPOSE4X4 2
127 movd m0, [%1]              ; row 0: a0 a1 a2 a3
128 movd m1, [%1+r1]           ; row 1: b0 b1 b2 b3
129 movd m2, [%1+r1*2]         ; row 2: c0 c1 c2 c3
130 movd m3, [%1+r3]           ; row 3: d0 d1 d2 d3
131 punpcklbw m0, m1           ; m0 = a0 b0 a1 b1 a2 b2 a3 b3
132 punpcklbw m2, m3           ; m2 = c0 d0 c1 d1 c2 d2 c3 d3
133 mova m1, m0
134 punpcklwd m0, m2           ; m0 = a0 b0 c0 d0 | a1 b1 c1 d1  (columns 0 and 1)
135 punpckhwd m1, m2           ; m1 = a2 b2 c2 d2 | a3 b3 c3 d3  (columns 2 and 3)
136 movd [%2+ 0], m0           ; column 0
137 punpckhdq m0, m0           ; move the high dword down so movd can store it
138 movd [%2+ 8], m0           ; column 1
139 movd [%2+16], m1           ; column 2
140 punpckhdq m1, m1
141 movd [%2+24], m1           ; column 3
142%endmacro
143
144
145; void ff_h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale)
;
; Filters the four byte columns src-2 .. src+1 over 8 rows. The 8x4 block is
; transposed into a 32-byte scratch area at rsp so that each column becomes
; one 8-byte row, H263_LOOP_FILTER runs on those rows, and the result is
; transposed back and written to the image 4 bytes per row.
; Following the prototype above: r0 = src, r1 = stride, r2 = qscale.
; r3 and r4 are scratch.
; NOTE(review): "3,5,0,32" presumably means 3 arguments, 5 general-purpose
; registers, 0 vector registers and 32 bytes of stack - confirm against
; x86inc.asm. The code uses [rsp+0 .. rsp+31].
146INIT_MMX mmx
147cglobal h263_h_loop_filter, 3,5,0,32
148 movsxdifnidn r1, r1d       ; x86inc helper (not defined here); presumably sign-extends the int args
149 movsxdifnidn r2, r2d       ;   to full register width where needed - confirm
150
151 lea r4, [h263_loop_filter_strength]
152 movzx r3d, BYTE [r4+r2]    ; r3d = h263_loop_filter_strength[qscale]
153 movsx r2, r3b              ; r2 = that byte, sign-extended
154 shl r2, 1                  ; r2 = 2 * strength, the limit passed as %5
155
156 sub r0, 2                  ; r0 = src - 2: first of the four columns
157 lea r3, [r1*3]             ; r3 = 3*stride, required by TRANSPOSE4X4 and the stores below
158
159 TRANSPOSE4X4 r0, rsp       ; rows 0-3 -> bytes 0-3 of each 8-byte scratch slot
160 lea r4, [r0+r1*4]          ; r4 = address of row 4
161 TRANSPOSE4X4 r4, rsp+4     ; rows 4-7 -> bytes 4-7 of each 8-byte scratch slot
162
163 H263_LOOP_FILTER rsp, rsp+8, rsp+16, rsp+24, r2d ; slots = columns 0..3; out: m5, m3, m4, m6
164
165 mova m1, m5                ; keep copies of columns 0 and 2 for the high-half unpacks
166 mova m0, m4
167 punpcklbw m5, m3           ; columns 0,1 interleaved, rows 0-3
168 punpcklbw m4, m6           ; columns 2,3 interleaved, rows 0-3
169 punpckhbw m1, m3           ; columns 0,1 interleaved, rows 4-7
170 punpckhbw m0, m6           ; columns 2,3 interleaved, rows 4-7
171 mova m3, m5
172 mova m6, m1
173 punpcklwd m5, m4           ; m5 = row 0 | row 1 (4 bytes each)
174 punpcklwd m1, m0           ; m1 = row 4 | row 5
175 punpckhwd m3, m4           ; m3 = row 2 | row 3
176 punpckhwd m6, m0           ; m6 = row 6 | row 7
177 movd [r0], m5              ; row 0
178 punpckhdq m5, m5           ; move the high dword down so movd can store it
179 movd [r0+r1*1], m5         ; row 1
180 movd [r0+r1*2], m3         ; row 2
181 punpckhdq m3, m3
182 movd [r0+r3], m3           ; row 3
183 movd [r4], m1              ; row 4
184 punpckhdq m1, m1
185 movd [r4+r1*1], m1         ; row 5
186 movd [r4+r1*2], m6         ; row 6
187 punpckhdq m6, m6
188 movd [r4+r3], m6           ; row 7
189 RET