Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / x86 / qpeldsp_init.c
CommitLineData
2ba45a60
DM
1/*
2 * quarterpel DSP functions
3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23#include <stddef.h>
24#include <stdint.h>
25
26#include "config.h"
27#include "libavutil/attributes.h"
28#include "libavutil/cpu.h"
29#include "libavutil/x86/cpu.h"
30#include "libavcodec/pixels.h"
31#include "libavcodec/qpeldsp.h"
32#include "fpel.h"
33
/*
 * Prototypes of routines that are only declared in this file; their
 * definitions live elsewhere (presumably in the x86 assembly sources --
 * TODO confirm).  They are the building blocks called by the wrappers that
 * the QPEL_OP macro generates.
 */

/* Two-source ("l2") block routines: dst, two sources, two strides, h rows. */
void ff_put_pixels8_l2_mmxext(uint8_t *dst,
                              const uint8_t *src1, const uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_put_pixels16_l2_mmxext(uint8_t *dst,
                               const uint8_t *src1, const uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
                                     const uint8_t *src1, const uint8_t *src2,
                                     int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
                                      const uint8_t *src1, const uint8_t *src2,
                                      int dstStride, int src1Stride, int h);
void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
                              const uint8_t *src1, const uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
                               const uint8_t *src1, const uint8_t *src2,
                               int dstStride, int src1Stride, int h);

/* Horizontal low-pass routines; the number of rows is passed in h. */
void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride, int h);
void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
                                                const uint8_t *src,
                                                int dstStride, int srcStride,
                                                int h);
void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
                                                 const uint8_t *src,
                                                 int dstStride, int srcStride,
                                                 int h);
void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride, int h);

/* Vertical low-pass routines; unlike the horizontal ones they take no h. */
void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride);
void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
                                                const uint8_t *src,
                                                int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
                                                 const uint8_t *src,
                                                 int dstStride, int srcStride);
void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride);
void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride);

/*
 * The "no_rnd" whole-block put has no routine of its own here: both sizes
 * are aliased to the plain MMX put, so the put_no_rnd_ mc00 wrappers end up
 * calling ff_put_pixels{8,16}_mmx.
 */
#define ff_put_no_rnd_pixels8_mmxext  ff_put_pixels8_mmx
#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
84
85#if HAVE_YASM
86
/*
 * The mc00 wrappers generated by QPEL_OP paste an "_mmxext" suffix onto the
 * whole-block put routine name; route those names to the plain MMX versions.
 */
#define ff_put_pixels8_mmxext  ff_put_pixels8_mmx
#define ff_put_pixels16_mmxext ff_put_pixels16_mmx
89
/*
 * QPEL_OP(OPNAME, RND, MMX) emits 32 static wrappers:
 *   OPNAME qpel8_mcXY_MMX  and  OPNAME qpel16_mcXY_MMX,  X, Y in 0..3,
 * each taking (uint8_t *dst, const uint8_t *src, ptrdiff_t stride).
 *
 *   OPNAME  pasted into the name of the final store routine
 *           ("put_", "avg_", "put_no_rnd_").
 *   RND     pasted into the names of the intermediate stages, which always
 *           go through a "put" routine: "_" gives ff_put_..., "_no_rnd_"
 *           gives ff_put_no_rnd_... .
 *   MMX     CPU suffix pasted onto every called routine.
 *
 * Variants with X != 0 run the horizontal low-pass routine, variants with
 * Y != 0 run the vertical one; X or Y equal to 1 or 3 additionally blend
 * with a neighbouring plane through a pixels*_l2 call.  Presumably X and Y
 * are the horizontal/vertical quarter-sample offsets -- TODO confirm.
 *
 * Scratch planes are declared as uint64_t arrays and accessed as bytes:
 *   8x8 : rows are 8 bytes  (1 uint64_t), so an 8x8 plane is 8 elements and
 *         an 8x9 plane is 9 elements;
 *   16x16: rows are 16 bytes (2 uint64_t), so a 16x16 plane is 32 elements
 *         and a 16x17 plane is 34 elements.
 * The horizontally filtered plane has one extra row because the following
 * vertical stage / "+ one row" selection reads it.
 */
#define QPEL_OP(OPNAME, RND, MMX) \
/* ======================= 8x8 wrappers, Y = 0 ======================= */ \
static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[8]; \
    uint8_t *const lp = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(lp, src, 8, stride, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, lp, stride, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
                                                   stride, 8); \
} \
\
static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[8]; \
    uint8_t *const lp = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(lp, src, 8, stride, 8); \
    /* blend with the source shifted one byte to the right */ \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, lp, stride, stride, 8); \
} \
\
/* ======================= 8x8 wrappers, Y = 1 ======================= */ \
static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[8]; \
    uint8_t *const lp = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(lp, src, 8, stride); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, lp, stride, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[8 + 9]; \
    uint8_t *const hv_plane = (uint8_t *) scratch; \
    /* the 8x9 H plane starts after the 8x8 HV plane (64 bytes) */ \
    uint8_t *const h_plane  = (uint8_t *) (scratch + 8); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(h_plane, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(h_plane, src, h_plane, 8, stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(hv_plane, h_plane, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, h_plane, hv_plane, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[8 + 9]; \
    uint8_t *const hv_plane = (uint8_t *) scratch; \
    uint8_t *const h_plane  = (uint8_t *) (scratch + 8); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(h_plane, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(hv_plane, h_plane, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, h_plane, hv_plane, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[8 + 9]; \
    uint8_t *const hv_plane = (uint8_t *) scratch; \
    uint8_t *const h_plane  = (uint8_t *) (scratch + 8); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(h_plane, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(h_plane, src + 1, h_plane, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(hv_plane, h_plane, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, h_plane, hv_plane, \
                                        stride, 8, 8); \
} \
\
/* ======================= 8x8 wrappers, Y = 2 ======================= */ \
static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride); \
} \
\
static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[8 + 9]; \
    uint8_t *const h_plane = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(h_plane, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(h_plane, src, h_plane, 8, stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, h_plane, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[9]; \
    uint8_t *const h_plane = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(h_plane, src, 8, \
                                                   stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, h_plane, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[8 + 9]; \
    uint8_t *const h_plane = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(h_plane, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(h_plane, src + 1, h_plane, 8, \
                                        stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, h_plane, stride, 8); \
} \
\
/* ======================= 8x8 wrappers, Y = 3 ======================= */ \
static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[8]; \
    uint8_t *const lp = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(lp, src, 8, stride); \
    /* blend with the source shifted one row down */ \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, lp, stride, \
                                        stride, 8); \
} \
\
static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[8 + 9]; \
    uint8_t *const hv_plane = (uint8_t *) scratch; \
    uint8_t *const h_plane  = (uint8_t *) (scratch + 8); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(h_plane, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(h_plane, src, h_plane, 8, stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(hv_plane, h_plane, 8, 8); \
    /* h_plane + 8: start from the second row of the H plane */ \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, h_plane + 8, hv_plane, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[8 + 9]; \
    uint8_t *const hv_plane = (uint8_t *) scratch; \
    uint8_t *const h_plane  = (uint8_t *) (scratch + 8); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(h_plane, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(hv_plane, h_plane, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, h_plane + 8, hv_plane, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t scratch[8 + 9]; \
    uint8_t *const hv_plane = (uint8_t *) scratch; \
    uint8_t *const h_plane  = (uint8_t *) (scratch + 8); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(h_plane, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(h_plane, src + 1, h_plane, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(hv_plane, h_plane, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, h_plane + 8, hv_plane, \
                                        stride, 8, 8); \
} \
\
/* ====================== 16x16 wrappers, Y = 0 ====================== */ \
static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[32]; \
    uint8_t *const lp = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(lp, src, 16, stride, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, lp, stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, \
                                                    stride, 16); \
} \
\
static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[32]; \
    uint8_t *const lp = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(lp, src, 16, stride, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, lp, stride, \
                                         stride, 16); \
} \
\
/* ====================== 16x16 wrappers, Y = 1 ====================== */ \
static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[32]; \
    uint8_t *const lp = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(lp, src, 16, stride); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, lp, stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[16 * 2 + 17 * 2]; \
    uint8_t *const hv_plane = (uint8_t *) scratch; \
    /* the 16x17 H plane starts after the 16x16 HV plane (256 bytes) */ \
    uint8_t *const h_plane  = (uint8_t *) (scratch + 32); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(h_plane, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(h_plane, src, h_plane, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(hv_plane, h_plane, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, h_plane, hv_plane, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[16 * 2 + 17 * 2]; \
    uint8_t *const hv_plane = (uint8_t *) scratch; \
    uint8_t *const h_plane  = (uint8_t *) (scratch + 32); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(h_plane, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(hv_plane, h_plane, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, h_plane, hv_plane, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[16 * 2 + 17 * 2]; \
    uint8_t *const hv_plane = (uint8_t *) scratch; \
    uint8_t *const h_plane  = (uint8_t *) (scratch + 32); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(h_plane, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(h_plane, src + 1, h_plane, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(hv_plane, h_plane, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, h_plane, hv_plane, \
                                         stride, 16, 16); \
} \
\
/* ====================== 16x16 wrappers, Y = 2 ====================== */ \
static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
                                                    stride, stride); \
} \
\
static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[17 * 2]; \
    uint8_t *const h_plane = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(h_plane, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(h_plane, src, h_plane, 16, \
                                         stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, h_plane, \
                                                    stride, 16); \
} \
\
static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[17 * 2]; \
    uint8_t *const h_plane = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(h_plane, src, 16, \
                                                    stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, h_plane, \
                                                    stride, 16); \
} \
\
static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[17 * 2]; \
    uint8_t *const h_plane = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(h_plane, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(h_plane, src + 1, h_plane, 16, \
                                         stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, h_plane, \
                                                    stride, 16); \
} \
\
/* ====================== 16x16 wrappers, Y = 3 ====================== */ \
static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[32]; \
    uint8_t *const lp = (uint8_t *) scratch; \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(lp, src, 16, stride); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + stride, lp, \
                                         stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[16 * 2 + 17 * 2]; \
    uint8_t *const hv_plane = (uint8_t *) scratch; \
    uint8_t *const h_plane  = (uint8_t *) (scratch + 32); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(h_plane, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(h_plane, src, h_plane, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(hv_plane, h_plane, \
                                                    16, 16); \
    /* h_plane + 16: start from the second row of the H plane */ \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, h_plane + 16, hv_plane, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[16 * 2 + 17 * 2]; \
    uint8_t *const hv_plane = (uint8_t *) scratch; \
    uint8_t *const h_plane  = (uint8_t *) (scratch + 32); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(h_plane, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(hv_plane, h_plane, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, h_plane + 16, hv_plane, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t scratch[16 * 2 + 17 * 2]; \
    uint8_t *const hv_plane = (uint8_t *) scratch; \
    uint8_t *const h_plane  = (uint8_t *) (scratch + 32); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(h_plane, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(h_plane, src + 1, h_plane, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(hv_plane, h_plane, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, h_plane + 16, hv_plane, \
                                         stride, 16, 16); \
}
502
503QPEL_OP(put_, _, mmxext)
504QPEL_OP(avg_, _, mmxext)
505QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
506
507#endif /* HAVE_YASM */
508
/*
 * Store one function into slot X + 4 * Y of c->PFX_pixels_tab[IDX].
 * The stored identifier is PREFIX PFX SIZE _mcXY_ CPU pasted together.
 * Helper for SET_QPEL_FUNCS; X and Y must be single digits 0..3.
 */
#define QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, X, Y)                     \
    c->PFX ## _pixels_tab[IDX][(X) + 4 * (Y)] =                               \
        PREFIX ## PFX ## SIZE ## _mc ## X ## Y ## _ ## CPU

/*
 * Fill all 16 slots of c->PFX_pixels_tab[IDX] with the mc00..mc33 functions
 * of the given SIZE and CPU suffix.  Relies on a pointer named `c` being in
 * scope at the point of use.  PREFIX may be left empty.
 */
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                           \
do {                                                                          \
    /* Y = 0: slots 0..3 */                                                   \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 0, 0);                        \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 1, 0);                        \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 2, 0);                        \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 3, 0);                        \
    /* Y = 1: slots 4..7 */                                                   \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 0, 1);                        \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 1, 1);                        \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 2, 1);                        \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 3, 1);                        \
    /* Y = 2: slots 8..11 */                                                  \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 0, 2);                        \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 1, 2);                        \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 2, 2);                        \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 3, 2);                        \
    /* Y = 3: slots 12..15 */                                                 \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 0, 3);                        \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 1, 3);                        \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 2, 3);                        \
    QPEL_TAB_ENTRY(PFX, IDX, SIZE, CPU, PREFIX, 3, 3);                        \
} while (0)
528
529av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
530{
531 int cpu_flags = av_get_cpu_flags();
532
533 if (X86_MMXEXT(cpu_flags)) {
534#if HAVE_MMXEXT_EXTERNAL
535 SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
536 SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
537
538 SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
539 SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
540 SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
541 SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
542#endif /* HAVE_MMXEXT_EXTERNAL */
543 }
544}