Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / x86 / fpel.asm
1 ;******************************************************************************
2 ;* SIMD-optimized fullpel functions
3 ;* Copyright (c) 2008 Loren Merritt
4 ;* Copyright (c) 2003-2013 Michael Niedermayer
5 ;* Copyright (c) 2013 Daniel Kang
6 ;*
7 ;* This file is part of FFmpeg.
8 ;*
9 ;* FFmpeg is free software; you can redistribute it and/or
10 ;* modify it under the terms of the GNU Lesser General Public
11 ;* License as published by the Free Software Foundation; either
12 ;* version 2.1 of the License, or (at your option) any later version.
13 ;*
14 ;* FFmpeg is distributed in the hope that it will be useful,
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;* Lesser General Public License for more details.
18 ;*
19 ;* You should have received a copy of the GNU Lesser General Public
20 ;* License along with FFmpeg; if not, write to the Free Software
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 ;******************************************************************************
23
24 %include "libavutil/x86/x86util.asm"
25
26 SECTION .text
27
; PAVGB_MMX mem, reg, tmp, mask
;
; Per-byte rounded-up average for CPUs that lack pavgb:
;     %2 = (%2 + %1 + 1) >> 1          (each unsigned byte independently)
; evaluated without overflow as
;     (a | b) - (((a ^ b) & 0xFE) >> 1)
;
;   %1  memory operand (b); read exactly once, with the width chosen by LOAD
;   %2  register operand (a); holds the result on exit
;   %3  scratch register; clobbered
;   %4  register expected to hold 0xFE in every byte, so that the 64-bit
;       shift below cannot carry a bit from one byte into its neighbour
;
; LOAD must be defined by the caller (OP_PIXELS sets it to movh or movu).
; The memory operand is deliberately touched only through LOAD: using it
; directly as a pxor/por source would read a full mmsize bytes even when
; LOAD is the half-width movh, i.e. past the end of a half-width row.
%macro PAVGB_MMX 4
    LOAD   %3, %1               ; %3 = b
    pxor   %3, %2               ; %3 = a ^ b
    por    %2, %3               ; %2 = a | (a ^ b) = a | b
    pand   %3, %4               ; clear bit 0 of every byte before shifting
    psrlq  %3, 1                ; %3 = ((a ^ b) & 0xFE) >> 1, per byte
    psubb  %2, %3               ; %2 = (a | b) - ((a ^ b) >> 1)
%endmacro
37
; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels,
;                        ptrdiff_t line_size, int h)
;
; OP_PIXELS op, width
;   %1  put: copy rows from pixels to block
;       avg: store the rounded-up per-byte average of block and pixels
;            back to block
;   %2  row width in bytes; the code handles mmsize/2 (one half-width
;       movh access per row) or whole multiples of mmsize
;
; Register roles:
;   r0 = block, r1 = pixels, r2 = line_size, r3d = h, r4 = 3 * line_size
;   m0-m3 = the four rows handled per loop pass
;   m4-m5 = scratch (avg only), m6 = 0xFE byte mask (avg without pavgb only)
;
; Every pass of .loop handles four rows, and the loop is left only when the
; row counter reaches exactly zero, so h has to be a positive multiple of 4.
%macro OP_PIXELS 2
%if %2 == mmsize/2
; half-width rows: movh moves only the low mmsize/2 bytes
%define LOAD movh
%define SAVE movh
%define LEN  mmsize
%else
; full-width rows: unaligned loads from pixels, mova stores to block
%define LOAD movu
%define SAVE mova
%define LEN  %2
%endif
cglobal %1_pixels%2, 4,5,4
    movsxdifnidn r2, r2d
    lea          r4, [r2*3]             ; r4 = offset of the fourth row
%ifidn %1, avg
%if notcpuflag(mmxext)
    ; no pavgb available: build the 0xFE-per-byte mask PAVGB_MMX needs
    pcmpeqd      m6, m6                 ; all bits set
    paddb        m6, m6                 ; 0xFF + 0xFF = 0xFE in every byte
%endif
%endif
.loop:
%assign %%i 0
%rep LEN/mmsize
    LOAD         m0, [r1 + %%i]
    LOAD         m1, [r1+r2 + %%i]
    LOAD         m2, [r1+r2*2 + %%i]
    LOAD         m3, [r1+r4 + %%i]
%ifidn %1, avg
%if notcpuflag(mmxext)
    PAVGB_MMX    [r0 + %%i], m0, m4, m6
    PAVGB_MMX    [r0+r2 + %%i], m1, m5, m6
    PAVGB_MMX    [r0+r2*2 + %%i], m2, m4, m6
    PAVGB_MMX    [r0+r4 + %%i], m3, m5, m6
%elif %2 == mmsize/2
    ; A memory operand to pavgb is always mmsize bytes wide, which would
    ; read mmsize/2 bytes beyond each half-width row of block. Fetch the
    ; rows with LOAD (movh here) and average register to register instead.
    LOAD         m4, [r0 + %%i]
    LOAD         m5, [r0+r2 + %%i]
    pavgb        m0, m4
    pavgb        m1, m5
    LOAD         m4, [r0+r2*2 + %%i]
    LOAD         m5, [r0+r4 + %%i]
    pavgb        m2, m4
    pavgb        m3, m5
%else
    pavgb        m0, [r0 + %%i]
    pavgb        m1, [r0+r2 + %%i]
    pavgb        m2, [r0+r2*2 + %%i]
    pavgb        m3, [r0+r4 + %%i]
%endif
%endif
    SAVE         [r0 + %%i], m0
    SAVE         [r0+r2 + %%i], m1
    SAVE         [r0+r2*2 + %%i], m2
    SAVE         [r0+r4 + %%i], m3
%assign %%i %%i+mmsize
%endrep
    sub          r3d, 4                 ; four rows done; sets ZF for jne
    lea          r1, [r1+r2*4]          ; lea leaves the flags untouched
    lea          r0, [r0+r2*4]
    jne          .loop
    RET
%endmacro
91
; Emit the put function followed by the avg function of one width for the
; instruction set currently selected by INIT_*.
%macro PUT_AVG_PIXELS 1 ; width
    OP_PIXELS put, %1
    OP_PIXELS avg, %1
%endmacro

; Plain MMX: put and avg for widths 4, 8 and 16. The avg functions are
; assembled with the mmxext cpuflag clear, so they go through PAVGB_MMX.
INIT_MMX mmx
PUT_AVG_PIXELS 4
PUT_AVG_PIXELS 8
PUT_AVG_PIXELS 16

; MMXEXT: only the avg functions are instantiated again, this time taking
; the pavgb branch of OP_PIXELS.
INIT_MMX mmxext
OP_PIXELS avg, 4
OP_PIXELS avg, 8
OP_PIXELS avg, 16

; SSE2: width 16 only, i.e. one full XMM register per row.
INIT_XMM sse2
PUT_AVG_PIXELS 16