;******************************************************************************
;* SIMD-optimized quarterpel functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text
27 | ||
28 | %macro op_avgh 3 | |
29 | movh %3, %2 | |
30 | pavgb %1, %3 | |
31 | movh %2, %1 | |
32 | %endmacro | |
33 | ||
34 | %macro op_avg 2 | |
35 | pavgb %1, %2 | |
36 | mova %2, %1 | |
37 | %endmacro | |
38 | ||
39 | %macro op_puth 2-3 | |
40 | movh %2, %1 | |
41 | %endmacro | |
42 | ||
43 | %macro op_put 2 | |
44 | mova %2, %1 | |
45 | %endmacro | |
46 | ||
47 | ; void ff_put/avg_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | |
48 | ; int dstStride, int src1Stride, int h) | |
49 | %macro PIXELS4_L2 1 | |
50 | %define OP op_%1h | |
51 | cglobal %1_pixels4_l2, 6,6 | |
52 | movsxdifnidn r3, r3d | |
53 | movsxdifnidn r4, r4d | |
54 | test r5d, 1 | |
55 | je .loop | |
56 | movd m0, [r1] | |
57 | movd m1, [r2] | |
58 | add r1, r4 | |
59 | add r2, 4 | |
60 | pavgb m0, m1 | |
61 | OP m0, [r0], m3 | |
62 | add r0, r3 | |
63 | dec r5d | |
64 | .loop: | |
65 | mova m0, [r1] | |
66 | mova m1, [r1+r4] | |
67 | lea r1, [r1+2*r4] | |
68 | pavgb m0, [r2] | |
69 | pavgb m1, [r2+4] | |
70 | OP m0, [r0], m3 | |
71 | OP m1, [r0+r3], m3 | |
72 | lea r0, [r0+2*r3] | |
73 | mova m0, [r1] | |
74 | mova m1, [r1+r4] | |
75 | lea r1, [r1+2*r4] | |
76 | pavgb m0, [r2+8] | |
77 | pavgb m1, [r2+12] | |
78 | OP m0, [r0], m3 | |
79 | OP m1, [r0+r3], m3 | |
80 | lea r0, [r0+2*r3] | |
81 | add r2, 16 | |
82 | sub r5d, 4 | |
83 | jne .loop | |
84 | REP_RET | |
85 | %endmacro | |
86 | ||
87 | INIT_MMX mmxext | |
88 | PIXELS4_L2 put | |
89 | PIXELS4_L2 avg | |
90 | ||
91 | ; void ff_put/avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | |
92 | ; int dstStride, int src1Stride, int h) | |
93 | %macro PIXELS8_L2 1 | |
94 | %define OP op_%1 | |
95 | cglobal %1_pixels8_l2, 6,6 | |
96 | movsxdifnidn r3, r3d | |
97 | movsxdifnidn r4, r4d | |
98 | test r5d, 1 | |
99 | je .loop | |
100 | mova m0, [r1] | |
101 | mova m1, [r2] | |
102 | add r1, r4 | |
103 | add r2, 8 | |
104 | pavgb m0, m1 | |
105 | OP m0, [r0] | |
106 | add r0, r3 | |
107 | dec r5d | |
108 | .loop: | |
109 | mova m0, [r1] | |
110 | mova m1, [r1+r4] | |
111 | lea r1, [r1+2*r4] | |
112 | pavgb m0, [r2] | |
113 | pavgb m1, [r2+8] | |
114 | OP m0, [r0] | |
115 | OP m1, [r0+r3] | |
116 | lea r0, [r0+2*r3] | |
117 | mova m0, [r1] | |
118 | mova m1, [r1+r4] | |
119 | lea r1, [r1+2*r4] | |
120 | pavgb m0, [r2+16] | |
121 | pavgb m1, [r2+24] | |
122 | OP m0, [r0] | |
123 | OP m1, [r0+r3] | |
124 | lea r0, [r0+2*r3] | |
125 | add r2, 32 | |
126 | sub r5d, 4 | |
127 | jne .loop | |
128 | REP_RET | |
129 | %endmacro | |
130 | ||
131 | INIT_MMX mmxext | |
132 | PIXELS8_L2 put | |
133 | PIXELS8_L2 avg | |
134 | ||
135 | ; void ff_put/avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | |
136 | ; int dstStride, int src1Stride, int h) | |
137 | %macro PIXELS16_L2 1 | |
138 | %define OP op_%1 | |
139 | cglobal %1_pixels16_l2, 6,6 | |
140 | movsxdifnidn r3, r3d | |
141 | movsxdifnidn r4, r4d | |
142 | test r5d, 1 | |
143 | je .loop | |
144 | mova m0, [r1] | |
145 | mova m1, [r1+8] | |
146 | pavgb m0, [r2] | |
147 | pavgb m1, [r2+8] | |
148 | add r1, r4 | |
149 | add r2, 16 | |
150 | OP m0, [r0] | |
151 | OP m1, [r0+8] | |
152 | add r0, r3 | |
153 | dec r5d | |
154 | .loop: | |
155 | mova m0, [r1] | |
156 | mova m1, [r1+8] | |
157 | add r1, r4 | |
158 | pavgb m0, [r2] | |
159 | pavgb m1, [r2+8] | |
160 | OP m0, [r0] | |
161 | OP m1, [r0+8] | |
162 | add r0, r3 | |
163 | mova m0, [r1] | |
164 | mova m1, [r1+8] | |
165 | add r1, r4 | |
166 | pavgb m0, [r2+16] | |
167 | pavgb m1, [r2+24] | |
168 | OP m0, [r0] | |
169 | OP m1, [r0+8] | |
170 | add r0, r3 | |
171 | add r2, 32 | |
172 | sub r5d, 2 | |
173 | jne .loop | |
174 | REP_RET | |
175 | %endmacro | |
176 | ||
177 | INIT_MMX mmxext | |
178 | PIXELS16_L2 put | |
179 | PIXELS16_L2 avg |