Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com> | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "libavutil/arm/asm.S" | |
22 | ||
/*
 * alias name, tgt, set=1
 *
 * Bind or release a symbolic register name.
 *   set != 0 : define <name> as an alias of register <tgt> (.req)
 *   set == 0 : remove the alias <name> (.unreq); <tgt> is not used
 */
.macro alias name, tgt, set=1
.ifeq \set
        .unreq          \name
.else
        \name           .req    \tgt
.endif
.endm
30 | ||
/*
 * Define q<N>_l and q<N>_h for N = 0..15 as names for the two D registers
 * that make up each Q register (q<N>_l = d<2N>, q<N>_h = d<2N+1>).
 *
 * alias_dw_all qw, dw_l, dw_h
 *   qw   : index of the Q register handled by this expansion
 *   dw_l : index of the D register bound to q<qw>_l
 *   dw_h : index of the D register bound to q<qw>_h
 *
 * The macro invokes itself with qw + 1 and both D indices + 2 and stops
 * once qw has reached 15. Alternate macro mode is switched on only around
 * this definition and its single use, so that each %(expr) argument is
 * evaluated to a number before it is substituted.
 */
.altmacro

.macro alias_dw_all qw, dw_l, dw_h
        alias           q\qw\()_l, d\dw_l
        alias           q\qw\()_h, d\dw_h
.if \qw < 15
        alias_dw_all    %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
.endif
.endm

        alias_dw_all    0, 0, 1

.noaltmacro
44 | ||
/*
 * alias_qw name, qw, set=1
 *
 * Alias a Q register together with its halves:
 *   <name>   -> <qw>
 *   <name>_l -> <qw>_l
 *   <name>_h -> <qw>_h
 * The set argument is forwarded unchanged to the alias macro
 * (non-zero defines the three names, zero removes them).
 */
.macro alias_qw name, qw, set=1
        alias           \name,      \qw,      \set
        alias           \name\()_l, \qw\()_l, \set
        alias           \name\()_h, \qw\()_h, \set
.endm
50 | ||
/*
 * Function entry sequence: push r4-r12 and lr (10 words), then d8-d15
 * (the same registers as q4-q7, 64 bytes). The load_arg macro depends on
 * exactly this amount of stack being used.
 */
.macro prologue
        push            {r4-r12, lr}
        vpush           {d8-d15}
.endm
55 | ||
/*
 * Function exit sequence, mirror image of the prologue macro: pop d8-d15
 * (the same registers as q4-q7), then r4-r12, and return by loading the
 * saved lr value straight into pc.
 */
.macro epilogue
        vpop            {d8-d15}
        pop             {r4-r12, pc}
.endm
60 | ||
/*
 * load_arg reg, ix
 *
 * Load argument number ix (counted from 0; the first one fetched this way
 * in this file is number 4) from the stack into reg.
 *
 * The constant part of the offset skips what the prologue macro pushed:
 * 10 core registers (10 * 4 bytes) plus q4-q7 (4 * 16 bytes). The macro is
 * therefore only valid while sp still holds its post-prologue value.
 */
.macro load_arg reg, ix
        ldr             \reg, [sp, #((\ix - 4) * 4 + (10 * 4 + 4 * 16))]
.endm
64 | ||
65 | ||
/*
 * ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
 *               int width, int height,
 *               int y_stride, int c_stride, int src_stride,
 *               int32_t coeff_table[9]);
 *
 * alias_loop_420sp set=1
 *
 * Define (set != 0) or remove (set == 0) the core-register names used by
 * loop_420sp. r0-r3 carry the first four arguments; r4-r7 and r12 are
 * filled from the stack by loop_420sp.
 *
 * Some names intentionally share one register:
 *   src0        = src       (r0)
 *   y0          = y         (r1)
 *   header      = width     (r3)
 *   c_padding   = c_stride  (r6)
 *   coeff_table = y1        (r12)
 */
.macro alias_loop_420sp set=1
        alias           src,         r0,       \set
        alias           src0,        src,      \set
        alias           y,           r1,       \set
        alias           y0,          y,        \set
        alias           chroma,      r2,       \set
        alias           width,       r3,       \set
        alias           header,      width,    \set

        alias           height,      r4,       \set
        alias           y_stride,    r5,       \set
        alias           c_stride,    r6,       \set
        alias           c_padding,   c_stride, \set
        alias           src_stride,  r7,       \set

        alias           y0_end,      r8,       \set

        alias           src_padding, r9,       \set
        alias           y_padding,   r10,      \set

        alias           src1,        r11,      \set
        alias           y1,          r12,      \set

        alias           coeff_table, r12,      \set
.endm
96 | ||
97 | ||
/*
 * loop_420sp s_fmt, d_fmt, init, kernel, precision
 *
 * Emit the exported function <s_fmt>_to_<d_fmt>_neon_<precision>. Its C
 * signature is the one documented above alias_loop_420sp.
 *
 *   s_fmt, d_fmt : source and destination format names; they form the
 *                  function name and are handed on to the kernel macro
 *   init         : macro invoked once as "init coeff_table"
 *   kernel       : macro invoked as
 *                  "kernel s_fmt, d_fmt, src0, src1, y0, y1, chroma[, count]"
 *   precision    : suffix of the function name
 *
 * Rows are handled in pairs (row 0 through src0/y0, row 1 through src1/y1)
 * and height is decreased by 2 per pair.
 *
 * Column handling: header = width & 15. When it is non-zero, the first
 * kernel call of a row pair is given header as its count argument, which
 * the load/store macros of this file use as the pointer advance instead of
 * a full 16-pixel step. The remaining distance to y0_end is then covered by
 * the count-less kernel calls of the inner loop.
 *
 * NOTE(review): the inner loop body always runs at least once per row pair,
 * so this presumably relies on width >= 16 and on an even height - confirm
 * against the callers.
 */
.macro loop_420sp s_fmt, d_fmt, init, kernel, precision

function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
        prologue

        alias_loop_420sp

        /* arguments 4..8 are passed on the stack */
        load_arg        height,      4
        load_arg        y_stride,    5
        load_arg        c_stride,    6
        load_arg        src_stride,  7
        load_arg        coeff_table, 8

        /* must run before y1 is written: coeff_table and y1 share r12 */
        \init           coeff_table

        /* distance from the end of one row to the start of the next one;
         * c_padding replaces c_stride in place, source pixels are 4 bytes */
        sub             y_padding,   y_stride,   width
        sub             c_padding,   c_stride,   width
        sub             src_padding, src_stride, width, LSL #2

        /* width is last read here; its register becomes header */
        add             y0_end, y0, width
        and             header, width, #15

        add             y1,   y0,   y_stride
        add             src1, src0, src_stride

0:      /* per row pair */
        cmp             header, #0
        beq             1f

        /* partial step: pointers advance by header pixels only */
        \kernel         \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header

1:      /* full 16-pixel steps until the end of the first row */
        \kernel         \s_fmt, \d_fmt, src0, src1, y0, y1, chroma

        /* y0 and y0_end are addresses, so the comparison must be unsigned:
         * a signed test ends the row early when it spans 0x80000000 */
        cmp             y0, y0_end
        blo             1b
2:
        /* y1/src1 now sit at the end of the second row; step both rows on */
        add             y0,     y1,     y_padding
        add             y0_end, y1,     y_stride
        add             chroma, chroma, c_padding
        add             src0,   src1,   src_padding

        add             y1,   y0,   y_stride
        add             src1, src0, src_stride

        subs            height, height, #2

        bgt             0b

        epilogue

        alias_loop_420sp 0

endfunc
.endm
153 | ||
/*
 * downsample
 *
 * Add each pair of adjacent unsigned 8-bit lanes of r8x16, g8x16 and b8x16
 * and write the widened 16-bit sums to r16x8, g16x8 and b16x8. All six
 * names are register aliases that must exist where the macro is expanded.
 */
.macro downsample
        vpaddl.u8       r16x8, r8x16
        vpaddl.u8       g16x8, g8x16
        vpaddl.u8       b16x8, b8x16
.endm
159 | ||
160 | ||
/*
 * downsample_ars2: accumulate and right shift by 2
 *
 * Add each pair of adjacent unsigned 8-bit lanes of r8x16, g8x16 and b8x16
 * onto the 16-bit lanes already held in r16x8, g16x8 and b16x8, then apply
 * a rounding right shift by 2 to every 16-bit lane.
 */
.macro downsample_ars2
        /* accumulate the pairwise sums */
        vpadal.u8       r16x8, r8x16
        vpadal.u8       g16x8, g8x16
        vpadal.u8       b16x8, b8x16

        /* rounding shift right by 2 */
        vrshr.u16       r16x8, r16x8, #2
        vrshr.u16       g16x8, g16x8, #2
        vrshr.u16       b16x8, b16x8, #2
.endm
171 | ||
/*
 * store_y8_16x1 dst, count
 *
 * Store the y8x16 register (16 bytes) at the address in dst.
 *   count omitted : dst is written back past the stored data
 *   count given   : dst is advanced by the register count instead
 */
.macro store_y8_16x1 dst, count
.ifnc "\count",""
        vstmia          \dst, {y8x16}
        add             \dst, \dst, \count
.else
        vstmia          \dst!, {y8x16}
.endif
.endm
180 | ||
/*
 * store_chroma_nv12_8x1 dst, count
 *
 * Store u8x8 and v8x8 interleaved (u first) at the address in dst.
 *   count omitted : dst is written back past the stored data
 *   count given   : dst is post-incremented by the register count instead
 */
.macro store_chroma_nv12_8x1 dst, count
.ifnc "\count",""
        vst2.i8         {u8x8, v8x8}, [\dst], \count
.else
        vst2.i8         {u8x8, v8x8}, [\dst]!
.endif
.endm
188 | ||
/*
 * store_chroma_nv21_8x1 dst, count
 *
 * Store v8x8 and u8x8 interleaved (v first) at the address in dst.
 *   count omitted : dst is written back past the stored data
 *   count given   : dst is post-incremented by the register count instead
 */
.macro store_chroma_nv21_8x1 dst, count
.ifnc "\count",""
        vst2.i8         {v8x8, u8x8}, [\dst], \count
.else
        vst2.i8         {v8x8, u8x8}, [\dst]!
.endif
.endm
196 | ||
/*
 * load_8888_16x1 a, b, c, d, src, count
 *
 * Load 16 four-byte pixels from src and split them by byte position into
 * the registers <a>8x16, <b>8x16, <c>8x16 and <d>8x16: the first 8 pixels
 * fill the _l halves, the next 8 pixels fill the _h halves.
 *
 *   count omitted : src ends up 64 bytes (16 pixels) further on
 *   count given   : src ends up count * 4 bytes past its starting value
 */
.macro load_8888_16x1 a, b, c, d, src, count
        /* pixels 0..7, src += 32 */
        vld4.8          {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
.ifnc "\count",""
        /* pixels 8..15 without writeback, then rewind and step by count */
        vld4.8          {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]
        sub             \src, \src, #32
        add             \src, \src, \count, LSL #2
.else
        /* pixels 8..15, src += 32 */
        vld4.8          {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]!
.endif
.endm
208 | ||
/*
 * load_rgbx_16x1 src, count
 *
 * load_8888_16x1 with the byte order r, g, b, x: fills r8x16, g8x16, b8x16
 * and x8x16. src and the optional count are passed through unchanged.
 */
.macro load_rgbx_16x1 src, count
        load_8888_16x1  r, g, b, x, \src, \count
.endm
212 | ||
/*
 * load_bgrx_16x1 src, count
 *
 * load_8888_16x1 with the byte order b, g, r, x: fills b8x16, g8x16, r8x16
 * and x8x16. src and the optional count are passed through unchanged.
 */
.macro load_bgrx_16x1 src, count
        load_8888_16x1  b, g, r, x, \src, \count
.endm
216 | ||
/*
 * alias_src_rgbx set=1
 *
 * Define (set != 0) or remove (set == 0) the source register names for the
 * byte order r, g, b, x by way of alias_src_8888.
 */
.macro alias_src_rgbx set=1
        alias_src_8888  r, g, b, x, \set
.endm
220 | ||
/*
 * alias_src_bgrx set=1
 *
 * Define (set != 0) or remove (set == 0) the source register names for the
 * byte order b, g, r, x by way of alias_src_8888.
 */
.macro alias_src_bgrx set=1
        alias_src_8888  b, g, r, x, \set
.endm
224 | ||
/*
 * alias_dst_nv12 set=1
 *
 * Define (set != 0) or remove (set == 0) the chroma output names with u in
 * the low half: u8x8 -> c8x8x2_l, v8x8 -> c8x8x2_h. The c8x8x2 halves are
 * not defined in this file and must be provided where the macro is used.
 */
.macro alias_dst_nv12 set=1
        alias           u8x8, c8x8x2_l, \set
        alias           v8x8, c8x8x2_h, \set
.endm
229 | ||
/*
 * alias_dst_nv21 set=1
 *
 * Define (set != 0) or remove (set == 0) the chroma output names with v in
 * the low half: v8x8 -> c8x8x2_l, u8x8 -> c8x8x2_h. The c8x8x2 halves are
 * not defined in this file and must be provided where the macro is used.
 */
.macro alias_dst_nv21 set=1
        alias           v8x8, c8x8x2_l, \set
        alias           u8x8, c8x8x2_h, \set
.endm
234 | ||
235 | ||
/*
 * common aliases
 *
 * d0, d1 and d2 each get a plain name (CO_R, CO_G, CO_B) and three typed
 * views of their signed 16-bit lanes 0, 1 and 2 (suffix Y, U, V).
 * Presumably these hold the conversion coefficients taken from
 * coeff_table - the code that loads them is not in this file.
 */
        alias           CO_R, d0
CO_RY   .dn             d0.s16[0]
CO_RU   .dn             d0.s16[1]
CO_RV   .dn             d0.s16[2]

        alias           CO_G, d1
CO_GY   .dn             d1.s16[0]
CO_GU   .dn             d1.s16[1]
CO_GV   .dn             d1.s16[2]

        alias           CO_B, d2
CO_BY   .dn             d2.s16[0]
CO_BU   .dn             d2.s16[1]
CO_BV   .dn             d2.s16[2]

/* BIAS_U and BIAS_V are two names for the same register, d3 */
        alias           BIAS_U, d3
        alias           BIAS_V, BIAS_U

        alias           BIAS_Y, q2
257 | ||
258 | ||
/*
 * q3-q6: R8G8B8X8 x16
 *
 * alias_src_8888 a, b, c, d, set
 *
 * Define (set != 0) or remove (set == 0) the names <a>8x16, <b>8x16,
 * <c>8x16 and <d>8x16 for q3, q4, q5 and q6, including the _l and _h half
 * names created by alias_qw. set has no default here and must be given.
 */
.macro alias_src_8888 a, b, c, d, set
        alias_qw        \a\()8x16, q3, \set
        alias_qw        \b\()8x16, q4, \set
        alias_qw        \c\()8x16, q5, \set
        alias_qw        \d\()8x16, q6, \set
.endm
267 | ||
/*
 * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
 *
 * Convert one block of 16 pixels x 2 rows.
 *
 *   rgb_fmt    : source format name; selects alias_src_<rgb_fmt> and
 *                load_<rgb_fmt>_16x1
 *   yuv_fmt    : destination format name; selects alias_dst_<yuv_fmt> and
 *                store_chroma_<yuv_fmt>_8x1
 *   rgb0, rgb1 : source pointers of the first and second row
 *   y0, y1     : luma destination pointers of the first and second row
 *   chroma     : chroma destination pointer
 *   count      : optional register; when given, every pointer is advanced
 *                as described for the load/store macros instead of by a
 *                full block
 *
 * compute_y_16x1 and compute_chroma_8x1 are not defined in this file and
 * must be provided by the file that expands this macro.
 */
.macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
        alias_src_\rgb_fmt
        alias_dst_\yuv_fmt

        /* first row: load, start the pairwise sums, emit luma */
        load_\rgb_fmt\()_16x1   \rgb0, \count

        downsample
        compute_y_16x1
        store_y8_16x1           \y0, \count

        /* second row: load, finish the sums and shift, emit luma */
        load_\rgb_fmt\()_16x1   \rgb1, \count
        downsample_ars2
        compute_y_16x1
        store_y8_16x1           \y1, \count

        /* chroma from the downsampled values */
        compute_chroma_8x1      u, U
        compute_chroma_8x1      v, V

        store_chroma_\yuv_fmt\()_8x1 \chroma, \count

        /* release the format-specific names again */
        alias_dst_\yuv_fmt      0
        alias_src_\rgb_fmt      0
.endm