Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libswscale / arm / rgb2yuv_neon_common.S
CommitLineData
2ba45a60
DM
/*
 * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21#include "libavutil/arm/asm.S"
22
/*
 * alias name, tgt, set=1
 *
 * Bind or release an assembler register alias.
 *   set != 0: name becomes an alias of tgt (.req)
 *   set == 0: the alias name is released (.unreq); tgt is not used
 */
.macro alias name, tgt, set=1
.if \set == 0
    .unreq  \name
.else
    \name   .req    \tgt
.endif
.endm
30
/*
 * Define q<N>_l and q<N>_h as aliases of the two D registers that make
 * up each Q register: q0_l = d0, q0_h = d1, ... q15_l = d30, q15_h = d31.
 *
 * alias_dw_all recurses, passing %(expr) arguments that the assembler
 * evaluates to numbers; that syntax is only available in .altmacro
 * mode, so the mode is switched on just around this definition and its
 * single invocation.
 */
.altmacro

.macro alias_dw_all qw, dw_l, dw_h
    alias   q\qw\()_h, d\dw_h
    alias   q\qw\()_l, d\dw_l
    .if \qw < 15
    alias_dw_all %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
    .endif
.endm

alias_dw_all 0, 0, 1

.noaltmacro
44
/*
 * alias_qw name, qw, set=1
 *
 * Bind (set != 0) or release (set == 0) three aliases at once:
 * name for the Q register qw, and name_l / name_h for qw_l / qw_h.
 */
.macro alias_qw name, qw, set=1
    alias   \name\()_l, \qw\()_l, \set
    alias   \name\()_h, \qw\()_h, \set
    alias   \name\(),   \qw,      \set
.endm
50
/*
 * Function entry: push r4-r12 and lr (10 words), then d8-d15
 * (the same registers as q4-q7, 64 bytes).
 */
.macro prologue
    push    {r4-r12, lr}
    vpush   {d8-d15}
.endm
55
/*
 * Function exit: pop d8-d15 (the same registers as q4-q7), then r4-r12,
 * and return by popping the saved lr value straight into pc.
 */
.macro epilogue
    vpop    {d8-d15}
    pop     {r4-r12, pc}
.endm
60
/*
 * load_arg reg, ix
 *
 * Load argument number ix (counted from 0; the macro subtracts 4, so
 * ix = 4 is the first stack slot) into reg. The constant part of the
 * offset skips 10 words of 4 bytes plus 4 blocks of 16 bytes, matching
 * a frame of ten core registers and four Q registers.
 */
.macro load_arg reg, ix
    ldr     \reg, [sp, #(4 * (\ix - 4) + (10 * 4 + 4 * 16))]
.endm
64
65
/*
 * Register names used by the 4:2:0 semi-planar conversion loop.
 *
 * Prototype of the functions built on top of these aliases:
 *
 * ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
 *               int width, int height,
 *               int y_stride, int c_stride, int src_stride,
 *               int32_t coeff_table[9]);
 *
 * Some names deliberately share a register:
 *   src / src0, y / y0          same value under two names
 *   width / header              r3
 *   c_stride / c_padding        r6
 *   y1 / coeff_table            r12
 *
 * set != 0 binds every alias, set == 0 releases them again.
 */
.macro alias_loop_420sp set=1
    alias   src,         r0,       \set
    alias   src0,        src,      \set
    alias   y,           r1,       \set
    alias   y0,          y,        \set
    alias   chroma,      r2,       \set
    alias   width,       r3,       \set
    alias   header,      width,    \set

    alias   height,      r4,       \set
    alias   y_stride,    r5,       \set
    alias   c_stride,    r6,       \set
    alias   c_padding,   c_stride, \set
    alias   src_stride,  r7,       \set

    alias   y0_end,      r8,       \set

    alias   src_padding, r9,       \set
    alias   y_padding,   r10,      \set

    alias   src1,        r11,      \set
    alias   y1,          r12,      \set

    alias   coeff_table, r12,      \set
.endm
96
97
/*
 * loop_420sp s_fmt, d_fmt, init, kernel, precision
 *
 * Emit the exported function <s_fmt>_to_<d_fmt>_neon_<precision>.
 *
 *   s_fmt, d_fmt  format names; used in the function name and passed
 *                 on to the kernel macro
 *   init          macro invoked once with the coeff_table register
 *   kernel        macro invoked as
 *                   kernel s_fmt, d_fmt, src0, src1, y0, y1, chroma[, count]
 *   precision     suffix of the function name
 *
 * Each pass of the outer loop handles two rows (src0/y0 and src1/y1)
 * and decrements height by 2. Within a row pair, the kernel is first
 * invoked with count = width & 15 when that value is non-zero, and
 * then without count until y0 reaches y0_end.
 *
 * NOTE(review): the kernel without count runs at least once per row
 * pair even when width & 15 != 0, so width is presumably expected to be
 * >= 16, and height even -- confirm against the C caller.
 */
.macro loop_420sp s_fmt, d_fmt, init, kernel, precision

function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
    prologue

    alias_loop_420sp

    /* arguments 4..8 are read from the stack */
    load_arg    height, 4
    load_arg    y_stride, 5
    load_arg    c_stride, 6
    load_arg    src_stride, 7
    load_arg    coeff_table, 8

    \init       coeff_table

    /* distance from the end of one row to the start of the next;
     * the source advances 4 bytes per pixel, hence width << 2 */
    sub         y_padding, y_stride, width
    sub         c_padding, c_stride, width
    sub         src_padding, src_stride, width, LSL #2

    /* header shares its register with width, so everything that
     * needs width has to be computed before this point */
    add         y0_end, y0, width
    and         header, width, #15

    /* y1 shares its register with coeff_table, which is dead by now */
    add         y1, y0, y_stride
    add         src1, src0, src_stride

0:
    cmp         header, #0
    beq         1f

    \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header

1:
    \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma

    /* y0 and y0_end are addresses: compare them as unsigned values so
     * that a row crossing 0x80000000 does not end the loop early */
    cmp         y0, y0_end
    blo         1b
2:
    /* step both row pointers down by two rows */
    add         y0, y1, y_padding
    add         y0_end, y1, y_stride
    add         chroma, chroma, c_padding
    add         src0, src1, src_padding

    add         y1, y0, y_stride
    add         src1, src0, src_stride

    subs        height, height, #2

    bgt         0b

    epilogue

    alias_loop_420sp 0

endfunc
.endm
153
/*
 * Pairwise-add neighbouring unsigned 8-bit R, G and B samples
 * (r8x16 / g8x16 / b8x16) into the 16-bit registers r16x8 / g16x8 /
 * b16x8, overwriting their previous contents.
 */
.macro downsample
    vpaddl.u8   r16x8, r8x16
    vpaddl.u8   g16x8, g8x16
    vpaddl.u8   b16x8, b8x16
.endm
159
160
/*
 * Accumulate and right shift by 2: add the pairwise sums of the
 * 8-bit R, G and B samples onto the values already held in
 * r16x8 / g16x8 / b16x8, then apply a rounding right shift by 2
 * to each 16-bit lane.
 */
.macro downsample_ars2
    vpadal.u8   r16x8, r8x16
    vpadal.u8   g16x8, g8x16
    vpadal.u8   b16x8, b8x16

    vrshr.u16   r16x8, r16x8, #2
    vrshr.u16   g16x8, g16x8, #2
    vrshr.u16   b16x8, b16x8, #2
.endm
171
/*
 * store_y8_16x1 dst, count
 *
 * Store the y8x16 register at dst.
 *   count given:   dst is then advanced by count
 *   count omitted: dst is advanced by the store itself (writeback)
 */
.macro store_y8_16x1 dst, count
.ifnc "\count",""
    vstmia  \dst, {y8x16}
    add     \dst, \dst, \count
.else
    vstmia  \dst!, {y8x16}
.endif
.endm
180
/*
 * store_chroma_nv12_8x1 dst, count
 *
 * Store u8x8 and v8x8 interleaved, U first, at dst.
 *   count given:   dst is post-incremented by the register count
 *   count omitted: dst is post-incremented by the amount stored
 */
.macro store_chroma_nv12_8x1 dst, count
.ifnc "\count",""
    vst2.i8 {u8x8, v8x8}, [\dst], \count
.else
    vst2.i8 {u8x8, v8x8}, [\dst]!
.endif
.endm
188
/*
 * store_chroma_nv21_8x1 dst, count
 *
 * Store v8x8 and u8x8 interleaved, V first, at dst.
 *   count given:   dst is post-incremented by the register count
 *   count omitted: dst is post-incremented by the amount stored
 */
.macro store_chroma_nv21_8x1 dst, count
.ifnc "\count",""
    vst2.i8 {v8x8, u8x8}, [\dst], \count
.else
    vst2.i8 {v8x8, u8x8}, [\dst]!
.endif
.endm
196
/*
 * load_8888_16x1 a, b, c, d, src, count
 *
 * De-interleave four-byte groups from src: the first vld4 fills the
 * _l halves of <a>8x16, <b>8x16, <c>8x16 and <d>8x16, the second one
 * fills the _h halves.
 *
 *   count omitted: both loads use writeback, so src ends up past
 *                  everything that was read
 *   count given:   the #32 added by the first writeback is taken off
 *                  again and src is advanced by count * 4 instead
 */
.macro load_8888_16x1 a, b, c, d, src, count
    vld4.8  {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
.ifnc "\count",""
    vld4.8  {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]
    sub     \src, \src, #32
    add     \src, \src, \count, LSL #2
.else
    vld4.8  {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]!
.endif
.endm
208
/* load_8888_16x1 with the byte order r, g, b, x. */
.macro load_rgbx_16x1 src, count
    load_8888_16x1  r, g, b, x, \src, \count
.endm
212
/* load_8888_16x1 with the byte order b, g, r, x. */
.macro load_bgrx_16x1 src, count
    load_8888_16x1  b, g, r, x, \src, \count
.endm
216
/* Bind (set != 0) or release (set == 0) the source aliases in r, g, b, x order. */
.macro alias_src_rgbx set=1
    alias_src_8888  r, g, b, x, \set
.endm
220
/* Bind (set != 0) or release (set == 0) the source aliases in b, g, r, x order. */
.macro alias_src_bgrx set=1
    alias_src_8888  b, g, r, x, \set
.endm
224
/*
 * NV12 output: u8x8 names c8x8x2_l and v8x8 names c8x8x2_h.
 * set != 0 binds the two aliases, set == 0 releases them.
 */
.macro alias_dst_nv12 set=1
    alias   u8x8, c8x8x2_l, \set
    alias   v8x8, c8x8x2_h, \set
.endm
229
/*
 * NV21 output: v8x8 names c8x8x2_l and u8x8 names c8x8x2_h.
 * set != 0 binds the two aliases, set == 0 releases them.
 */
.macro alias_dst_nv21 set=1
    alias   v8x8, c8x8x2_l, \set
    alias   u8x8, c8x8x2_h, \set
.endm
234
235
/*
 * Common aliases.
 *
 * CO_R / CO_G / CO_B name d0 / d1 / d2. Signed 16-bit lanes 0, 1 and 2
 * of each of them are additionally named CO_<c>Y, CO_<c>U and CO_<c>V.
 * BIAS_U and BIAS_V both name d3; BIAS_Y names q2.
 */

alias   CO_R, d0
CO_RY   .dn d0.s16[0]
CO_RU   .dn d0.s16[1]
CO_RV   .dn d0.s16[2]

alias   CO_G, d1
CO_GY   .dn d1.s16[0]
CO_GU   .dn d1.s16[1]
CO_GV   .dn d1.s16[2]

alias   CO_B, d2
CO_BY   .dn d2.s16[0]
CO_BU   .dn d2.s16[1]
CO_BV   .dn d2.s16[2]

alias   BIAS_U, d3
alias   BIAS_V, BIAS_U

alias   BIAS_Y, q2
257
258
/*
 * alias_src_8888 a, b, c, d, set
 *
 * q3-q6 hold the four de-interleaved source channels (R8G8B8X8 x16).
 * Names <a>8x16, <b>8x16, <c>8x16 and <d>8x16 are bound to q3, q4, q5
 * and q6 in that order, together with their _l / _h halves; set == 0
 * releases them instead.
 */
.macro alias_src_8888 a, b, c, d, set
    alias_qw    \a\()8x16, q3, \set
    alias_qw    \b\()8x16, q4, \set
    alias_qw    \c\()8x16, q5, \set
    alias_qw    \d\()8x16, q6, \set
.endm
267
/*
 * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
 *
 * One step of the conversion over two rows:
 *   - load from rgb0, start the chroma accumulation (downsample),
 *     compute luma and store it at y0
 *   - load from rgb1, add it to the accumulation and round-shift by 2
 *     (downsample_ars2), compute luma and store it at y1
 *   - compute the u and v values and store them at chroma
 *
 * rgb_fmt and yuv_fmt select the load_*, store_chroma_* and alias_*
 * macros by name. count is forwarded unchanged to every load and
 * store; it may be omitted. The format aliases are bound on entry and
 * released again on exit.
 */
.macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
    alias_src_\rgb_fmt
    alias_dst_\yuv_fmt

    /* first row */
    load_\rgb_fmt\()_16x1   \rgb0, \count
    downsample
    compute_y_16x1
    store_y8_16x1           \y0, \count

    /* second row */
    load_\rgb_fmt\()_16x1   \rgb1, \count
    downsample_ars2
    compute_y_16x1
    store_y8_16x1           \y1, \count

    /* chroma from the accumulated values */
    compute_chroma_8x1      u, U
    compute_chroma_8x1      v, V

    store_chroma_\yuv_fmt\()_8x1 \chroma, \count

    alias_dst_\yuv_fmt      0
    alias_src_\rgb_fmt      0
.endm