/*
 * Web-view header retained from the page this file was extracted from:
 *   Imported Debian version 2.4.3~trusty1
 *   [deb_ffmpeg.git] / ffmpeg / libavcodec / arm / vp6dsp_neon.S
 *   Commit 2ba45a60 (DM)
 */
/*
 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * vp6_edge_filter
 *
 * Core of the VP6 edge filter, applied to 12 pixels in parallel.
 * Instructions come in pairs: the q-register form handles pixels 0-7 and
 * the d-register form that follows it handles pixels 8-11.
 *
 * Inputs:
 *   r2   t, the filter threshold
 *   q8   p[-2*s]   (d16 = pixels 0-7, d17 = pixels 8-15)
 *   q9   p[-s]     (d18, d19)
 *   q10  p[ 0]     (d20, d21)
 *   q11  p[ s]     (d22, d23)
 *
 * Per pixel, in 16-bit lanes:
 *   v    = (3*(p[0] - p[-s]) + p[-2*s] - p[s] + 4) >> 3
 *   V    = abs(v)
 *   sign = v >> 15                      (0 or -1)
 *   if not ((unsigned)(V - t - 1) >= (unsigned)(t - 1))
 *       v = ((2*t - V) + sign) ^ sign
 *   p[-s] = saturate_u8(p[-s] + v)
 *   p[ 0] = saturate_u8(p[ 0] - v)
 *
 * Outputs:
 *   q9   filtered p[-s]   (d18 and the low 4 bytes of d19 are valid)
 *   q10  filtered p[ 0]   (d20 and the low 4 bytes of d21 are valid)
 *   The upper 4 bytes of d19 and d21 are by-products of scratch values
 *   and must not be stored.
 *
 * Clobbers: q0-q3, q8, q12-q15.  q11 is left unchanged and q4-q7 are not
 * touched.
 */
.macro  vp6_edge_filter
        vdup.16         q3,  r2                 @ t in every 16-bit lane
        vmov.i16        q13, #1
        vsubl.u8        q0,  d20, d18           @ p[   0] - p[-s]
        vsubl.u8        q1,  d16, d22           @ p[-2*s] - p[ s]
        vsubl.u8        q14, d21, d19           @ same differences, pixels 8-15
        vsubl.u8        q15, d17, d23
        vadd.i16        q2,  q0,  q0            @ 2*(p[0]-p[-s])
        vadd.i16        d29, d28, d28           @ (d29 reused as scratch from here on)
        vadd.i16        q0,  q0,  q1            @   p[0]-p[-s] + p[-2*s]-p[s]
        vadd.i16        d28, d28, d30
        vadd.i16        q0,  q0,  q2            @ 3*(p[0]-p[-s]) + p[-2*s]-p[s]
        vadd.i16        d28, d28, d29
        vrshr.s16       q0,  q0,  #3            @ v (rounding shift)
        vrshr.s16       d28, d28, #3
        vsub.i16        q8,  q3,  q13           @ t-1 (p[-2*s] is no longer needed)
        vabs.s16        q1,  q0                 @ V
        vshr.s16        q2,  q0,  #15           @ s: 0 or -1 per lane
        vabs.s16        d30, d28
        vshr.s16        d29, d28, #15
        vsub.i16        q12, q1,  q3            @ V-t
        vsub.i16        d31, d30, d6
        vsub.i16        q12, q12, q13           @ V-t-1
        vsub.i16        d31, d31, d26
        vcge.u16        q12, q12, q8            @ V-t-1 >= t-1 (unsigned): keep-v mask
        vcge.u16        d31, d31, d16
        vadd.i16        q13, q3,  q3            @ 2*t
        vadd.i16        d16, d6,  d6
        vsub.i16        q13, q13, q1            @ 2*t - V
        vsub.i16        d16, d16, d30
        vadd.i16        q13, q13, q2            @ += s
        vadd.i16        d16, d16, d29
        veor            q13, q13, q2            @ ^= s
        veor            d16, d16, d29
        vbif            q0,  q13, q12           @ where the mask is clear, v = adjusted value
        vbif            d28, d16, d31
        vmovl.u8        q1,  d20                @ widen p[0] to 16 bits
        vmovl.u8        q15, d21
        vaddw.u8        q2,  q0,  d18           @ p[-s] + v
        vaddw.u8        q3,  q14, d19           @ only the low half (d6) is meaningful
        vsub.i16        q1,  q1,  q0            @ p[0] - v
        vsub.i16        d30, d30, d28
        vqmovun.s16     d18, q2                 @ saturate to u8: new p[-s]
        vqmovun.s16     d19, q3
        vqmovun.s16     d20, q1                 @ saturate to u8: new p[0]
        vqmovun.s16     d21, q15
.endm

/*
 * ff_vp6_edge_filter_ver_neon
 *
 * Runs vp6_edge_filter with the filter taps taken along the stride
 * direction: rows p-2*s, p-s, p and p+s are loaded, and rows p-s and p
 * are rewritten, 12 pixels wide.
 *
 * In:
 *   r0   address of the first pixel of row p[0]
 *   r1   s, the distance in bytes between consecutive rows
 *   r2   t, the filter threshold (consumed by vp6_edge_filter)
 * NOTE(review): presumably called from C as (uint8_t *yuv, stride, int t);
 * confirm against the C declaration.
 *
 * Memory: reads 16 bytes from each of the four rows, writes 12 bytes to
 * each of rows p-s and p.
 *
 * Clobbers: r0, r1 and every register listed for vp6_edge_filter.
 */
function ff_vp6_edge_filter_ver_neon, export=1
        sub             r0,  r0,  r1,  lsl #1   @ r0 = p - 2*s
        vld1.8          {q8},  [r0], r1         @ p[-2*s]
        vld1.8          {q9},  [r0], r1         @ p[-s]
        vld1.8          {q10}, [r0], r1         @ p[0]
        vld1.8          {q11}, [r0]             @ p[s]; r0 stays at p + s
        vp6_edge_filter
        sub             r0,  r0,  r1,  lsl #1   @ r0 = p - s
        sub             r1,  r1,  #8            @ first store of each row advances r0 by 8
        vst1.8          {d18},    [r0]!         @ p[-s], pixels 0-7
        vst1.32         {d19[0]}, [r0], r1      @ p[-s], pixels 8-11; r0 = p
        vst1.8          {d20},    [r0]!         @ p[0],  pixels 0-7
        vst1.32         {d21[0]}, [r0]          @ p[0],  pixels 8-11
        bx              lr
endfunc

/*
 * ff_vp6_edge_filter_hor_neon
 *
 * Runs vp6_edge_filter with the filter taps taken along each row (s = 1):
 * for 12 consecutive rows, pixels p[-2], p[-1], p[0] and p[1] are loaded,
 * and p[-1] and p[0] are rewritten.
 *
 * In:
 *   r0   address of p[0] in the first row
 *   r1   distance in bytes between consecutive rows
 *   r2   t, the filter threshold (consumed by vp6_edge_filter)
 * NOTE(review): presumably called from C as (uint8_t *yuv, stride, int t);
 * confirm against the C declaration.
 *
 * Memory: reads 4 bytes at p-2 and writes 2 bytes at p-1 in each of the
 * 12 rows.
 *
 * Clobbers: r0, r3 and every register listed for vp6_edge_filter.
 * The upper halves of d17, d19, d21 and d23 are never loaded, so byte
 * lanes 12-15 of q8-q11 hold stale data; nothing derived from them is
 * stored.
 */
function ff_vp6_edge_filter_hor_neon, export=1
        sub             r3,  r0,  #1            @ store pointer: p[-1] of row 0
        sub             r0,  r0,  #2            @ load pointer:  p[-2] of row 0
        vld1.32         {d16[0]}, [r0], r1      @ rows 0-3
        vld1.32         {d18[0]}, [r0], r1
        vld1.32         {d20[0]}, [r0], r1
        vld1.32         {d22[0]}, [r0], r1
        vld1.32         {d16[1]}, [r0], r1      @ rows 4-7
        vld1.32         {d18[1]}, [r0], r1
        vld1.32         {d20[1]}, [r0], r1
        vld1.32         {d22[1]}, [r0], r1
        vld1.32         {d17[0]}, [r0], r1      @ rows 8-11
        vld1.32         {d19[0]}, [r0], r1
        vld1.32         {d21[0]}, [r0], r1
        vld1.32         {d23[0]}, [r0], r1
        @ Transpose so that each q register holds one pixel column, with
        @ byte lane n taken from row n:
        @ q8 = p[-2], q9 = p[-1], q10 = p[0], q11 = p[1].
        vtrn.8          q8,  q9
        vtrn.8          q10, q11
        vtrn.16         q8,  q10
        vtrn.16         q9,  q11
        vp6_edge_filter
        @ Interleave the two result columns: each 16-bit lane becomes the
        @ (p[-1], p[0]) pair of one row, even rows in q9 and odd rows in q10.
        vtrn.8          q9,  q10
        vst1.16         {d18[0]}, [r3], r1      @ row 0
        vst1.16         {d20[0]}, [r3], r1      @ row 1
        vst1.16         {d18[1]}, [r3], r1      @ row 2
        vst1.16         {d20[1]}, [r3], r1      @ row 3
        vst1.16         {d18[2]}, [r3], r1      @ row 4
        vst1.16         {d20[2]}, [r3], r1      @ row 5
        vst1.16         {d18[3]}, [r3], r1      @ row 6
        vst1.16         {d20[3]}, [r3], r1      @ row 7
        vst1.16         {d19[0]}, [r3], r1      @ row 8
        vst1.16         {d21[0]}, [r3], r1      @ row 9
        vst1.16         {d19[1]}, [r3], r1      @ row 10
        vst1.16         {d21[1]}, [r3], r1      @ row 11
        bx              lr
endfunc