Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / arm / idctdsp_neon.S
CommitLineData
2ba45a60
DM
/*
 * ARM-NEON-optimized IDCT functions
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
22#include "libavutil/arm/asm.S"
23
/*
 * ff_put_pixels_clamped_neon
 *
 * Converts an 8x8 block of signed 16-bit values to unsigned 8-bit with
 * saturation and stores it as 8 rows of 8 bytes.
 *
 * In:    r0 = source, 64 contiguous 16-bit values, 16-byte aligned (:128)
 *        r1 = destination, each row 8-byte aligned (:64)
 *        r2 = byte distance between destination rows
 *        NOTE(review): presumably the C arguments (block, pixels,
 *        line_size) - confirm against the prototype.
 * Out:   none
 * Clobb: r0 (advanced by 128 bytes), r1 (advanced by 8 * r2),
 *        q0-q3, q8-q15
 *
 * Loads, narrowing and stores are interleaved by hand; keep the
 * instruction order unless the schedule is re-measured.
 */
function ff_put_pixels_clamped_neon, export=1
        vld1.16         {d16-d19}, [r0,:128]!           @ rows 0-1
        vqmovun.s16     d0,  q8                         @ signed 16 -> unsigned 8, saturating
        vld1.16         {d20-d23}, [r0,:128]!           @ rows 2-3
        vqmovun.s16     d1,  q9
        vld1.16         {d24-d27}, [r0,:128]!           @ rows 4-5
        vqmovun.s16     d2,  q10
        vld1.16         {d28-d31}, [r0,:128]!           @ rows 6-7
        vqmovun.s16     d3,  q11
        vst1.8          {d0},      [r1,:64], r2         @ store row 0, step to next row
        vqmovun.s16     d4,  q12
        vst1.8          {d1},      [r1,:64], r2
        vqmovun.s16     d5,  q13
        vst1.8          {d2},      [r1,:64], r2
        vqmovun.s16     d6,  q14
        vst1.8          {d3},      [r1,:64], r2
        vqmovun.s16     d7,  q15
        vst1.8          {d4},      [r1,:64], r2
        vst1.8          {d5},      [r1,:64], r2
        vst1.8          {d6},      [r1,:64], r2
        vst1.8          {d7},      [r1,:64], r2
        bx              lr
endfunc
47
/*
 * ff_put_signed_pixels_clamped_neon
 *
 * Converts an 8x8 block of signed 16-bit values to bytes: each value is
 * saturated to the signed 8-bit range, then 128 is added with 8-bit
 * wraparound, which maps -128..127 onto 0..255. The result is stored
 * as 8 rows of 8 bytes.
 *
 * In:    r0 = source, 64 contiguous 16-bit values, 16-byte aligned (:128)
 *        r1 = destination, each row 8-byte aligned (:64)
 *        r2 = byte distance between destination rows
 *        NOTE(review): presumably the C arguments (block, pixels,
 *        line_size) - confirm against the prototype.
 * Out:   none
 * Clobb: r0 (advanced by 128 bytes), r1 (advanced by 8 * r2),
 *        q0-q3, q8-q13, d31
 *
 * q8 and q9 are reused for rows 2 and 3 once rows 0 and 1 have been
 * narrowed. The instruction order is hand-scheduled; keep it unless the
 * schedule is re-measured.
 */
function ff_put_signed_pixels_clamped_neon, export=1
        vmov.u8         d31, #128                       @ bias added to every output byte
        vld1.16         {d16-d17}, [r0,:128]!           @ row 0
        vqmovn.s16      d0,  q8                         @ signed 16 -> signed 8, saturating
        vld1.16         {d18-d19}, [r0,:128]!           @ row 1
        vqmovn.s16      d1,  q9
        vld1.16         {d16-d17}, [r0,:128]!           @ row 2 (q8 reused)
        vqmovn.s16      d2,  q8
        vld1.16         {d18-d19}, [r0,:128]!           @ row 3 (q9 reused)
        vadd.u8         d0,  d0,  d31                   @ +128 modulo 256
        vld1.16         {d20-d21}, [r0,:128]!           @ row 4
        vadd.u8         d1,  d1,  d31
        vld1.16         {d22-d23}, [r0,:128]!           @ row 5
        vadd.u8         d2,  d2,  d31
        vst1.8          {d0},      [r1,:64], r2         @ store row 0, step to next row
        vqmovn.s16      d3,  q9
        vst1.8          {d1},      [r1,:64], r2
        vqmovn.s16      d4,  q10
        vst1.8          {d2},      [r1,:64], r2
        vqmovn.s16      d5,  q11
        vld1.16         {d24-d25}, [r0,:128]!           @ row 6
        vadd.u8         d3,  d3,  d31
        vld1.16         {d26-d27}, [r0,:128]!           @ row 7
        vadd.u8         d4,  d4,  d31
        vadd.u8         d5,  d5,  d31
        vst1.8          {d3},      [r1,:64], r2
        vqmovn.s16      d6,  q12
        vst1.8          {d4},      [r1,:64], r2
        vqmovn.s16      d7,  q13
        vst1.8          {d5},      [r1,:64], r2
        vadd.u8         d6,  d6,  d31
        vadd.u8         d7,  d7,  d31
        vst1.8          {d6},      [r1,:64], r2
        vst1.8          {d7},      [r1,:64], r2
        bx              lr
endfunc
84
/*
 * ff_add_pixels_clamped_neon
 *
 * Adds an 8x8 block of 16-bit values to an 8x8 block of unsigned bytes
 * in place. Per row: the 8 existing bytes are zero-extended and added to
 * the 8 16-bit values (16-bit wrapping add), the sums are saturated from
 * signed 16-bit to unsigned 8-bit, and the row is written back to the
 * address it was read from.
 *
 * In:    r0 = 64 contiguous 16-bit values, 16-byte aligned (:128)
 *        r1 = byte buffer to update, each row 8-byte aligned (:64)
 *        r2 = byte distance between buffer rows
 *        NOTE(review): presumably the C arguments (block, pixels,
 *        line_size) - confirm against the prototype.
 * Out:   none
 * Clobb: r0 (advanced by 128 bytes), r1 and r3 (each advanced by
 *        8 * r2), q0-q3, d16-d19
 *
 * r1 is the read cursor and r3 the write cursor over the same rows;
 * reads run a few rows ahead of the writes. q0-q3 are reloaded for rows
 * 4-7 only after their narrowed results (d0/d2/d4/d6) have been stored.
 * The instruction order is hand-scheduled; keep it unless the schedule
 * is re-measured.
 */
function ff_add_pixels_clamped_neon, export=1
        mov             r3,  r1                         @ r3 = write cursor, r1 = read cursor
        vld1.8          {d16},     [r1,:64], r2         @ bytes, row 0
        vld1.16         {d0-d1},   [r0,:128]!           @ 16-bit values, row 0
        vaddw.u8        q0,  q0,  d16                   @ value + zero-extended byte
        vld1.8          {d17},     [r1,:64], r2         @ bytes, row 1
        vld1.16         {d2-d3},   [r0,:128]!           @ 16-bit values, row 1
        vqmovun.s16     d0,  q0                         @ signed 16 -> unsigned 8, saturating
        vld1.8          {d18},     [r1,:64], r2         @ bytes, row 2
        vaddw.u8        q1,  q1,  d17
        vld1.16         {d4-d5},   [r0,:128]!           @ 16-bit values, row 2
        vaddw.u8        q2,  q2,  d18
        vst1.8          {d0},      [r3,:64], r2         @ write back row 0
        vqmovun.s16     d2,  q1
        vld1.8          {d19},     [r1,:64], r2         @ bytes, row 3
        vld1.16         {d6-d7},   [r0,:128]!           @ 16-bit values, row 3
        vaddw.u8        q3,  q3,  d19
        vqmovun.s16     d4,  q2
        vst1.8          {d2},      [r3,:64], r2         @ write back row 1
        vld1.8          {d16},     [r1,:64], r2         @ bytes, row 4
        vqmovun.s16     d6,  q3
        vld1.16         {d0-d1},   [r0,:128]!           @ 16-bit values, row 4 (row 0 already stored)
        vaddw.u8        q0,  q0,  d16
        vst1.8          {d4},      [r3,:64], r2         @ write back row 2
        vld1.8          {d17},     [r1,:64], r2         @ bytes, row 5
        vld1.16         {d2-d3},   [r0,:128]!           @ 16-bit values, row 5
        vaddw.u8        q1,  q1,  d17
        vst1.8          {d6},      [r3,:64], r2         @ write back row 3
        vqmovun.s16     d0,  q0
        vld1.8          {d18},     [r1,:64], r2         @ bytes, row 6
        vld1.16         {d4-d5},   [r0,:128]!           @ 16-bit values, row 6
        vaddw.u8        q2,  q2,  d18
        vst1.8          {d0},      [r3,:64], r2         @ write back row 4
        vqmovun.s16     d2,  q1
        vld1.8          {d19},     [r1,:64], r2         @ bytes, row 7
        vqmovun.s16     d4,  q2
        vld1.16         {d6-d7},   [r0,:128]!           @ 16-bit values, row 7
        vaddw.u8        q3,  q3,  d19
        vst1.8          {d2},      [r3,:64], r2         @ write back row 5
        vqmovun.s16     d6,  q3
        vst1.8          {d4},      [r3,:64], r2         @ write back row 6
        vst1.8          {d6},      [r3,:64], r2         @ write back row 7
        bx              lr
endfunc