Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Simple IDCT | |
3 | * | |
4 | * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
5 | * | |
6 | * This file is part of FFmpeg. | |
7 | * | |
8 | * FFmpeg is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * FFmpeg is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with FFmpeg; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | /** | |
24 | * @file | |
25 | * simpleidct in C. | |
26 | */ | |
27 | ||
28 | /* | |
29 | based upon some commented-out C code from mpeg2dec (idct_mmx.c | |
30 | written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) | |
31 | */ | |
32 | ||
33 | #include "simple_idct.h" | |
34 | ||
35 | #include "bit_depth_template.c" | |
36 | ||
37 | #undef W1 | |
38 | #undef W2 | |
39 | #undef W3 | |
40 | #undef W4 | |
41 | #undef W5 | |
42 | #undef W6 | |
43 | #undef W7 | |
44 | #undef ROW_SHIFT | |
45 | #undef COL_SHIFT | |
46 | #undef DC_SHIFT | |
47 | #undef MUL | |
48 | #undef MAC | |
49 | ||
50 | #if BIT_DEPTH == 8 | |
51 | ||
52 | #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
53 | #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
54 | #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
55 | #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
56 | #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
57 | #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
58 | #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
59 | ||
60 | #define ROW_SHIFT 11 | |
61 | #define COL_SHIFT 20 | |
62 | #define DC_SHIFT 3 | |
63 | ||
64 | #define MUL(a, b) MUL16(a, b) | |
65 | #define MAC(a, b, c) MAC16(a, b, c) | |
66 | ||
67 | #elif BIT_DEPTH == 10 || BIT_DEPTH == 12 | |
68 | ||
69 | #if BIT_DEPTH == 10 | |
70 | #define W1 (22725*4) // 90901 | |
71 | #define W2 (21407*4) // 85627 | |
72 | #define W3 (19265*4) // 77062 | |
73 | #define W4 (16384*4) // 65535 | |
74 | #define W5 (12873*4) // 51491 | |
75 | #define W6 ( 8867*4) // 35468 | |
76 | #define W7 ( 4520*4) // 18081 | |
77 | ||
78 | #define ROW_SHIFT 15 | |
79 | #define COL_SHIFT 20 | |
80 | #define DC_SHIFT 1 | |
81 | #else | |
82 | #define W1 45451 | |
83 | #define W2 42813 | |
84 | #define W3 38531 | |
85 | #define W4 32767 | |
86 | #define W5 25746 | |
87 | #define W6 17734 | |
88 | #define W7 9041 | |
89 | ||
90 | #define ROW_SHIFT 16 | |
91 | #define COL_SHIFT 17 | |
92 | #define DC_SHIFT -1 | |
93 | #endif | |
94 | ||
95 | #define MUL(a, b) ((a) * (b)) | |
96 | #define MAC(a, b, c) ((a) += (b) * (c)) | |
97 | ||
98 | #else | |
99 | ||
100 | #error "Unsupported bitdepth" | |
101 | ||
102 | #endif | |
103 | ||
104 | static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) | |
105 | { | |
106 | int a0, a1, a2, a3, b0, b1, b2, b3; | |
107 | ||
108 | #if HAVE_FAST_64BIT | |
109 | #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) | |
110 | if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) { | |
111 | uint64_t temp; | |
112 | if (DC_SHIFT - extra_shift >= 0) { | |
113 | temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff; | |
114 | } else { | |
115 | temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff; | |
116 | } | |
117 | temp += temp * (1 << 16); | |
118 | temp += temp * ((uint64_t) 1 << 32); | |
119 | ((uint64_t *)row)[0] = temp; | |
120 | ((uint64_t *)row)[1] = temp; | |
121 | return; | |
122 | } | |
123 | #else | |
124 | if (!(((uint32_t*)row)[1] | | |
125 | ((uint32_t*)row)[2] | | |
126 | ((uint32_t*)row)[3] | | |
127 | row[1])) { | |
128 | uint32_t temp; | |
129 | if (DC_SHIFT - extra_shift >= 0) { | |
130 | temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff; | |
131 | } else { | |
132 | temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff; | |
133 | } | |
134 | temp += temp * (1 << 16); | |
135 | ((uint32_t*)row)[0]=((uint32_t*)row)[1] = | |
136 | ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; | |
137 | return; | |
138 | } | |
139 | #endif | |
140 | ||
141 | a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1)); | |
142 | a1 = a0; | |
143 | a2 = a0; | |
144 | a3 = a0; | |
145 | ||
146 | a0 += W2 * row[2]; | |
147 | a1 += W6 * row[2]; | |
148 | a2 -= W6 * row[2]; | |
149 | a3 -= W2 * row[2]; | |
150 | ||
151 | b0 = MUL(W1, row[1]); | |
152 | MAC(b0, W3, row[3]); | |
153 | b1 = MUL(W3, row[1]); | |
154 | MAC(b1, -W7, row[3]); | |
155 | b2 = MUL(W5, row[1]); | |
156 | MAC(b2, -W1, row[3]); | |
157 | b3 = MUL(W7, row[1]); | |
158 | MAC(b3, -W5, row[3]); | |
159 | ||
160 | if (AV_RN64A(row + 4)) { | |
161 | a0 += W4*row[4] + W6*row[6]; | |
162 | a1 += - W4*row[4] - W2*row[6]; | |
163 | a2 += - W4*row[4] + W2*row[6]; | |
164 | a3 += W4*row[4] - W6*row[6]; | |
165 | ||
166 | MAC(b0, W5, row[5]); | |
167 | MAC(b0, W7, row[7]); | |
168 | ||
169 | MAC(b1, -W1, row[5]); | |
170 | MAC(b1, -W5, row[7]); | |
171 | ||
172 | MAC(b2, W7, row[5]); | |
173 | MAC(b2, W3, row[7]); | |
174 | ||
175 | MAC(b3, W3, row[5]); | |
176 | MAC(b3, -W1, row[7]); | |
177 | } | |
178 | ||
179 | row[0] = (a0 + b0) >> (ROW_SHIFT + extra_shift); | |
180 | row[7] = (a0 - b0) >> (ROW_SHIFT + extra_shift); | |
181 | row[1] = (a1 + b1) >> (ROW_SHIFT + extra_shift); | |
182 | row[6] = (a1 - b1) >> (ROW_SHIFT + extra_shift); | |
183 | row[2] = (a2 + b2) >> (ROW_SHIFT + extra_shift); | |
184 | row[5] = (a2 - b2) >> (ROW_SHIFT + extra_shift); | |
185 | row[3] = (a3 + b3) >> (ROW_SHIFT + extra_shift); | |
186 | row[4] = (a3 - b3) >> (ROW_SHIFT + extra_shift); | |
187 | } | |
188 | ||
/*
 * Column pass shared by the idctSparseCol* functions.
 *
 * Expects "int16_t *col" plus the ints a0..a3 and b0..b3 in the expanding
 * scope. Reads col[8*0] .. col[8*7] and leaves the even-part sums in a0..a3
 * and the odd-part sums in b0..b3; the caller combines them as (aK +/- bK)
 * and shifts by COL_SHIFT. Rows 4..7 are only accumulated when non-zero.
 */
#define IDCT_COLS do {                                          \
        /* even part: rows 0, 2, 4, 6 (rounding bias folded     \
         * into the DC term) */                                 \
        a3 = a2 = a1 = a0 =                                     \
            W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));          \
                                                                \
        a0 +=  W2*col[8*2];                                     \
        a1 +=  W6*col[8*2];                                     \
        a2 += -W6*col[8*2];                                     \
        a3 += -W2*col[8*2];                                     \
                                                                \
        if (col[8*4]) {                                         \
            a0 +=  W4*col[8*4];                                 \
            a1 += -W4*col[8*4];                                 \
            a2 += -W4*col[8*4];                                 \
            a3 +=  W4*col[8*4];                                 \
        }                                                       \
                                                                \
        if (col[8*6]) {                                         \
            a0 +=  W6*col[8*6];                                 \
            a1 += -W2*col[8*6];                                 \
            a2 +=  W2*col[8*6];                                 \
            a3 += -W6*col[8*6];                                 \
        }                                                       \
                                                                \
        /* odd part: rows 1, 3, 5, 7 */                         \
        b0 = MUL(W1, col[8*1]);                                 \
        MAC(b0,  W3, col[8*3]);                                 \
        b1 = MUL(W3, col[8*1]);                                 \
        MAC(b1, -W7, col[8*3]);                                 \
        b2 = MUL(W5, col[8*1]);                                 \
        MAC(b2, -W1, col[8*3]);                                 \
        b3 = MUL(W7, col[8*1]);                                 \
        MAC(b3, -W5, col[8*3]);                                 \
                                                                \
        if (col[8*5]) {                                         \
            MAC(b0,  W5, col[8*5]);                             \
            MAC(b1, -W1, col[8*5]);                             \
            MAC(b2,  W7, col[8*5]);                             \
            MAC(b3,  W3, col[8*5]);                             \
        }                                                       \
                                                                \
        if (col[8*7]) {                                         \
            MAC(b0,  W7, col[8*7]);                             \
            MAC(b1, -W5, col[8*7]);                             \
            MAC(b2,  W3, col[8*7]);                             \
            MAC(b3, -W1, col[8*7]);                             \
        }                                                       \
    } while (0)
238 | ||
239 | static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, | |
240 | int16_t *col) | |
241 | { | |
242 | int a0, a1, a2, a3, b0, b1, b2, b3; | |
243 | ||
244 | IDCT_COLS; | |
245 | ||
246 | dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT); | |
247 | dest += line_size; | |
248 | dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT); | |
249 | dest += line_size; | |
250 | dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT); | |
251 | dest += line_size; | |
252 | dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT); | |
253 | dest += line_size; | |
254 | dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT); | |
255 | dest += line_size; | |
256 | dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT); | |
257 | dest += line_size; | |
258 | dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT); | |
259 | dest += line_size; | |
260 | dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT); | |
261 | } | |
262 | ||
263 | static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, | |
264 | int16_t *col) | |
265 | { | |
266 | int a0, a1, a2, a3, b0, b1, b2, b3; | |
267 | ||
268 | IDCT_COLS; | |
269 | ||
270 | dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT)); | |
271 | dest += line_size; | |
272 | dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT)); | |
273 | dest += line_size; | |
274 | dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT)); | |
275 | dest += line_size; | |
276 | dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT)); | |
277 | dest += line_size; | |
278 | dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT)); | |
279 | dest += line_size; | |
280 | dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT)); | |
281 | dest += line_size; | |
282 | dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT)); | |
283 | dest += line_size; | |
284 | dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT)); | |
285 | } | |
286 | ||
287 | static inline void FUNC(idctSparseCol)(int16_t *col) | |
288 | { | |
289 | int a0, a1, a2, a3, b0, b1, b2, b3; | |
290 | ||
291 | IDCT_COLS; | |
292 | ||
293 | col[0 ] = ((a0 + b0) >> COL_SHIFT); | |
294 | col[8 ] = ((a1 + b1) >> COL_SHIFT); | |
295 | col[16] = ((a2 + b2) >> COL_SHIFT); | |
296 | col[24] = ((a3 + b3) >> COL_SHIFT); | |
297 | col[32] = ((a3 - b3) >> COL_SHIFT); | |
298 | col[40] = ((a2 - b2) >> COL_SHIFT); | |
299 | col[48] = ((a1 - b1) >> COL_SHIFT); | |
300 | col[56] = ((a0 - b0) >> COL_SHIFT); | |
301 | } | |
302 | ||
303 | void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block) | |
304 | { | |
305 | pixel *dest = (pixel *)dest_; | |
306 | int i; | |
307 | ||
308 | line_size /= sizeof(pixel); | |
309 | ||
310 | for (i = 0; i < 8; i++) | |
311 | FUNC(idctRowCondDC)(block + i*8, 0); | |
312 | ||
313 | for (i = 0; i < 8; i++) | |
314 | FUNC(idctSparseColPut)(dest + i, line_size, block + i); | |
315 | } | |
316 | ||
317 | void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, int16_t *block) | |
318 | { | |
319 | pixel *dest = (pixel *)dest_; | |
320 | int i; | |
321 | ||
322 | line_size /= sizeof(pixel); | |
323 | ||
324 | for (i = 0; i < 8; i++) | |
325 | FUNC(idctRowCondDC)(block + i*8, 0); | |
326 | ||
327 | for (i = 0; i < 8; i++) | |
328 | FUNC(idctSparseColAdd)(dest + i, line_size, block + i); | |
329 | } | |
330 | ||
331 | void FUNC(ff_simple_idct)(int16_t *block) | |
332 | { | |
333 | int i; | |
334 | ||
335 | for (i = 0; i < 8; i++) | |
336 | FUNC(idctRowCondDC)(block + i*8, 0); | |
337 | ||
338 | for (i = 0; i < 8; i++) | |
339 | FUNC(idctSparseCol)(block + i); | |
340 | } |