/*
 * Copyright (C) 2006-2011 Xvid Solutions GmbH
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file
 * Alternative IDCT implementation for decoding compatibility.
 *
 * @note This C version is not the original IDCT, but a modified one that
 *       yields the same error profile as the MMX/MMXEXT/SSE2 versions.
 */
34 #include "libavutil/attributes.h"
/*
 * Fixed-point precision of the two passes: the row pass discards ROW_SHIFT
 * fractional bits, the column pass discards COL_SHIFT.
 *
 * NOTE(review): both macros are used throughout this file but their
 * definitions were absent from the listing.  The values below follow from
 * the comments on the rounding constants: FIX(0.25) == 512 requires
 * 1 << ROW_SHIFT == 2048, and RND0 == 65536 == 1 << (COL_SHIFT + ROW_SHIFT - 1)
 * then requires COL_SHIFT == 6.
 */
#define ROW_SHIFT 11
#define COL_SHIFT 6

/* Per-row rounding biases added before the row pass shifts its result. */
// #define FIX(x)   (int)((x) * (1 << ROW_SHIFT))
#define RND0 65536 // 1 << (COL_SHIFT + ROW_SHIFT - 1);
#define RND1 3597  // FIX (1.75683487303);
#define RND2 2260  // FIX (1.10355339059);
#define RND3 1203  // FIX (0.587788325588);
#define RND4 0     // NOTE(review): used by ff_xvid_idct() but missing from the
                   // listing; zero bias assumed -- confirm against upstream.
#define RND5 120   // FIX (0.058658283817);
#define RND6 512   // FIX (0.25);
#define RND7 512   // FIX (0.25);
/*
 * Row-pass multiplier tables.  Each table holds the seven coefficients
 * c1..c7 that idct_row() reads from tab[0]..tab[6].  ff_xvid_idct() passes
 * TAB04 for rows 0 and 4, TAB17 for rows 1 and 7, TAB26 for rows 2 and 6,
 * and TAB35 for rows 3 and 5.
 */
static const int TAB04[] = { 22725, 21407, 19266, 16384, 12873,  8867, 4520 };
static const int TAB17[] = { 31521, 29692, 26722, 22725, 17855, 12299, 6270 };
static const int TAB26[] = { 29692, 27969, 25172, 21407, 16819, 11585, 5906 };
static const int TAB35[] = { 26722, 25172, 22654, 19266, 15137, 10426, 5315 };
57 static int idct_row(short *in
, const int *const tab
, int rnd
)
59 const int c1
= tab
[0];
60 const int c2
= tab
[1];
61 const int c3
= tab
[2];
62 const int c4
= tab
[3];
63 const int c5
= tab
[4];
64 const int c6
= tab
[5];
65 const int c7
= tab
[6];
67 const int right
= in
[5] | in
[6] | in
[7];
68 const int left
= in
[1] | in
[2] | in
[3];
69 if (!(right
| in
[4])) {
70 const int k
= c4
* in
[0] + rnd
;
72 const int a0
= k
+ c2
* in
[2];
73 const int a1
= k
+ c6
* in
[2];
74 const int a2
= k
- c6
* in
[2];
75 const int a3
= k
- c2
* in
[2];
77 const int b0
= c1
* in
[1] + c3
* in
[3];
78 const int b1
= c3
* in
[1] - c7
* in
[3];
79 const int b2
= c5
* in
[1] - c1
* in
[3];
80 const int b3
= c7
* in
[1] - c5
* in
[3];
82 in
[0] = (a0
+ b0
) >> ROW_SHIFT
;
83 in
[1] = (a1
+ b1
) >> ROW_SHIFT
;
84 in
[2] = (a2
+ b2
) >> ROW_SHIFT
;
85 in
[3] = (a3
+ b3
) >> ROW_SHIFT
;
86 in
[4] = (a3
- b3
) >> ROW_SHIFT
;
87 in
[5] = (a2
- b2
) >> ROW_SHIFT
;
88 in
[6] = (a1
- b1
) >> ROW_SHIFT
;
89 in
[7] = (a0
- b0
) >> ROW_SHIFT
;
91 const int a0
= k
>> ROW_SHIFT
;
104 } else if (!(left
| right
)) {
105 const int a0
= (rnd
+ c4
* (in
[0] + in
[4])) >> ROW_SHIFT
;
106 const int a1
= (rnd
+ c4
* (in
[0] - in
[4])) >> ROW_SHIFT
;
117 const int k
= c4
* in
[0] + rnd
;
118 const int a0
= k
+ c2
* in
[2] + c4
* in
[4] + c6
* in
[6];
119 const int a1
= k
+ c6
* in
[2] - c4
* in
[4] - c2
* in
[6];
120 const int a2
= k
- c6
* in
[2] - c4
* in
[4] + c2
* in
[6];
121 const int a3
= k
- c2
* in
[2] + c4
* in
[4] - c6
* in
[6];
123 const int b0
= c1
* in
[1] + c3
* in
[3] + c5
* in
[5] + c7
* in
[7];
124 const int b1
= c3
* in
[1] - c7
* in
[3] - c1
* in
[5] - c5
* in
[7];
125 const int b2
= c5
* in
[1] - c1
* in
[3] + c7
* in
[5] + c3
* in
[7];
126 const int b3
= c7
* in
[1] - c5
* in
[3] + c3
* in
[5] - c1
* in
[7];
128 in
[0] = (a0
+ b0
) >> ROW_SHIFT
;
129 in
[1] = (a1
+ b1
) >> ROW_SHIFT
;
130 in
[2] = (a2
+ b2
) >> ROW_SHIFT
;
131 in
[3] = (a3
+ b3
) >> ROW_SHIFT
;
132 in
[4] = (a3
- b3
) >> ROW_SHIFT
;
133 in
[5] = (a2
- b2
) >> ROW_SHIFT
;
134 in
[6] = (a1
- b1
) >> ROW_SHIFT
;
135 in
[7] = (a0
- b0
) >> ROW_SHIFT
;
/*
 * Column-pass multipliers.
 *
 * NOTE(review): idct_col_8/4/3 use these four names but their definitions
 * were absent from the listing.  The values are tan(k * pi / 16) scaled by
 * 1 << 16 for TAN1..TAN3, and sqrt(2) / 2 scaled by 1 << 15 for SQRT2 (the
 * callers double the product) -- confirm against upstream.
 */
#define TAN1  0x32EC
#define TAN2  0x6A0A
#define TAN3  0xAB0E
#define SQRT2 0x5A82

/* Fixed-point multiply: product of c and x with the low n bits discarded. */
#define MULT(c, x, n)  (((c) * (x)) >> (n))
// 12b version => #define MULT(c,x, n)  ((((c) >> 3) * (x)) >> ((n) - 3))
// 12b zero-testing version:

/*
 * In-place butterfly: afterwards a holds the old a + b and b holds the old
 * a - b; tmp is scratch storage and is overwritten.
 * NOTE(review): the macro body was absent from the listing and is restored
 * from how the column functions use it.
 */
#define BUTTERFLY(a, b, tmp)     \
    do {                         \
        (tmp) = (a) + (b);       \
        (b)   = (a) - (b);       \
        (a)   = (tmp);           \
    } while (0)

/*
 * Load s[a] and s[b] and store their sum in m1 and their difference in m2.
 * tmp is accepted for symmetry with BUTTERFLY() but is not used.
 */
#define LOAD_BUTTERFLY(m1, m2, a, b, tmp, s)   \
    do {                                       \
        (m1) = (s)[(a)] + (s)[(b)];            \
        (m2) = (s)[(a)] - (s)[(b)];            \
    } while (0)
158 static void idct_col_8(short *const in
)
160 int mm0
, mm1
, mm2
, mm3
, mm4
, mm5
, mm6
, mm7
, spill
;
164 mm4
= (int) in
[7 * 8];
165 mm5
= (int) in
[5 * 8];
166 mm6
= (int) in
[3 * 8];
167 mm7
= (int) in
[1 * 8];
169 mm0
= MULT(TAN1
, mm4
, 16) + mm7
;
170 mm1
= MULT(TAN1
, mm7
, 16) - mm4
;
171 mm2
= MULT(TAN3
, mm5
, 16) + mm6
;
172 mm3
= MULT(TAN3
, mm6
, 16) - mm5
;
180 mm5
= 2 * MULT(SQRT2
, mm5
, 16); // 2*sqrt2
181 mm6
= 2 * MULT(SQRT2
, mm6
, 16); // Watch out: precision loss but done to match
182 // the pmulhw used in MMX/MMXEXT/SSE2 versions
186 mm1
= (int) in
[2 * 8];
187 mm2
= (int) in
[6 * 8];
188 mm3
= MULT(TAN2
, mm2
, 16) + mm1
;
189 mm2
= MULT(TAN2
, mm1
, 16) - mm2
;
191 LOAD_BUTTERFLY(mm0
, mm1
, 0 * 8, 4 * 8, spill
, in
);
193 BUTTERFLY(mm0
, mm3
, spill
);
194 BUTTERFLY(mm0
, mm7
, spill
);
195 in
[8 * 0] = (int16_t) (mm0
>> COL_SHIFT
);
196 in
[8 * 7] = (int16_t) (mm7
>> COL_SHIFT
);
197 BUTTERFLY(mm3
, mm4
, mm0
);
198 in
[8 * 3] = (int16_t) (mm3
>> COL_SHIFT
);
199 in
[8 * 4] = (int16_t) (mm4
>> COL_SHIFT
);
201 BUTTERFLY(mm1
, mm2
, mm0
);
202 BUTTERFLY(mm1
, mm6
, mm0
);
203 in
[8 * 1] = (int16_t) (mm1
>> COL_SHIFT
);
204 in
[8 * 6] = (int16_t) (mm6
>> COL_SHIFT
);
205 BUTTERFLY(mm2
, mm5
, mm0
);
206 in
[8 * 2] = (int16_t) (mm2
>> COL_SHIFT
);
207 in
[8 * 5] = (int16_t) (mm5
>> COL_SHIFT
);
210 static void idct_col_4(short *const in
)
212 int mm0
, mm1
, mm2
, mm3
, mm4
, mm5
, mm6
, mm7
, spill
;
216 mm0
= (int) in
[1 * 8];
217 mm2
= (int) in
[3 * 8];
219 mm1
= MULT(TAN1
, mm0
, 16);
220 mm3
= MULT(TAN3
, mm2
, 16);
228 mm6
= 2 * MULT(SQRT2
, mm6
, 16); // 2*sqrt2
229 mm5
= 2 * MULT(SQRT2
, mm5
, 16);
233 mm0
= mm1
= (int) in
[0 * 8];
234 mm3
= (int) in
[2 * 8];
235 mm2
= MULT(TAN2
, mm3
, 16);
237 BUTTERFLY(mm0
, mm3
, spill
);
238 BUTTERFLY(mm0
, mm7
, spill
);
239 in
[8 * 0] = (int16_t) (mm0
>> COL_SHIFT
);
240 in
[8 * 7] = (int16_t) (mm7
>> COL_SHIFT
);
241 BUTTERFLY(mm3
, mm4
, mm0
);
242 in
[8 * 3] = (int16_t) (mm3
>> COL_SHIFT
);
243 in
[8 * 4] = (int16_t) (mm4
>> COL_SHIFT
);
245 BUTTERFLY(mm1
, mm2
, mm0
);
246 BUTTERFLY(mm1
, mm6
, mm0
);
247 in
[8 * 1] = (int16_t) (mm1
>> COL_SHIFT
);
248 in
[8 * 6] = (int16_t) (mm6
>> COL_SHIFT
);
249 BUTTERFLY(mm2
, mm5
, mm0
);
250 in
[8 * 2] = (int16_t) (mm2
>> COL_SHIFT
);
251 in
[8 * 5] = (int16_t) (mm5
>> COL_SHIFT
);
254 static void idct_col_3(short *const in
)
256 int mm0
, mm1
, mm2
, mm3
, mm4
, mm5
, mm6
, mm7
, spill
;
260 mm7
= (int) in
[1 * 8];
261 mm4
= MULT(TAN1
, mm7
, 16);
265 mm6
= 2 * MULT(SQRT2
, mm6
, 16); // 2*sqrt2
266 mm5
= 2 * MULT(SQRT2
, mm5
, 16);
270 mm0
= mm1
= (int) in
[0 * 8];
271 mm3
= (int) in
[2 * 8];
272 mm2
= MULT(TAN2
, mm3
, 16);
274 BUTTERFLY(mm0
, mm3
, spill
);
275 BUTTERFLY(mm0
, mm7
, spill
);
276 in
[8 * 0] = (int16_t) (mm0
>> COL_SHIFT
);
277 in
[8 * 7] = (int16_t) (mm7
>> COL_SHIFT
);
278 BUTTERFLY(mm3
, mm4
, mm0
);
279 in
[8 * 3] = (int16_t) (mm3
>> COL_SHIFT
);
280 in
[8 * 4] = (int16_t) (mm4
>> COL_SHIFT
);
282 BUTTERFLY(mm1
, mm2
, mm0
);
283 BUTTERFLY(mm1
, mm6
, mm0
);
284 in
[8 * 1] = (int16_t) (mm1
>> COL_SHIFT
);
285 in
[8 * 6] = (int16_t) (mm6
>> COL_SHIFT
);
286 BUTTERFLY(mm2
, mm5
, mm0
);
287 in
[8 * 2] = (int16_t) (mm2
>> COL_SHIFT
);
288 in
[8 * 5] = (int16_t) (mm5
>> COL_SHIFT
);
291 void ff_xvid_idct(int16_t *const in
)
295 idct_row(in
+ 0 * 8, TAB04
, RND0
);
296 idct_row(in
+ 1 * 8, TAB17
, RND1
);
297 idct_row(in
+ 2 * 8, TAB26
, RND2
);
298 if (idct_row(in
+ 3 * 8, TAB35
, RND3
))
300 if (idct_row(in
+ 4 * 8, TAB04
, RND4
))
302 if (idct_row(in
+ 5 * 8, TAB35
, RND5
))
304 if (idct_row(in
+ 6 * 8, TAB26
, RND6
))
306 if (idct_row(in
+ 7 * 8, TAB17
, RND7
))
310 for (i
= 0; i
< 8; i
++)
312 } else if (rows
& 0x08) {
313 for (i
= 0; i
< 8; i
++)
316 for (i
= 0; i
< 8; i
++)
/**
 * Inverse-transform a block and store the result to dest through
 * ff_put_pixels_clamped().
 *
 * @param dest      destination passed to ff_put_pixels_clamped()
 * @param line_size line size passed to ff_put_pixels_clamped()
 * @param block     coefficient block; transformed in place
 *
 * NOTE(review): the ff_xvid_idct() call was absent from the listing; it is
 * restored because without it the untransformed coefficients would be
 * written out -- confirm against upstream.
 */
static void xvid_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_xvid_idct(block);
    ff_put_pixels_clamped(block, dest, line_size);
}
/**
 * Inverse-transform a block and combine the result with dest through
 * ff_add_pixels_clamped().
 *
 * @param dest      destination passed to ff_add_pixels_clamped()
 * @param line_size line size passed to ff_add_pixels_clamped()
 * @param block     coefficient block; transformed in place
 *
 * NOTE(review): the ff_xvid_idct() call was absent from the listing; it is
 * restored because without it the untransformed coefficients would be
 * added -- confirm against upstream.
 */
static void xvid_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_xvid_idct(block);
    ff_add_pixels_clamped(block, dest, line_size);
}
333 av_cold
void ff_xvid_idct_init(IDCTDSPContext
*c
, AVCodecContext
*avctx
)
335 const unsigned high_bit_depth
= avctx
->bits_per_raw_sample
> 8;
337 if (high_bit_depth
|| avctx
->lowres
||
338 !(avctx
->idct_algo
== FF_IDCT_AUTO
||
339 avctx
->idct_algo
== FF_IDCT_XVID
))
342 if (avctx
->idct_algo
== FF_IDCT_XVID
) {
343 c
->idct_put
= xvid_idct_put
;
344 c
->idct_add
= xvid_idct_add
;
345 c
->idct
= ff_xvid_idct
;
346 c
->perm_type
= FF_IDCT_PERM_NONE
;
350 ff_xvid_idct_init_x86(c
, avctx
, high_bit_depth
);
352 ff_init_scantable_permutation(c
->idct_permutation
, c
->perm_type
);