2 * Floating point AAN DCT
3 * this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c)
5 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
6 * Copyright (c) 2003 Roman Shaposhnik
8 * Permission to use, copy, modify, and/or distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24 * Floating point AAN DCT
25 * @author Michael Niedermayer <michaelni@gmx.at>
29 #include "libavutil/internal.h"
30 #include "libavutil/libm.h"
34 //numbers generated by simple c code (not as accurate as they could be)
37 printf("#define B%d %1.20llf\n", i, (long double)1.0/(cosl(i*acosl(-1.0)/(long double)16.0)*sqrtl(2)));
40 #define B0 1.00000000000000000000
41 #define B1 0.72095982200694791383 // (cos(pi*1/16)sqrt(2))^-1
42 #define B2 0.76536686473017954350 // (cos(pi*2/16)sqrt(2))^-1
43 #define B3 0.85043009476725644878 // (cos(pi*3/16)sqrt(2))^-1
44 #define B4 1.00000000000000000000 // (cos(pi*4/16)sqrt(2))^-1
45 #define B5 1.27275858057283393842 // (cos(pi*5/16)sqrt(2))^-1
46 #define B6 1.84775906502257351242 // (cos(pi*6/16)sqrt(2))^-1
47 #define B7 3.62450978541155137218 // (cos(pi*7/16)sqrt(2))^-1
50 #define A1 0.70710678118654752438 // cos(pi*4/16)
51 #define A2 0.54119610014619698435 // cos(pi*6/16)sqrt(2)
52 #define A5 0.38268343236508977170 // cos(pi*6/16)
53 #define A4 1.30656296487637652774 // cos(pi*2/16)sqrt(2)
55 static const FLOAT postscale
[64]={
56 B0
*B0
, B0
*B1
, B0
*B2
, B0
*B3
, B0
*B4
, B0
*B5
, B0
*B6
, B0
*B7
,
57 B1
*B0
, B1
*B1
, B1
*B2
, B1
*B3
, B1
*B4
, B1
*B5
, B1
*B6
, B1
*B7
,
58 B2
*B0
, B2
*B1
, B2
*B2
, B2
*B3
, B2
*B4
, B2
*B5
, B2
*B6
, B2
*B7
,
59 B3
*B0
, B3
*B1
, B3
*B2
, B3
*B3
, B3
*B4
, B3
*B5
, B3
*B6
, B3
*B7
,
60 B4
*B0
, B4
*B1
, B4
*B2
, B4
*B3
, B4
*B4
, B4
*B5
, B4
*B6
, B4
*B7
,
61 B5
*B0
, B5
*B1
, B5
*B2
, B5
*B3
, B5
*B4
, B5
*B5
, B5
*B6
, B5
*B7
,
62 B6
*B0
, B6
*B1
, B6
*B2
, B6
*B3
, B6
*B4
, B6
*B5
, B6
*B6
, B6
*B7
,
63 B7
*B0
, B7
*B1
, B7
*B2
, B7
*B3
, B7
*B4
, B7
*B5
, B7
*B6
, B7
*B7
,
66 static av_always_inline
void row_fdct(FLOAT temp
[64], int16_t *data
)
68 FLOAT tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
69 FLOAT tmp10
, tmp11
, tmp12
, tmp13
;
70 FLOAT z2
, z4
, z11
, z13
;
73 for (i
=0; i
<8*8; i
+=8) {
74 tmp0
= data
[0 + i
] + data
[7 + i
];
75 tmp7
= data
[0 + i
] - data
[7 + i
];
76 tmp1
= data
[1 + i
] + data
[6 + i
];
77 tmp6
= data
[1 + i
] - data
[6 + i
];
78 tmp2
= data
[2 + i
] + data
[5 + i
];
79 tmp5
= data
[2 + i
] - data
[5 + i
];
80 tmp3
= data
[3 + i
] + data
[4 + i
];
81 tmp4
= data
[3 + i
] - data
[4 + i
];
88 temp
[0 + i
]= tmp10
+ tmp11
;
89 temp
[4 + i
]= tmp10
- tmp11
;
93 temp
[2 + i
]= tmp13
+ tmp12
;
94 temp
[6 + i
]= tmp13
- tmp12
;
103 z5
= (tmp4
- tmp6
) * A5
;
108 z2
= tmp4
*(A2
+A5
) - tmp6
*A5
;
109 z4
= tmp6
*(A4
-A5
) + tmp4
*A5
;
116 temp
[5 + i
]= z13
+ z2
;
117 temp
[3 + i
]= z13
- z2
;
118 temp
[1 + i
]= z11
+ z4
;
119 temp
[7 + i
]= z11
- z4
;
123 void ff_faandct(int16_t *data
)
125 FLOAT tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
126 FLOAT tmp10
, tmp11
, tmp12
, tmp13
;
127 FLOAT z2
, z4
, z11
, z13
;
133 row_fdct(temp
, data
);
135 for (i
=0; i
<8; i
++) {
136 tmp0
= temp
[8*0 + i
] + temp
[8*7 + i
];
137 tmp7
= temp
[8*0 + i
] - temp
[8*7 + i
];
138 tmp1
= temp
[8*1 + i
] + temp
[8*6 + i
];
139 tmp6
= temp
[8*1 + i
] - temp
[8*6 + i
];
140 tmp2
= temp
[8*2 + i
] + temp
[8*5 + i
];
141 tmp5
= temp
[8*2 + i
] - temp
[8*5 + i
];
142 tmp3
= temp
[8*3 + i
] + temp
[8*4 + i
];
143 tmp4
= temp
[8*3 + i
] - temp
[8*4 + i
];
150 data
[8*0 + i
]= lrintf(postscale
[8*0 + i
] * (tmp10
+ tmp11
));
151 data
[8*4 + i
]= lrintf(postscale
[8*4 + i
] * (tmp10
- tmp11
));
155 data
[8*2 + i
]= lrintf(postscale
[8*2 + i
] * (tmp13
+ tmp12
));
156 data
[8*6 + i
]= lrintf(postscale
[8*6 + i
] * (tmp13
- tmp12
));
165 z5
= (tmp4
- tmp6
) * A5
;
170 z2
= tmp4
*(A2
+A5
) - tmp6
*A5
;
171 z4
= tmp6
*(A4
-A5
) + tmp4
*A5
;
178 data
[8*5 + i
]= lrintf(postscale
[8*5 + i
] * (z13
+ z2
));
179 data
[8*3 + i
]= lrintf(postscale
[8*3 + i
] * (z13
- z2
));
180 data
[8*1 + i
]= lrintf(postscale
[8*1 + i
] * (z11
+ z4
));
181 data
[8*7 + i
]= lrintf(postscale
[8*7 + i
] * (z11
- z4
));
185 void ff_faandct248(int16_t *data
)
187 FLOAT tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
188 FLOAT tmp10
, tmp11
, tmp12
, tmp13
;
194 row_fdct(temp
, data
);
196 for (i
=0; i
<8; i
++) {
197 tmp0
= temp
[8*0 + i
] + temp
[8*1 + i
];
198 tmp1
= temp
[8*2 + i
] + temp
[8*3 + i
];
199 tmp2
= temp
[8*4 + i
] + temp
[8*5 + i
];
200 tmp3
= temp
[8*6 + i
] + temp
[8*7 + i
];
201 tmp4
= temp
[8*0 + i
] - temp
[8*1 + i
];
202 tmp5
= temp
[8*2 + i
] - temp
[8*3 + i
];
203 tmp6
= temp
[8*4 + i
] - temp
[8*5 + i
];
204 tmp7
= temp
[8*6 + i
] - temp
[8*7 + i
];
211 data
[8*0 + i
] = lrintf(postscale
[8*0 + i
] * (tmp10
+ tmp11
));
212 data
[8*4 + i
] = lrintf(postscale
[8*4 + i
] * (tmp10
- tmp11
));
216 data
[8*2 + i
] = lrintf(postscale
[8*2 + i
] * (tmp13
+ tmp12
));
217 data
[8*6 + i
] = lrintf(postscale
[8*6 + i
] * (tmp13
- tmp12
));
224 data
[8*1 + i
] = lrintf(postscale
[8*0 + i
] * (tmp10
+ tmp11
));
225 data
[8*5 + i
] = lrintf(postscale
[8*4 + i
] * (tmp10
- tmp11
));
229 data
[8*3 + i
] = lrintf(postscale
[8*2 + i
] * (tmp13
+ tmp12
));
230 data
[8*7 + i
] = lrintf(postscale
[8*6 + i
] * (tmp13
- tmp12
));