3 * Copyright (c) 2004-2011 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #include "bit_depth_template.c"
29 #include "libavutil/common.h"
/*
 * Two-pass butterfly transform of the 16 coefficients in _block; the result
 * is added to the pixels at _dst.
 *
 * _dst   is accessed as pixel*, _block as dctcoef*.
 * stride is shifted right by sizeof(pixel)-1 before it is used to index dst
 *        (presumably converts a byte stride into pixel units -- TODO confirm
 *        against the definition of pixel).
 *
 * Every pixel store goes through av_clip_pixel(). The 16 coefficients are
 * zeroed with memset() after the pixels have been updated.
 */
33 void FUNCC(ff_h264_idct_add
)(uint8_t *_dst
, int16_t *_block
, int stride
)
36 pixel
*dst
= (pixel
*)_dst
;
37 dctcoef
*block
= (dctcoef
*)_block
;
38 stride
>>= sizeof(pixel
)-1;
/* Pass 1: combine block[i], block[i+4], block[i+8] and block[i+12];
 * the four results are stored back into the same four entries. */
43 const int z0
= block
[i
+ 4*0] + block
[i
+ 4*2];
44 const int z1
= block
[i
+ 4*0] - block
[i
+ 4*2];
45 const int z2
= (block
[i
+ 4*1]>>1) - block
[i
+ 4*3];
46 const int z3
= block
[i
+ 4*1] + (block
[i
+ 4*3]>>1);
48 block
[i
+ 4*0]= z0
+ z3
;
49 block
[i
+ 4*1]= z1
+ z2
;
50 block
[i
+ 4*2]= z1
- z2
;
51 block
[i
+ 4*3]= z0
- z3
;
/* Pass 2: combine block[4*i] .. block[4*i+3]; each result is shifted right
 * by 6 and added to dst[i + k*stride] for k = 0..3. */
55 const int z0
= block
[0 + 4*i
] + block
[2 + 4*i
];
56 const int z1
= block
[0 + 4*i
] - block
[2 + 4*i
];
57 const int z2
= (block
[1 + 4*i
]>>1) - block
[3 + 4*i
];
58 const int z3
= block
[1 + 4*i
] + (block
[3 + 4*i
]>>1);
60 dst
[i
+ 0*stride
]= av_clip_pixel(dst
[i
+ 0*stride
] + ((z0
+ z3
) >> 6));
61 dst
[i
+ 1*stride
]= av_clip_pixel(dst
[i
+ 1*stride
] + ((z1
+ z2
) >> 6));
62 dst
[i
+ 2*stride
]= av_clip_pixel(dst
[i
+ 2*stride
] + ((z1
- z2
) >> 6));
63 dst
[i
+ 3*stride
]= av_clip_pixel(dst
[i
+ 3*stride
] + ((z0
- z3
) >> 6));
/* Reset all 16 coefficients to zero. */
66 memset(block
, 0, 16 * sizeof(dctcoef
));
/*
 * Two-pass butterfly transform of the 64 coefficients in _block; the result
 * is added to the pixels at _dst.
 *
 * _dst   is accessed as pixel*, _block as dctcoef*.
 * stride is shifted right by sizeof(pixel)-1 before it is used to index dst
 *        (presumably converts a byte stride into pixel units -- TODO confirm
 *        against the definition of pixel).
 *
 * Both passes run i = 0..7. Pass 1 rewrites block[i + k*8] in place; pass 2
 * reads block[k + i*8], shifts each result right by 6 and adds it to
 * dst[i + k*stride] through av_clip_pixel(). The 64 coefficients are zeroed
 * with memset() afterwards.
 */
69 void FUNCC(ff_h264_idct8_add
)(uint8_t *_dst
, int16_t *_block
, int stride
){
71 pixel
*dst
= (pixel
*)_dst
;
72 dctcoef
*block
= (dctcoef
*)_block
;
73 stride
>>= sizeof(pixel
)-1;
/* Pass 1: operates on the eight entries block[i + k*8], k = 0..7. */
77 for( i
= 0; i
< 8; i
++ )
/* Even-indexed inputs (k = 0, 4, 2, 6). */
79 const int a0
= block
[i
+0*8] + block
[i
+4*8];
80 const int a2
= block
[i
+0*8] - block
[i
+4*8];
81 const int a4
= (block
[i
+2*8]>>1) - block
[i
+6*8];
82 const int a6
= (block
[i
+6*8]>>1) + block
[i
+2*8];
84 const int b0
= a0
+ a6
;
85 const int b2
= a2
+ a4
;
86 const int b4
= a2
- a4
;
87 const int b6
= a0
- a6
;
/* Odd-indexed inputs (k = 1, 3, 5, 7). */
89 const int a1
= -block
[i
+3*8] + block
[i
+5*8] - block
[i
+7*8] - (block
[i
+7*8]>>1);
90 const int a3
= block
[i
+1*8] + block
[i
+7*8] - block
[i
+3*8] - (block
[i
+3*8]>>1);
91 const int a5
= -block
[i
+1*8] + block
[i
+7*8] + block
[i
+5*8] + (block
[i
+5*8]>>1);
92 const int a7
= block
[i
+3*8] + block
[i
+5*8] + block
[i
+1*8] + (block
[i
+1*8]>>1);
94 const int b1
= (a7
>>2) + a1
;
95 const int b3
= a3
+ (a5
>>2);
96 const int b5
= (a3
>>2) - a5
;
97 const int b7
= a7
- (a1
>>2);
/* Store sums in entries k = 0..3 and the matching differences in the
 * mirrored entries k = 7..4. */
99 block
[i
+0*8] = b0
+ b7
;
100 block
[i
+7*8] = b0
- b7
;
101 block
[i
+1*8] = b2
+ b5
;
102 block
[i
+6*8] = b2
- b5
;
103 block
[i
+2*8] = b4
+ b3
;
104 block
[i
+5*8] = b4
- b3
;
105 block
[i
+3*8] = b6
+ b1
;
106 block
[i
+4*8] = b6
- b1
;
/* Pass 2: same arithmetic on the eight entries block[k + i*8], k = 0..7. */
108 for( i
= 0; i
< 8; i
++ )
110 const int a0
= block
[0+i
*8] + block
[4+i
*8];
111 const int a2
= block
[0+i
*8] - block
[4+i
*8];
112 const int a4
= (block
[2+i
*8]>>1) - block
[6+i
*8];
113 const int a6
= (block
[6+i
*8]>>1) + block
[2+i
*8];
115 const int b0
= a0
+ a6
;
116 const int b2
= a2
+ a4
;
117 const int b4
= a2
- a4
;
118 const int b6
= a0
- a6
;
120 const int a1
= -block
[3+i
*8] + block
[5+i
*8] - block
[7+i
*8] - (block
[7+i
*8]>>1);
121 const int a3
= block
[1+i
*8] + block
[7+i
*8] - block
[3+i
*8] - (block
[3+i
*8]>>1);
122 const int a5
= -block
[1+i
*8] + block
[7+i
*8] + block
[5+i
*8] + (block
[5+i
*8]>>1);
123 const int a7
= block
[3+i
*8] + block
[5+i
*8] + block
[1+i
*8] + (block
[1+i
*8]>>1);
125 const int b1
= (a7
>>2) + a1
;
126 const int b3
= a3
+ (a5
>>2);
127 const int b5
= (a3
>>2) - a5
;
128 const int b7
= a7
- (a1
>>2);
/* Add the results, shifted right by 6, to dst[i + k*stride], k = 0..7,
 * clipping each sum with av_clip_pixel(). */
130 dst
[i
+ 0*stride
] = av_clip_pixel( dst
[i
+ 0*stride
] + ((b0
+ b7
) >> 6) );
131 dst
[i
+ 1*stride
] = av_clip_pixel( dst
[i
+ 1*stride
] + ((b2
+ b5
) >> 6) );
132 dst
[i
+ 2*stride
] = av_clip_pixel( dst
[i
+ 2*stride
] + ((b4
+ b3
) >> 6) );
133 dst
[i
+ 3*stride
] = av_clip_pixel( dst
[i
+ 3*stride
] + ((b6
+ b1
) >> 6) );
134 dst
[i
+ 4*stride
] = av_clip_pixel( dst
[i
+ 4*stride
] + ((b6
- b1
) >> 6) );
135 dst
[i
+ 5*stride
] = av_clip_pixel( dst
[i
+ 5*stride
] + ((b4
- b3
) >> 6) );
136 dst
[i
+ 6*stride
] = av_clip_pixel( dst
[i
+ 6*stride
] + ((b2
- b5
) >> 6) );
137 dst
[i
+ 7*stride
] = av_clip_pixel( dst
[i
+ 7*stride
] + ((b0
- b7
) >> 6) );
/* Reset all 64 coefficients to zero. */
140 memset(block
, 0, 64 * sizeof(dctcoef
));
143 // assumes all AC coefs are 0
/*
 * Adds a single value derived from block[0] to a block of pixels at _dst.
 *
 * _dst   is accessed as pixel*, _block as dctcoef*.
 * stride is divided by sizeof(pixel) (presumably converts a byte stride into
 *        pixel units -- TODO confirm).
 *
 * The added value is (block[0] + 32) >> 6. The loops run j = 0..3 and
 * i = 0..3; every store is clipped with av_clip_pixel().
 * NOTE(review): dst[i] does not depend on j in the statements here, so dst
 * is presumably advanced by stride once per j iteration -- confirm.
 */
144 void FUNCC(ff_h264_idct_dc_add
)(uint8_t *_dst
, int16_t *_block
, int stride
){
146 pixel
*dst
= (pixel
*)_dst
;
147 dctcoef
*block
= (dctcoef
*)_block
;
/* Rounding offset of 32, then shift right by 6. */
148 int dc
= (block
[0] + 32) >> 6;
149 stride
/= sizeof(pixel
);
151 for( j
= 0; j
< 4; j
++ )
153 for( i
= 0; i
< 4; i
++ )
154 dst
[i
] = av_clip_pixel( dst
[i
] + dc
);
/*
 * Adds a single value derived from block[0] to a block of pixels at _dst,
 * using 8 iterations of both the outer (j) and the inner (i) loop.
 *
 * _dst   is accessed as pixel*, _block as dctcoef*.
 * stride is divided by sizeof(pixel) (presumably converts a byte stride into
 *        pixel units -- TODO confirm).
 *
 * The added value is (block[0] + 32) >> 6; every store is clipped with
 * av_clip_pixel().
 * NOTE(review): dst[i] does not depend on j in the statements here, so dst
 * is presumably advanced by stride once per j iteration -- confirm.
 */
159 void FUNCC(ff_h264_idct8_dc_add
)(uint8_t *_dst
, int16_t *_block
, int stride
){
161 pixel
*dst
= (pixel
*)_dst
;
162 dctcoef
*block
= (dctcoef
*)_block
;
/* Rounding offset of 32, then shift right by 6. */
163 int dc
= (block
[0] + 32) >> 6;
165 stride
/= sizeof(pixel
);
166 for( j
= 0; j
< 8; j
++ )
168 for( i
= 0; i
< 8; i
++ )
169 dst
[i
] = av_clip_pixel( dst
[i
] + dc
);
/*
 * Dispatches block i to either the DC-only or the full 4x4 add routine.
 *
 * dst          base pointer; block i is written at dst + block_offset[i]
 * block_offset per-block offsets added to dst
 * block        coefficient storage; block i starts at
 *              block + i*16*sizeof(pixel), counted in int16_t units
 *              (presumably sizeof(dctcoef) grows with sizeof(pixel) so this
 *              equals 16 dctcoef entries per block -- TODO confirm)
 * stride       passed through unchanged to the called routine
 * nnzc         table indexed through scan8[i]
 *
 * NOTE(review): the loop over i and any condition enclosing the if/else are
 * not among the statements here.
 */
174 void FUNCC(ff_h264_idct_add16
)(uint8_t *dst
, const int *block_offset
, int16_t *block
, int stride
, const uint8_t nnzc
[15*8]){
177 int nnz
= nnzc
[ scan8
[i
] ];
/* nnz == 1 with a non-zero first coefficient selects the DC-only routine;
 * otherwise the full transform is used. */
179 if(nnz
==1 && ((dctcoef
*)block
)[i
*16]) FUNCC(ff_h264_idct_dc_add
)(dst
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
180 else FUNCC(ff_h264_idct_add
)(dst
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
/*
 * Dispatches block i to the full 4x4 add routine or to the DC-only routine.
 *
 * dst          base pointer; block i is written at dst + block_offset[i]
 * block_offset per-block offsets added to dst
 * block        coefficient storage; block i starts at
 *              block + i*16*sizeof(pixel), counted in int16_t units
 * stride       passed through unchanged to the called routine
 * nnzc         table indexed through scan8[i]
 *
 * A non-zero nnzc[scan8[i]] selects the full transform. Otherwise the
 * DC-only routine runs if the first coefficient of block i is non-zero, and
 * nothing is done for the block if it is zero.
 * NOTE(review): the loop over i is not among the statements here.
 */
185 void FUNCC(ff_h264_idct_add16intra
)(uint8_t *dst
, const int *block_offset
, int16_t *block
, int stride
, const uint8_t nnzc
[15*8]){
188 if(nnzc
[ scan8
[i
] ]) FUNCC(ff_h264_idct_add
)(dst
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
189 else if(((dctcoef
*)block
)[i
*16]) FUNCC(ff_h264_idct_dc_add
)(dst
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
/*
 * Runs the 8x8 add routines for i = 0, 4, 8, 12.
 *
 * dst          base pointer; block i is written at dst + block_offset[i]
 * block_offset per-block offsets added to dst
 * block        coefficient storage; block i starts at
 *              block + i*16*sizeof(pixel), counted in int16_t units
 * stride       passed through unchanged to the called routine
 * nnzc         table indexed through scan8[i]
 *
 * NOTE(review): any condition enclosing the if/else is not among the
 * statements here.
 */
193 void FUNCC(ff_h264_idct8_add4
)(uint8_t *dst
, const int *block_offset
, int16_t *block
, int stride
, const uint8_t nnzc
[15*8]){
/* i advances by 4 per iteration. */
195 for(i
=0; i
<16; i
+=4){
196 int nnz
= nnzc
[ scan8
[i
] ];
/* nnz == 1 with a non-zero first coefficient selects the DC-only routine;
 * otherwise the full 8x8 transform is used. */
198 if(nnz
==1 && ((dctcoef
*)block
)[i
*16]) FUNCC(ff_h264_idct8_dc_add
)(dst
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
199 else FUNCC(ff_h264_idct8_add
)(dst
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
/*
 * Applies the 4x4 add routines to blocks j*16 .. j*16+3, writing into the
 * destination selected by dest[j-1].
 *
 * dest         array of destination pointers, indexed with j-1
 * block_offset per-block offsets added to dest[j-1]
 * block        coefficient storage; block i starts at
 *              block + i*16*sizeof(pixel), counted in int16_t units
 * stride       passed through unchanged to the called routine
 * nnzc         declared as a 15*8 entry table; not read in the statements
 *              here
 *
 * NOTE(review): the loop that sets j and the condition guarding the first
 * call are not among the statements here; only the "else if" branch, which
 * tests the first coefficient of block i, is.
 */
204 void FUNCC(ff_h264_idct_add8
)(uint8_t **dest
, const int *block_offset
, int16_t *block
, int stride
, const uint8_t nnzc
[15*8]){
207 for(i
=j
*16; i
<j
*16+4; i
++){
209 FUNCC(ff_h264_idct_add
)(dest
[j
-1] + block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
/* Fall back to the DC-only routine when the first coefficient is non-zero. */
210 else if(((dctcoef
*)block
)[i
*16])
211 FUNCC(ff_h264_idct_dc_add
)(dest
[j
-1] + block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
/*
 * Applies the 4x4 add routines to two groups of blocks per j, writing into
 * the destination selected by dest[j-1]:
 *   - i = j*16   .. j*16+3, using block_offset[i];
 *   - i = j*16+4 .. j*16+7, using nnzc[scan8[i+4]] and block_offset[i+4],
 *     while the coefficients are still taken from block index i.
 *
 * dest         array of destination pointers, indexed with j-1
 * block_offset per-block offsets added to dest[j-1]
 * block        coefficient storage; block i starts at
 *              block + i*16*sizeof(pixel), counted in int16_t units
 * stride       passed through unchanged to the called routine
 * nnzc         table indexed through scan8[]
 *
 * NOTE(review): the loops that set j and the condition guarding the first
 * call of the first group are not among the statements here.
 */
216 void FUNCC(ff_h264_idct_add8_422
)(uint8_t **dest
, const int *block_offset
, int16_t *block
, int stride
, const uint8_t nnzc
[15*8]){
/* First group of four blocks. */
220 for(i
=j
*16; i
<j
*16+4; i
++){
222 FUNCC(ff_h264_idct_add
)(dest
[j
-1] + block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
223 else if(((dctcoef
*)block
)[i
*16])
224 FUNCC(ff_h264_idct_dc_add
)(dest
[j
-1] + block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
/* Second group of four blocks: the nnzc and block_offset lookups use i+4,
 * the coefficient pointer uses i. */
229 for(i
=j
*16+4; i
<j
*16+8; i
++){
230 if(nnzc
[ scan8
[i
+4] ])
231 FUNCC(ff_h264_idct_add
)(dest
[j
-1] + block_offset
[i
+4], block
+ i
*16*sizeof(pixel
), stride
);
232 else if(((dctcoef
*)block
)[i
*16])
233 FUNCC(ff_h264_idct_dc_add
)(dest
[j
-1] + block_offset
[i
+4], block
+ i
*16*sizeof(pixel
), stride
);
239 * IDCT transforms the 16 DC values and dequantizes them.
240 * @param qmul dequantization multiplier: each transformed value is multiplied by it, then rounded (+128) and shifted right by 8
/*
 * Two-pass sum/difference transform of the values in _input; each result is
 * scaled by qmul and scattered into _output.
 *
 * _output is accessed as dctcoef*; results are written at multiples of
 *         stride
 * _input  is accessed as dctcoef*; read as groups of four consecutive
 *         entries input[4*i+0 .. 4*i+3]
 * qmul    multiplier applied to every result before the rounding shift
 *
 * NOTE(review): stride, temp and the stores into temp are defined outside
 * the statements here; temp presumably holds the first-pass results.
 */
242 void FUNCC(ff_h264_luma_dc_dequant_idct
)(int16_t *_output
, int16_t *_input
, int qmul
){
/* Base offset into output for each second-pass index i. */
246 static const uint8_t x_offset
[4]={0, 2*stride
, 8*stride
, 10*stride
};
247 dctcoef
*input
= (dctcoef
*)_input
;
248 dctcoef
*output
= (dctcoef
*)_output
;
/* Pass 1: sums and differences within input[4*i+0 .. 4*i+3]. */
251 const int z0
= input
[4*i
+0] + input
[4*i
+1];
252 const int z1
= input
[4*i
+0] - input
[4*i
+1];
253 const int z2
= input
[4*i
+2] - input
[4*i
+3];
254 const int z3
= input
[4*i
+2] + input
[4*i
+3];
/* Pass 2: sums and differences of temp[i], temp[4+i], temp[8+i] and
 * temp[12+i]. */
263 const int offset
= x_offset
[i
];
264 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
265 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
266 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
267 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
/* Scale by qmul, add 128 for rounding and shift right by 8. The row
 * multipliers 0, 1, 4, 5 combined with the four x_offset values reach
 * output[k*stride] for k = 0..15. */
269 output
[stride
* 0+offset
]= ((((z0
+ z3
)*qmul
+ 128 ) >> 8));
270 output
[stride
* 1+offset
]= ((((z1
+ z2
)*qmul
+ 128 ) >> 8));
271 output
[stride
* 4+offset
]= ((((z1
- z2
)*qmul
+ 128 ) >> 8));
272 output
[stride
* 5+offset
]= ((((z0
- z3
)*qmul
+ 128 ) >> 8));
/*
 * In-place two-pass sum/difference transform of values spaced through
 * _block; each result is scaled by qmul.
 *
 * _block is accessed as dctcoef*; the values used sit at
 *        block[stride*i + xStride*{0,1}] with stride = 32 and xStride = 16
 * qmul   multiplier applied to every result before the rounding shift
 *
 * NOTE(review): the declaration of temp and the loop headers that set i are
 * not among the statements here.
 */
277 void FUNCC(ff_h264_chroma422_dc_dequant_idct
)(int16_t *_block
, int qmul
){
278 const int stride
= 16*2;
279 const int xStride
= 16;
/* Base offset into block for each second-pass index i. */
282 static const uint8_t x_offset
[2]={0, 16};
283 dctcoef
*block
= (dctcoef
*)_block
;
/* Pass 1: sum and difference of the two values block[stride*i] and
 * block[stride*i + xStride], stored in temp[2*i] and temp[2*i+1]. */
286 temp
[2*i
+0] = block
[stride
*i
+ xStride
*0] + block
[stride
*i
+ xStride
*1];
287 temp
[2*i
+1] = block
[stride
*i
+ xStride
*0] - block
[stride
*i
+ xStride
*1];
/* Pass 2: sums and differences of temp[i], temp[2+i], temp[4+i] and
 * temp[6+i]. */
291 const int offset
= x_offset
[i
];
292 const int z0
= temp
[2*0+i
] + temp
[2*2+i
];
293 const int z1
= temp
[2*0+i
] - temp
[2*2+i
];
294 const int z2
= temp
[2*1+i
] - temp
[2*3+i
];
295 const int z3
= temp
[2*1+i
] + temp
[2*3+i
];
/* Scale by qmul, add 128 for rounding, shift right by 8 and write back to
 * block[stride*k + offset], k = 0..3. */
297 block
[stride
*0+offset
]= ((z0
+ z3
)*qmul
+ 128) >> 8;
298 block
[stride
*1+offset
]= ((z1
+ z2
)*qmul
+ 128) >> 8;
299 block
[stride
*2+offset
]= ((z1
- z2
)*qmul
+ 128) >> 8;
300 block
[stride
*3+offset
]= ((z0
- z3
)*qmul
+ 128) >> 8;
/*
 * In-place transform of the four values at block[stride*{0,1} + xStride*{0,1}]
 * (stride = 32, xStride = 16); each result is scaled by qmul and shifted
 * right by 7.
 *
 * _block is accessed as dctcoef*
 * qmul   multiplier applied to every result before the shift
 *
 * NOTE(review): the statements between the four loads and the four stores
 * are not shown here. e is assigned there, and a, b, c are presumably
 * recombined there too (d is loaded but not used in the stores) -- confirm
 * before relying on the exact arithmetic.
 */
304 void FUNCC(ff_h264_chroma_dc_dequant_idct
)(int16_t *_block
, int qmul
){
305 const int stride
= 16*2;
306 const int xStride
= 16;
308 dctcoef
*block
= (dctcoef
*)_block
;
/* Load the four input values. */
310 a
= block
[stride
*0 + xStride
*0];
311 b
= block
[stride
*0 + xStride
*1];
312 c
= block
[stride
*1 + xStride
*0];
313 d
= block
[stride
*1 + xStride
*1];
/* Store the four scaled results back to the positions they were read from. */
320 block
[stride
*0 + xStride
*0]= ((a
+c
)*qmul
) >> 7;
321 block
[stride
*0 + xStride
*1]= ((e
+b
)*qmul
) >> 7;
322 block
[stride
*1 + xStride
*0]= ((a
-c
)*qmul
) >> 7;
323 block
[stride
*1 + xStride
*1]= ((e
-b
)*qmul
) >> 7;