Imported Debian version 2.5.0~trusty1.1
[deb_ffmpeg.git] / ffmpeg / libpostproc / postprocess.c
CommitLineData
2ba45a60
DM
1/*
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23/**
24 * @file
25 * postprocessing.
26 */
27
28/*
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
49
50* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
# more or less self-invented filters, so the exactness is not too meaningful
52E = Exact implementation
53e = almost exact implementation (slightly different rounding,...)
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
56p = partially optimized, still some work to do
57*/
58
59/*
60TODO:
61reduce the time wasted on the mem transfer
62unroll stuff if instructions depend too much on the prior one
63move YScale thing to the end instead of fixing QP
64write a faster and higher quality deblocking filter :)
make the mainloop more flexible (variable number of blocks at once;
        the if/else stuff per block is slowing things down)
67compare the quality & speed of all filters
68split this huge file
69optimize c versions
70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71...
72*/
73
74//Changelog: use git log
75
76#include "config.h"
77#include "libavutil/avutil.h"
78#include "libavutil/avassert.h"
79#include <inttypes.h>
80#include <stdio.h>
81#include <stdlib.h>
82#include <string.h>
83//#undef HAVE_MMXEXT_INLINE
84//#define HAVE_AMD3DNOW_INLINE
85//#undef HAVE_MMX_INLINE
86//#undef ARCH_X86
87//#define DEBUG_BRIGHTNESS
88#include "postprocess.h"
89#include "postprocess_internal.h"
90#include "libavutil/avstring.h"
91
92unsigned postproc_version(void)
93{
94 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
95 return LIBPOSTPROC_VERSION_INT;
96}
97
98const char *postproc_configuration(void)
99{
100 return FFMPEG_CONFIGURATION;
101}
102
103const char *postproc_license(void)
104{
105#define LICENSE_PREFIX "libpostproc license: "
106 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
107}
108
109#if HAVE_ALTIVEC_H
110#include <altivec.h>
111#endif
112
// size in bytes of the scratch buffer used while parsing a mode string
#define GET_MODE_BUFFER_SIZE 500
// capacity of the per-filter array that collects unrecognized options
#define OPTIONS_ARRAY_SIZE 10
// number of rows/columns the C block filters below iterate over
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet

#if ARCH_X86 && HAVE_INLINE_ASM
// 64-bit constants holding one value replicated in every 16-bit word (wXX)
// or in every byte (bXX); XX is the replicated value in hex
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

// NOTE(review): not referenced in this file's visible code; presumably used
// by the dering code in the included templates -- confirm
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
131
132
133static const struct PPFilter filters[]=
134{
135 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
136 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
137/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
138 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
139 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
140 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
141 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
142 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
143 {"dr", "dering", 1, 5, 6, DERING},
144 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
145 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
146 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
147 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
148 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
149 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
150 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
151 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
152 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
153 {"be", "bitexact", 1, 0, 0, BITEXACT},
f6fa7814 154 {"vi", "visualize", 1, 0, 0, VISUALIZE},
2ba45a60
DM
155 {NULL, NULL,0,0,0,0} //End Marker
156};
157
/**
 * Alias table stored as consecutive (name, expansion) pairs.
 *
 * Entry 2*i is an alias name and entry 2*i+1 is the filter list that is
 * substituted for it while a mode string is parsed. A single NULL ends
 * the table.
 */
static const char * const replaceTable[]=
{
    /* alias      expansion */
    "default",    "hb:a,vb:a,dr:a",
    "de",         "hb:a,vb:a,dr:a",
    "fast",       "h1:a,v1:a,dr:a",
    "fa",         "h1:a,v1:a,dr:a",
    "ac",         "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
167
168
#if ARCH_X86 && HAVE_INLINE_ASM
/*
 * One-instruction wrappers: each function issues the x86 instruction of
 * the same name on the address p, through inline assembly.
 */

static inline void prefetchnta(const void *p)
{
    __asm__ volatile(
        "prefetchnta (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}

static inline void prefetcht0(const void *p)
{
    __asm__ volatile(
        "prefetcht0 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}

static inline void prefetcht1(const void *p)
{
    __asm__ volatile(
        "prefetcht1 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}

static inline void prefetcht2(const void *p)
{
    __asm__ volatile(
        "prefetcht2 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
#endif
198
199/* The horizontal functions exist only in C because the MMX
200 * code is faster with vertical filters and transposing. */
201
202/**
203 * Check if the given 8x8 Block is mostly "flat"
204 */
205static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
206{
207 int numEq= 0;
208 int y;
209 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
210 const int dcThreshold= dcOffset*2 + 1;
211
212 for(y=0; y<BLOCK_SIZE; y++){
f6fa7814
DM
213 numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
214 numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
215 numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
216 numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
217 numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
218 numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
219 numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
2ba45a60
DM
220 src+= stride;
221 }
222 return numEq > c->ppMode.flatnessThreshold;
223}
224
225/**
226 * Check if the middle 8x8 Block in the given 8x16 block is flat
227 */
228static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
229{
230 int numEq= 0;
231 int y;
232 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
233 const int dcThreshold= dcOffset*2 + 1;
234
235 src+= stride*4; // src points to begin of the 8x8 Block
236 for(y=0; y<BLOCK_SIZE-1; y++){
f6fa7814
DM
237 numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
238 numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
239 numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
240 numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
241 numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
242 numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
243 numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
244 numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
2ba45a60
DM
245 src+= stride;
246 }
247 return numEq > c->ppMode.flatnessThreshold;
248}
249
/**
 * Sparse check that horizontal differences inside an 8x8 block stay small.
 *
 * One pair of pixels, 5 columns apart (modulo 8), is sampled on each of the
 * 8 rows; the column pair used cycles with a period of 4 rows.
 *
 * @param src    top-left pixel of the block
 * @param stride distance between two rows of src
 * @param QP     quantizer; a sampled difference must lie in [-2*QP, 2*QP]
 * @return 1 if every sampled pair is within range, 0 as soon as one is not
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    static const uint8_t columns[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for (row = 0; row < 8; row++) {
        const uint8_t *line = src + row * stride;
        const int a = line[columns[row & 3][0]];
        const int b = line[columns[row & 3][1]];

        // unsigned compare rejects both a-b < -2*QP and a-b > 2*QP
        if ((unsigned)(a - b + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
265
266static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
267{
268 int x;
269 src+= stride*4;
270 for(x=0; x<BLOCK_SIZE; x+=4){
271 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
272 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
273 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
274 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
275 }
276 return 1;
277}
278
279static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
280{
281 if( isHorizDC_C(src, stride, c) ){
f6fa7814 282 return isHorizMinMaxOk_C(src, stride, c->QP);
2ba45a60
DM
283 }else{
284 return 2;
285 }
286}
287
288static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
289{
290 if( isVertDC_C(src, stride, c) ){
f6fa7814 291 return isVertMinMaxOk_C(src, stride, c->QP);
2ba45a60
DM
292 }else{
293 return 2;
294 }
295}
296
297static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
298{
299 int y;
300 for(y=0; y<BLOCK_SIZE; y++){
301 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
302
303 if(FFABS(middleEnergy) < 8*c->QP){
304 const int q=(dst[3] - dst[4])/2;
305 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
306 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
307
308 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
309 d= FFMAX(d, 0);
310
311 d= (5*d + 32) >> 6;
312 d*= FFSIGN(-middleEnergy);
313
314 if(q>0)
315 {
316 d = FFMAX(d, 0);
317 d = FFMIN(d, q);
318 }
319 else
320 {
321 d = FFMIN(d, 0);
322 d = FFMAX(d, q);
323 }
324
325 dst[3]-= d;
326 dst[4]+= d;
327 }
328 dst+= stride;
329 }
330}
331
332/**
333 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
334 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
335 */
336static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
337{
338 int y;
339 for(y=0; y<BLOCK_SIZE; y++){
340 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
341 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
342
343 int sums[10];
344 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
345 sums[1] = sums[0] - first + dst[3];
346 sums[2] = sums[1] - first + dst[4];
347 sums[3] = sums[2] - first + dst[5];
348 sums[4] = sums[3] - first + dst[6];
349 sums[5] = sums[4] - dst[0] + dst[7];
350 sums[6] = sums[5] - dst[1] + last;
351 sums[7] = sums[6] - dst[2] + last;
352 sums[8] = sums[7] - dst[3] + last;
353 sums[9] = sums[8] - dst[4] + last;
354
355 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
356 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
357 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
358 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
359 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
360 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
361 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
362 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
363
364 dst+= stride;
365 }
366}
367
368/**
369 * Experimental Filter 1 (Horizontal)
370 * will not damage linear gradients
371 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
372 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
373 * MMX2 version does correct clipping C version does not
374 * not identical with the vertical one
375 */
376static inline void horizX1Filter(uint8_t *src, int stride, int QP)
377{
378 int y;
379 static uint64_t lut[256];
380 if(!lut[255])
381 {
382 int i;
383 for(i=0; i<256; i++)
384 {
385 int v= i < 128 ? 2*i : 2*(i-256);
386/*
387//Simulate 112242211 9-Tap filter
388 uint64_t a= (v/16) & 0xFF;
389 uint64_t b= (v/8) & 0xFF;
390 uint64_t c= (v/4) & 0xFF;
391 uint64_t d= (3*v/8) & 0xFF;
392*/
393//Simulate piecewise linear interpolation
394 uint64_t a= (v/16) & 0xFF;
395 uint64_t b= (v*3/16) & 0xFF;
396 uint64_t c= (v*5/16) & 0xFF;
397 uint64_t d= (7*v/16) & 0xFF;
398 uint64_t A= (0x100 - a)&0xFF;
399 uint64_t B= (0x100 - b)&0xFF;
400 uint64_t C= (0x100 - c)&0xFF;
401 uint64_t D= (0x100 - c)&0xFF;
402
403 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
404 (D<<24) | (C<<16) | (B<<8) | (A);
405 //lut[i] = (v<<32) | (v<<24);
406 }
407 }
408
409 for(y=0; y<BLOCK_SIZE; y++){
410 int a= src[1] - src[2];
411 int b= src[3] - src[4];
412 int c= src[5] - src[6];
413
414 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
415
416 if(d < QP){
417 int v = d * FFSIGN(-b);
418
419 src[1] +=v/8;
420 src[2] +=v/4;
421 src[3] +=3*v/8;
422 src[4] -=3*v/8;
423 src[5] -=v/4;
424 src[6] -=v/8;
425 }
426 src+=stride;
427 }
428}
429
430/**
431 * accurate deblock filter
432 */
433static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
f6fa7814 434 int stride, const PPContext *c, int mode)
2ba45a60
DM
435{
436 int y;
437 const int QP= c->QP;
438 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
439 const int dcThreshold= dcOffset*2 + 1;
440//START_TIMER
441 src+= step*4; // src points to begin of the 8x8 Block
442 for(y=0; y<8; y++){
443 int numEq= 0;
444
f6fa7814
DM
445 numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
446 numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
447 numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
448 numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
449 numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
450 numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
451 numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
452 numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
453 numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
2ba45a60
DM
454 if(numEq > c->ppMode.flatnessThreshold){
455 int min, max, x;
456
457 if(src[0] > src[step]){
458 max= src[0];
459 min= src[step];
460 }else{
461 max= src[step];
462 min= src[0];
463 }
464 for(x=2; x<8; x+=2){
465 if(src[x*step] > src[(x+1)*step]){
466 if(src[x *step] > max) max= src[ x *step];
467 if(src[(x+1)*step] < min) min= src[(x+1)*step];
468 }else{
469 if(src[(x+1)*step] > max) max= src[(x+1)*step];
470 if(src[ x *step] < min) min= src[ x *step];
471 }
472 }
473 if(max-min < 2*QP){
474 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
475 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
476
477 int sums[10];
478 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
479 sums[1] = sums[0] - first + src[3*step];
480 sums[2] = sums[1] - first + src[4*step];
481 sums[3] = sums[2] - first + src[5*step];
482 sums[4] = sums[3] - first + src[6*step];
483 sums[5] = sums[4] - src[0*step] + src[7*step];
484 sums[6] = sums[5] - src[1*step] + last;
485 sums[7] = sums[6] - src[2*step] + last;
486 sums[8] = sums[7] - src[3*step] + last;
487 sums[9] = sums[8] - src[4*step] + last;
488
f6fa7814
DM
489 if (mode & VISUALIZE) {
490 src[0*step] =
491 src[1*step] =
492 src[2*step] =
493 src[3*step] =
494 src[4*step] =
495 src[5*step] =
496 src[6*step] =
497 src[7*step] = 128;
498 }
2ba45a60
DM
499 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
500 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
501 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
502 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
503 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
504 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
505 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
506 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
507 }
508 }else{
509 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
510
511 if(FFABS(middleEnergy) < 8*QP){
512 const int q=(src[3*step] - src[4*step])/2;
513 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
514 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
515
516 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
517 d= FFMAX(d, 0);
518
519 d= (5*d + 32) >> 6;
520 d*= FFSIGN(-middleEnergy);
521
522 if(q>0){
523 d = FFMAX(d, 0);
524 d = FFMIN(d, q);
525 }else{
526 d = FFMIN(d, 0);
527 d = FFMAX(d, q);
528 }
529
f6fa7814
DM
530 if ((mode & VISUALIZE) && d) {
531 d= (d < 0) ? 32 : -32;
532 src[3*step]= av_clip_uint8(src[3*step] - d);
533 src[4*step]= av_clip_uint8(src[4*step] + d);
534 d = 0;
535 }
536
2ba45a60
DM
537 src[3*step]-= d;
538 src[4*step]+= d;
539 }
540 }
541
542 src += stride;
543 }
544/*if(step==16){
545 STOP_TIMER("step16")
546}else{
547 STOP_TIMER("stepX")
548}*/
549}
550
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
//Plain C versions
//we always compile C for testing which needs bitexactness
// postprocess_template.c is included once per flavour, each time with a
// different TEMPLATE_PP_* macro defined.
// NOTE(review): postProcess() below refers to postProcess_C, postProcess_MMX,
// postProcess_MMX2, postProcess_3DNow, postProcess_SSE2 and
// postProcess_altivec; presumably each inclusion defines one of them -- confirm
#define TEMPLATE_PP_C 1
#include "postprocess_template.c"

#if HAVE_ALTIVEC
#   define TEMPLATE_PP_ALTIVEC 1
#   include "postprocess_altivec_template.c"
#   include "postprocess_template.c"
#endif

#if ARCH_X86 && HAVE_INLINE_ASM
#    if CONFIG_RUNTIME_CPUDETECT
// runtime detection: build every x86 flavour, the choice is made per call
#        define TEMPLATE_PP_MMX 1
#        include "postprocess_template.c"
#        define TEMPLATE_PP_MMXEXT 1
#        include "postprocess_template.c"
#        define TEMPLATE_PP_3DNOW 1
#        include "postprocess_template.c"
#        define TEMPLATE_PP_SSE2 1
#        include "postprocess_template.c"
#    else
// no runtime detection: build only the first flavour enabled at configure time
#        if HAVE_SSE2_INLINE
#            define TEMPLATE_PP_SSE2 1
#            include "postprocess_template.c"
#        elif HAVE_MMXEXT_INLINE
#            define TEMPLATE_PP_MMXEXT 1
#            include "postprocess_template.c"
#        elif HAVE_AMD3DNOW_INLINE
#            define TEMPLATE_PP_3DNOW 1
#            include "postprocess_template.c"
#        elif HAVE_MMX_INLINE
#            define TEMPLATE_PP_MMX 1
#            include "postprocess_template.c"
#        endif
#    endif
#endif
589
590typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
591 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
592
593static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
594 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
595{
596 pp_fn pp = postProcess_C;
597 PPContext *c= (PPContext *)vc;
598 PPMode *ppMode= (PPMode *)vm;
599 c->ppMode= *ppMode; //FIXME
600
601 if (!(ppMode->lumMode & BITEXACT)) {
602#if CONFIG_RUNTIME_CPUDETECT
603#if ARCH_X86 && HAVE_INLINE_ASM
604 // ordered per speed fastest first
605 if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
606 else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
607 else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
608 else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
609#elif HAVE_ALTIVEC
610 if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
611#endif
612#else /* CONFIG_RUNTIME_CPUDETECT */
613#if HAVE_SSE2_INLINE
614 pp = postProcess_SSE2;
615#elif HAVE_MMXEXT_INLINE
616 pp = postProcess_MMX2;
617#elif HAVE_AMD3DNOW_INLINE
618 pp = postProcess_3DNow;
619#elif HAVE_MMX_INLINE
620 pp = postProcess_MMX;
621#elif HAVE_ALTIVEC
622 pp = postProcess_altivec;
623#endif
624#endif /* !CONFIG_RUNTIME_CPUDETECT */
625 }
626
627 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
628}
629
/* -pp Command line Help
 *
 * The names listed here must be spelled exactly as in the filters[] and
 * replaceTable[] tables, because the parser compares them with strcmp().
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f       fullyrange      stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
675
676pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
677{
678 char temp[GET_MODE_BUFFER_SIZE];
679 char *p= temp;
680 static const char filterDelimiters[] = ",/";
681 static const char optionDelimiters[] = ":|";
682 struct PPMode *ppMode;
683 char *filterToken;
684
685 if (!name) {
686 av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
687 return NULL;
688 }
689
690 if (!strcmp(name, "help")) {
691 const char *p;
692 for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
693 av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
694 av_log(NULL, AV_LOG_INFO, "%s", temp);
695 }
696 return NULL;
697 }
698
699 ppMode= av_malloc(sizeof(PPMode));
700
701 ppMode->lumMode= 0;
702 ppMode->chromMode= 0;
703 ppMode->maxTmpNoise[0]= 700;
704 ppMode->maxTmpNoise[1]= 1500;
705 ppMode->maxTmpNoise[2]= 3000;
706 ppMode->maxAllowedY= 234;
707 ppMode->minAllowedY= 16;
708 ppMode->baseDcDiff= 256/8;
709 ppMode->flatnessThreshold= 56-16-1;
710 ppMode->maxClippedThreshold= 0.01;
711 ppMode->error=0;
712
713 memset(temp, 0, GET_MODE_BUFFER_SIZE);
714 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
715
716 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
717
718 for(;;){
f6fa7814 719 const char *filterName;
2ba45a60
DM
720 int q= 1000000; //PP_QUALITY_MAX;
721 int chrom=-1;
722 int luma=-1;
f6fa7814
DM
723 const char *option;
724 const char *options[OPTIONS_ARRAY_SIZE];
2ba45a60
DM
725 int i;
726 int filterNameOk=0;
727 int numOfUnknownOptions=0;
728 int enable=1; //does the user want us to enabled or disabled the filter
f6fa7814 729 char *tokstate;
2ba45a60 730
f6fa7814 731 filterToken= av_strtok(p, filterDelimiters, &tokstate);
2ba45a60
DM
732 if(!filterToken) break;
733 p+= strlen(filterToken) + 1; // p points to next filterToken
f6fa7814 734 filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
2ba45a60
DM
735 if (!filterName) {
736 ppMode->error++;
737 break;
738 }
739 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
740
741 if(*filterName == '-'){
742 enable=0;
743 filterName++;
744 }
745
746 for(;;){ //for all options
f6fa7814 747 option= av_strtok(NULL, optionDelimiters, &tokstate);
2ba45a60
DM
748 if(!option) break;
749
750 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
751 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
752 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
753 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
754 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
755 else{
756 options[numOfUnknownOptions] = option;
757 numOfUnknownOptions++;
758 }
759 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
760 }
761 options[numOfUnknownOptions] = NULL;
762
763 /* replace stuff from the replace Table */
764 for(i=0; replaceTable[2*i]; i++){
765 if(!strcmp(replaceTable[2*i], filterName)){
766 int newlen= strlen(replaceTable[2*i + 1]);
767 int plen;
768 int spaceLeft;
769
770 p--, *p=',';
771
772 plen= strlen(p);
773 spaceLeft= p - temp + plen;
774 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
775 ppMode->error++;
776 break;
777 }
778 memmove(p + newlen, p, plen+1);
779 memcpy(p, replaceTable[2*i + 1], newlen);
780 filterNameOk=1;
781 }
782 }
783
784 for(i=0; filters[i].shortName; i++){
785 if( !strcmp(filters[i].longName, filterName)
786 || !strcmp(filters[i].shortName, filterName)){
787 ppMode->lumMode &= ~filters[i].mask;
788 ppMode->chromMode &= ~filters[i].mask;
789
790 filterNameOk=1;
791 if(!enable) break; // user wants to disable it
792
793 if(q >= filters[i].minLumQuality && luma)
794 ppMode->lumMode|= filters[i].mask;
795 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
796 if(q >= filters[i].minChromQuality)
797 ppMode->chromMode|= filters[i].mask;
798
799 if(filters[i].mask == LEVEL_FIX){
800 int o;
801 ppMode->minAllowedY= 16;
802 ppMode->maxAllowedY= 234;
803 for(o=0; options[o]; o++){
804 if( !strcmp(options[o],"fullyrange")
805 ||!strcmp(options[o],"f")){
806 ppMode->minAllowedY= 0;
807 ppMode->maxAllowedY= 255;
808 numOfUnknownOptions--;
809 }
810 }
811 }
812 else if(filters[i].mask == TEMP_NOISE_FILTER)
813 {
814 int o;
815 int numOfNoises=0;
816
817 for(o=0; options[o]; o++){
818 char *tail;
819 ppMode->maxTmpNoise[numOfNoises]=
820 strtol(options[o], &tail, 0);
821 if(tail!=options[o]){
822 numOfNoises++;
823 numOfUnknownOptions--;
824 if(numOfNoises >= 3) break;
825 }
826 }
827 }
828 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
829 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
830 int o;
831
832 for(o=0; options[o] && o<2; o++){
833 char *tail;
834 int val= strtol(options[o], &tail, 0);
835 if(tail==options[o]) break;
836
837 numOfUnknownOptions--;
838 if(o==0) ppMode->baseDcDiff= val;
839 else ppMode->flatnessThreshold= val;
840 }
841 }
842 else if(filters[i].mask == FORCE_QUANT){
843 int o;
844 ppMode->forcedQuant= 15;
845
846 for(o=0; options[o] && o<1; o++){
847 char *tail;
848 int val= strtol(options[o], &tail, 0);
849 if(tail==options[o]) break;
850
851 numOfUnknownOptions--;
852 ppMode->forcedQuant= val;
853 }
854 }
855 }
856 }
857 if(!filterNameOk) ppMode->error++;
858 ppMode->error += numOfUnknownOptions;
859 }
860
861 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
862 if(ppMode->error){
863 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
864 av_free(ppMode);
865 return NULL;
866 }
867 return ppMode;
868}
869
870void pp_free_mode(pp_mode *mode){
871 av_free(mode);
872}
873
/**
 * Replace the buffer *p by a fresh one obtained from av_mallocz(size).
 *
 * The old buffer is freed first; its contents are not carried over.
 */
static void reallocAlign(void **p, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
878
879static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
880 int mbWidth = (width+15)>>4;
881 int mbHeight= (height+15)>>4;
882 int i;
883
884 c->stride= stride;
885 c->qpStride= qpStride;
886
f6fa7814
DM
887 reallocAlign((void **)&c->tempDst, stride*24+32);
888 reallocAlign((void **)&c->tempSrc, stride*24);
889 reallocAlign((void **)&c->tempBlocks, 2*16*8);
890 reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
2ba45a60
DM
891 for(i=0; i<256; i++)
892 c->yHistogram[i]= width*height/64*15/256;
893
894 for(i=0; i<3; i++){
895 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
f6fa7814
DM
896 reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
897 reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
2ba45a60
DM
898 }
899
f6fa7814
DM
900 reallocAlign((void **)&c->deintTemp, 2*width+32);
901 reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
902 reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
903 reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
2ba45a60
DM
904}
905
/**
 * Name callback stored in av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char name[] = "postproc";

    return name;
}
909
910static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
911
912pp_context *pp_get_context(int width, int height, int cpuCaps){
913 PPContext *c= av_malloc(sizeof(PPContext));
914 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
915 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
916
917 memset(c, 0, sizeof(PPContext));
918 c->av_class = &av_codec_context_class;
919 if(cpuCaps&PP_FORMAT){
920 c->hChromaSubSample= cpuCaps&0x3;
921 c->vChromaSubSample= (cpuCaps>>4)&0x3;
922 }else{
923 c->hChromaSubSample= 1;
924 c->vChromaSubSample= 1;
925 }
926 if (cpuCaps & PP_CPU_CAPS_AUTO) {
927 c->cpuCaps = av_get_cpu_flags();
928 } else {
929 c->cpuCaps = 0;
930 if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
931 if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
932 if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
933 if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
934 }
935
936 reallocBuffers(c, width, height, stride, qpStride);
937
938 c->frameNum=-1;
939
940 return c;
941}
942
943void pp_free_context(void *vc){
944 PPContext *c = (PPContext*)vc;
945 int i;
946
f6fa7814
DM
947 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
948 av_free(c->tempBlurred[i]);
949 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
950 av_free(c->tempBlurredPast[i]);
2ba45a60
DM
951
952 av_free(c->tempBlocks);
953 av_free(c->yHistogram);
954 av_free(c->tempDst);
955 av_free(c->tempSrc);
956 av_free(c->deintTemp);
957 av_free(c->stdQPTable);
958 av_free(c->nonBQPTable);
959 av_free(c->forcedQPTable);
960
961 memset(c, 0, sizeof(PPContext));
962
963 av_free(c);
964}
965
966void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
967 uint8_t * dst[3], const int dstStride[3],
968 int width, int height,
969 const QP_STORE_T *QP_store, int QPStride,
970 pp_mode *vm, void *vc, int pict_type)
971{
972 int mbWidth = (width+15)>>4;
973 int mbHeight= (height+15)>>4;
f6fa7814
DM
974 PPMode *mode = vm;
975 PPContext *c = vc;
2ba45a60
DM
976 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
977 int absQPStride = FFABS(QPStride);
978
979 // c->stride and c->QPStride are always positive
980 if(c->stride < minStride || c->qpStride < absQPStride)
981 reallocBuffers(c, width, height,
982 FFMAX(minStride, c->stride),
983 FFMAX(c->qpStride, absQPStride));
984
985 if(!QP_store || (mode->lumMode & FORCE_QUANT)){
986 int i;
987 QP_store= c->forcedQPTable;
988 absQPStride = QPStride = 0;
989 if(mode->lumMode & FORCE_QUANT)
990 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
991 else
992 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
993 }
994
995 if(pict_type & PP_PICT_TYPE_QP2){
996 int i;
997 const int count= FFMAX(mbHeight * absQPStride, mbWidth);
998 for(i=0; i<(count>>2); i++){
999 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1000 }
1001 for(i<<=2; i<count; i++){
1002 c->stdQPTable[i] = QP_store[i]>>1;
1003 }
1004 QP_store= c->stdQPTable;
1005 QPStride= absQPStride;
1006 }
1007
1008 if(0){
1009 int x,y;
1010 for(y=0; y<mbHeight; y++){
1011 for(x=0; x<mbWidth; x++){
1012 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1013 }
1014 av_log(c, AV_LOG_INFO, "\n");
1015 }
1016 av_log(c, AV_LOG_INFO, "\n");
1017 }
1018
1019 if((pict_type&7)!=3){
1020 if (QPStride >= 0){
1021 int i;
1022 const int count= FFMAX(mbHeight * QPStride, mbWidth);
1023 for(i=0; i<(count>>2); i++){
1024 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1025 }
1026 for(i<<=2; i<count; i++){
1027 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1028 }
1029 } else {
1030 int i,j;
1031 for(i=0; i<mbHeight; i++) {
1032 for(j=0; j<absQPStride; j++) {
1033 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1034 }
1035 }
1036 }
1037 }
1038
1039 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1040 mode->lumMode, mode->chromMode);
1041
1042 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1043 width, height, QP_store, QPStride, 0, mode, c);
1044
f6fa7814
DM
1045 if (!(src[1] && src[2] && dst[1] && dst[2]))
1046 return;
1047
2ba45a60
DM
1048 width = (width )>>c->hChromaSubSample;
1049 height = (height)>>c->vChromaSubSample;
1050
1051 if(mode->chromMode){
1052 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1053 width, height, QP_store, QPStride, 1, mode, c);
1054 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1055 width, height, QP_store, QPStride, 2, mode, c);
1056 }
1057 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1058 linecpy(dst[1], src[1], height, srcStride[1]);
1059 linecpy(dst[2], src[2], height, srcStride[2]);
1060 }else{
1061 int y;
1062 for(y=0; y<height; y++){
1063 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1064 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1065 }
1066 }
1067}