/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 #define _SVID_SOURCE // needed for MAP_ANONYMOUS
24 #define _DARWIN_C_SOURCE // needed for MAP_ANON
31 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
32 #define MAP_ANONYMOUS MAP_ANON
36 #define WIN32_LEAN_AND_MEAN
40 #include "libavutil/attributes.h"
41 #include "libavutil/avassert.h"
42 #include "libavutil/avutil.h"
43 #include "libavutil/bswap.h"
44 #include "libavutil/cpu.h"
45 #include "libavutil/imgutils.h"
46 #include "libavutil/intreadwrite.h"
47 #include "libavutil/mathematics.h"
48 #include "libavutil/opt.h"
49 #include "libavutil/pixdesc.h"
50 #include "libavutil/ppc/cpu.h"
51 #include "libavutil/x86/asm.h"
52 #include "libavutil/x86/cpu.h"
55 #include "swscale_internal.h"
57 static void handle_formats(SwsContext
*c
);
59 unsigned swscale_version(void)
61 av_assert0(LIBSWSCALE_VERSION_MICRO
>= 100);
62 return LIBSWSCALE_VERSION_INT
;
65 const char *swscale_configuration(void)
67 return FFMPEG_CONFIGURATION
;
70 const char *swscale_license(void)
72 #define LICENSE_PREFIX "libswscale license: "
73 return LICENSE_PREFIX FFMPEG_LICENSE
+ sizeof(LICENSE_PREFIX
) - 1;
/**
 * Per-pixel-format capability flags, one bit each.
 * Fields omitted from an initializer are zero.
 */
typedef struct FormatEntry {
    uint8_t is_supported_in         :1; /* read by sws_isSupportedInput() */
    uint8_t is_supported_out        :1; /* read by sws_isSupportedOutput() */
    uint8_t is_supported_endianness :1; /* read by sws_isSupportedEndiannessConversion() */
} FormatEntry;
82 static const FormatEntry format_entries
[AV_PIX_FMT_NB
] = {
83 [AV_PIX_FMT_YUV420P
] = { 1, 1 },
84 [AV_PIX_FMT_YUYV422
] = { 1, 1 },
85 [AV_PIX_FMT_RGB24
] = { 1, 1 },
86 [AV_PIX_FMT_BGR24
] = { 1, 1 },
87 [AV_PIX_FMT_YUV422P
] = { 1, 1 },
88 [AV_PIX_FMT_YUV444P
] = { 1, 1 },
89 [AV_PIX_FMT_YUV410P
] = { 1, 1 },
90 [AV_PIX_FMT_YUV411P
] = { 1, 1 },
91 [AV_PIX_FMT_GRAY8
] = { 1, 1 },
92 [AV_PIX_FMT_MONOWHITE
] = { 1, 1 },
93 [AV_PIX_FMT_MONOBLACK
] = { 1, 1 },
94 [AV_PIX_FMT_PAL8
] = { 1, 0 },
95 [AV_PIX_FMT_YUVJ420P
] = { 1, 1 },
96 [AV_PIX_FMT_YUVJ411P
] = { 1, 1 },
97 [AV_PIX_FMT_YUVJ422P
] = { 1, 1 },
98 [AV_PIX_FMT_YUVJ444P
] = { 1, 1 },
99 [AV_PIX_FMT_YVYU422
] = { 1, 1 },
100 [AV_PIX_FMT_UYVY422
] = { 1, 1 },
101 [AV_PIX_FMT_UYYVYY411
] = { 0, 0 },
102 [AV_PIX_FMT_BGR8
] = { 1, 1 },
103 [AV_PIX_FMT_BGR4
] = { 0, 1 },
104 [AV_PIX_FMT_BGR4_BYTE
] = { 1, 1 },
105 [AV_PIX_FMT_RGB8
] = { 1, 1 },
106 [AV_PIX_FMT_RGB4
] = { 0, 1 },
107 [AV_PIX_FMT_RGB4_BYTE
] = { 1, 1 },
108 [AV_PIX_FMT_NV12
] = { 1, 1 },
109 [AV_PIX_FMT_NV21
] = { 1, 1 },
110 [AV_PIX_FMT_ARGB
] = { 1, 1 },
111 [AV_PIX_FMT_RGBA
] = { 1, 1 },
112 [AV_PIX_FMT_ABGR
] = { 1, 1 },
113 [AV_PIX_FMT_BGRA
] = { 1, 1 },
114 [AV_PIX_FMT_0RGB
] = { 1, 1 },
115 [AV_PIX_FMT_RGB0
] = { 1, 1 },
116 [AV_PIX_FMT_0BGR
] = { 1, 1 },
117 [AV_PIX_FMT_BGR0
] = { 1, 1 },
118 [AV_PIX_FMT_GRAY16BE
] = { 1, 1 },
119 [AV_PIX_FMT_GRAY16LE
] = { 1, 1 },
120 [AV_PIX_FMT_YUV440P
] = { 1, 1 },
121 [AV_PIX_FMT_YUVJ440P
] = { 1, 1 },
122 [AV_PIX_FMT_YUVA420P
] = { 1, 1 },
123 [AV_PIX_FMT_YUVA422P
] = { 1, 1 },
124 [AV_PIX_FMT_YUVA444P
] = { 1, 1 },
125 [AV_PIX_FMT_YUVA420P9BE
] = { 1, 1 },
126 [AV_PIX_FMT_YUVA420P9LE
] = { 1, 1 },
127 [AV_PIX_FMT_YUVA422P9BE
] = { 1, 1 },
128 [AV_PIX_FMT_YUVA422P9LE
] = { 1, 1 },
129 [AV_PIX_FMT_YUVA444P9BE
] = { 1, 1 },
130 [AV_PIX_FMT_YUVA444P9LE
] = { 1, 1 },
131 [AV_PIX_FMT_YUVA420P10BE
]= { 1, 1 },
132 [AV_PIX_FMT_YUVA420P10LE
]= { 1, 1 },
133 [AV_PIX_FMT_YUVA422P10BE
]= { 1, 1 },
134 [AV_PIX_FMT_YUVA422P10LE
]= { 1, 1 },
135 [AV_PIX_FMT_YUVA444P10BE
]= { 1, 1 },
136 [AV_PIX_FMT_YUVA444P10LE
]= { 1, 1 },
137 [AV_PIX_FMT_YUVA420P16BE
]= { 1, 1 },
138 [AV_PIX_FMT_YUVA420P16LE
]= { 1, 1 },
139 [AV_PIX_FMT_YUVA422P16BE
]= { 1, 1 },
140 [AV_PIX_FMT_YUVA422P16LE
]= { 1, 1 },
141 [AV_PIX_FMT_YUVA444P16BE
]= { 1, 1 },
142 [AV_PIX_FMT_YUVA444P16LE
]= { 1, 1 },
143 [AV_PIX_FMT_RGB48BE
] = { 1, 1 },
144 [AV_PIX_FMT_RGB48LE
] = { 1, 1 },
145 [AV_PIX_FMT_RGBA64BE
] = { 1, 1, 1 },
146 [AV_PIX_FMT_RGBA64LE
] = { 1, 1, 1 },
147 [AV_PIX_FMT_RGB565BE
] = { 1, 1 },
148 [AV_PIX_FMT_RGB565LE
] = { 1, 1 },
149 [AV_PIX_FMT_RGB555BE
] = { 1, 1 },
150 [AV_PIX_FMT_RGB555LE
] = { 1, 1 },
151 [AV_PIX_FMT_BGR565BE
] = { 1, 1 },
152 [AV_PIX_FMT_BGR565LE
] = { 1, 1 },
153 [AV_PIX_FMT_BGR555BE
] = { 1, 1 },
154 [AV_PIX_FMT_BGR555LE
] = { 1, 1 },
155 [AV_PIX_FMT_YUV420P16LE
] = { 1, 1 },
156 [AV_PIX_FMT_YUV420P16BE
] = { 1, 1 },
157 [AV_PIX_FMT_YUV422P16LE
] = { 1, 1 },
158 [AV_PIX_FMT_YUV422P16BE
] = { 1, 1 },
159 [AV_PIX_FMT_YUV444P16LE
] = { 1, 1 },
160 [AV_PIX_FMT_YUV444P16BE
] = { 1, 1 },
161 [AV_PIX_FMT_RGB444LE
] = { 1, 1 },
162 [AV_PIX_FMT_RGB444BE
] = { 1, 1 },
163 [AV_PIX_FMT_BGR444LE
] = { 1, 1 },
164 [AV_PIX_FMT_BGR444BE
] = { 1, 1 },
165 [AV_PIX_FMT_YA8
] = { 1, 0 },
166 [AV_PIX_FMT_YA16BE
] = { 1, 0 },
167 [AV_PIX_FMT_YA16LE
] = { 1, 0 },
168 [AV_PIX_FMT_BGR48BE
] = { 1, 1 },
169 [AV_PIX_FMT_BGR48LE
] = { 1, 1 },
170 [AV_PIX_FMT_BGRA64BE
] = { 1, 1, 1 },
171 [AV_PIX_FMT_BGRA64LE
] = { 1, 1, 1 },
172 [AV_PIX_FMT_YUV420P9BE
] = { 1, 1 },
173 [AV_PIX_FMT_YUV420P9LE
] = { 1, 1 },
174 [AV_PIX_FMT_YUV420P10BE
] = { 1, 1 },
175 [AV_PIX_FMT_YUV420P10LE
] = { 1, 1 },
176 [AV_PIX_FMT_YUV420P12BE
] = { 1, 1 },
177 [AV_PIX_FMT_YUV420P12LE
] = { 1, 1 },
178 [AV_PIX_FMT_YUV420P14BE
] = { 1, 1 },
179 [AV_PIX_FMT_YUV420P14LE
] = { 1, 1 },
180 [AV_PIX_FMT_YUV422P9BE
] = { 1, 1 },
181 [AV_PIX_FMT_YUV422P9LE
] = { 1, 1 },
182 [AV_PIX_FMT_YUV422P10BE
] = { 1, 1 },
183 [AV_PIX_FMT_YUV422P10LE
] = { 1, 1 },
184 [AV_PIX_FMT_YUV422P12BE
] = { 1, 1 },
185 [AV_PIX_FMT_YUV422P12LE
] = { 1, 1 },
186 [AV_PIX_FMT_YUV422P14BE
] = { 1, 1 },
187 [AV_PIX_FMT_YUV422P14LE
] = { 1, 1 },
188 [AV_PIX_FMT_YUV444P9BE
] = { 1, 1 },
189 [AV_PIX_FMT_YUV444P9LE
] = { 1, 1 },
190 [AV_PIX_FMT_YUV444P10BE
] = { 1, 1 },
191 [AV_PIX_FMT_YUV444P10LE
] = { 1, 1 },
192 [AV_PIX_FMT_YUV444P12BE
] = { 1, 1 },
193 [AV_PIX_FMT_YUV444P12LE
] = { 1, 1 },
194 [AV_PIX_FMT_YUV444P14BE
] = { 1, 1 },
195 [AV_PIX_FMT_YUV444P14LE
] = { 1, 1 },
196 [AV_PIX_FMT_GBRP
] = { 1, 1 },
197 [AV_PIX_FMT_GBRP9LE
] = { 1, 1 },
198 [AV_PIX_FMT_GBRP9BE
] = { 1, 1 },
199 [AV_PIX_FMT_GBRP10LE
] = { 1, 1 },
200 [AV_PIX_FMT_GBRP10BE
] = { 1, 1 },
201 [AV_PIX_FMT_GBRP12LE
] = { 1, 1 },
202 [AV_PIX_FMT_GBRP12BE
] = { 1, 1 },
203 [AV_PIX_FMT_GBRP14LE
] = { 1, 1 },
204 [AV_PIX_FMT_GBRP14BE
] = { 1, 1 },
205 [AV_PIX_FMT_GBRP16LE
] = { 1, 0 },
206 [AV_PIX_FMT_GBRP16BE
] = { 1, 0 },
207 [AV_PIX_FMT_XYZ12BE
] = { 1, 1, 1 },
208 [AV_PIX_FMT_XYZ12LE
] = { 1, 1, 1 },
209 [AV_PIX_FMT_GBRAP
] = { 1, 1 },
210 [AV_PIX_FMT_GBRAP16LE
] = { 1, 0 },
211 [AV_PIX_FMT_GBRAP16BE
] = { 1, 0 },
212 [AV_PIX_FMT_BAYER_BGGR8
] = { 1, 0 },
213 [AV_PIX_FMT_BAYER_RGGB8
] = { 1, 0 },
214 [AV_PIX_FMT_BAYER_GBRG8
] = { 1, 0 },
215 [AV_PIX_FMT_BAYER_GRBG8
] = { 1, 0 },
216 [AV_PIX_FMT_BAYER_BGGR16LE
] = { 1, 0 },
217 [AV_PIX_FMT_BAYER_BGGR16BE
] = { 1, 0 },
218 [AV_PIX_FMT_BAYER_RGGB16LE
] = { 1, 0 },
219 [AV_PIX_FMT_BAYER_RGGB16BE
] = { 1, 0 },
220 [AV_PIX_FMT_BAYER_GBRG16LE
] = { 1, 0 },
221 [AV_PIX_FMT_BAYER_GBRG16BE
] = { 1, 0 },
222 [AV_PIX_FMT_BAYER_GRBG16LE
] = { 1, 0 },
223 [AV_PIX_FMT_BAYER_GRBG16BE
] = { 1, 0 },
226 int sws_isSupportedInput(enum AVPixelFormat pix_fmt
)
228 return (unsigned)pix_fmt
< AV_PIX_FMT_NB
?
229 format_entries
[pix_fmt
].is_supported_in
: 0;
232 int sws_isSupportedOutput(enum AVPixelFormat pix_fmt
)
234 return (unsigned)pix_fmt
< AV_PIX_FMT_NB
?
235 format_entries
[pix_fmt
].is_supported_out
: 0;
238 int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt
)
240 return (unsigned)pix_fmt
< AV_PIX_FMT_NB
?
241 format_entries
[pix_fmt
].is_supported_endianness
: 0;
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * For dist <= 1.0 the result is a + b*dist + c*dist^2 + d*dist^3, evaluated
 * in Horner form. For larger dist the function recurses with dist reduced by
 * 1.0 and a new coefficient set for the next unit-length segment, whose
 * constant term is 0.0.
 *
 * @param a,b,c,d coefficients of the current segment (constant to cubic)
 * @param dist    distance at which to evaluate
 * @return the value at dist
 */
static double getSplineCoeff(double a, double b, double c, double d,
                             double dist)
{
    if (dist <= 1.0)
        return ((d * dist + c) * dist + b) * dist + a;
    else
        return getSplineCoeff(0.0,
                               b + 2.0 * c + 3.0 * d,
                               c + 3.0 * d,
                              -b - 3.0 * c - 6.0 * d,
                              dist - 1.0);
}
257 static av_cold
int get_local_pos(SwsContext
*s
, int chr_subsample
, int pos
, int dir
)
259 if (pos
== -1 || pos
<= -513) {
260 pos
= (128 << chr_subsample
) - 128;
262 pos
+= 128; // relative to ideal left edge
263 return pos
>> chr_subsample
;
/** Description of one scaling algorithm, as listed in scale_algorithms[]. */
typedef struct ScaleAlgorithm {
    int flag;                  ///< flag associated to the algorithm
    const char *description;   ///< human-readable description
    int size_factor;           ///< size factor used when initing the filters
} ScaleAlgorithm;
272 static const ScaleAlgorithm scale_algorithms
[] = {
273 { SWS_AREA
, "area averaging", 1 /* downscale only, for upscale it is bilinear */ },
274 { SWS_BICUBIC
, "bicubic", 4 },
275 { SWS_BICUBLIN
, "luma bicubic / chroma bilinear", -1 },
276 { SWS_BILINEAR
, "bilinear", 2 },
277 { SWS_FAST_BILINEAR
, "fast bilinear", -1 },
278 { SWS_GAUSS
, "Gaussian", 8 /* infinite ;) */ },
279 { SWS_LANCZOS
, "Lanczos", -1 /* custom */ },
280 { SWS_POINT
, "nearest neighbor / point", -1 },
281 { SWS_SINC
, "sinc", 20 /* infinite ;) */ },
282 { SWS_SPLINE
, "bicubic spline", 20 /* infinite :)*/ },
283 { SWS_X
, "experimental", 8 },
286 static av_cold
int initFilter(int16_t **outFilter
, int32_t **filterPos
,
287 int *outFilterSize
, int xInc
, int srcW
,
288 int dstW
, int filterAlign
, int one
,
289 int flags
, int cpu_flags
,
290 SwsVector
*srcFilter
, SwsVector
*dstFilter
,
291 double param
[2], int srcPos
, int dstPos
)
297 int64_t *filter
= NULL
;
298 int64_t *filter2
= NULL
;
299 const int64_t fone
= 1LL << (54 - FFMIN(av_log2(srcW
/dstW
), 8));
302 emms_c(); // FIXME should not be required but IS (even for non-MMX versions)
304 // NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
305 FF_ALLOC_ARRAY_OR_GOTO(NULL
, *filterPos
, (dstW
+ 3), sizeof(**filterPos
), fail
);
307 if (FFABS(xInc
- 0x10000) < 10 && srcPos
== dstPos
) { // unscaled
310 FF_ALLOCZ_ARRAY_OR_GOTO(NULL
, filter
,
311 dstW
, sizeof(*filter
) * filterSize
, fail
);
313 for (i
= 0; i
< dstW
; i
++) {
314 filter
[i
* filterSize
] = fone
;
317 } else if (flags
& SWS_POINT
) { // lame looking point sampling mode
321 FF_ALLOC_ARRAY_OR_GOTO(NULL
, filter
,
322 dstW
, sizeof(*filter
) * filterSize
, fail
);
324 xDstInSrc
= ((dstPos
*(int64_t)xInc
)>>8) - ((srcPos
*0x8000LL
)>>7);
325 for (i
= 0; i
< dstW
; i
++) {
326 int xx
= (xDstInSrc
- ((filterSize
- 1) << 15) + (1 << 15)) >> 16;
328 (*filterPos
)[i
] = xx
;
332 } else if ((xInc
<= (1 << 16) && (flags
& SWS_AREA
)) ||
333 (flags
& SWS_FAST_BILINEAR
)) { // bilinear upscale
337 FF_ALLOC_ARRAY_OR_GOTO(NULL
, filter
,
338 dstW
, sizeof(*filter
) * filterSize
, fail
);
340 xDstInSrc
= ((dstPos
*(int64_t)xInc
)>>8) - ((srcPos
*0x8000LL
)>>7);
341 for (i
= 0; i
< dstW
; i
++) {
342 int xx
= (xDstInSrc
- ((filterSize
- 1) << 15) + (1 << 15)) >> 16;
345 (*filterPos
)[i
] = xx
;
346 // bilinear upscale / linear interpolate / area averaging
347 for (j
= 0; j
< filterSize
; j
++) {
348 int64_t coeff
= fone
- FFABS(((int64_t)xx
<<16) - xDstInSrc
)*(fone
>>16);
351 filter
[i
* filterSize
+ j
] = coeff
;
360 for (i
= 0; i
< FF_ARRAY_ELEMS(scale_algorithms
); i
++) {
361 if (flags
& scale_algorithms
[i
].flag
&& scale_algorithms
[i
].size_factor
> 0) {
362 sizeFactor
= scale_algorithms
[i
].size_factor
;
366 if (flags
& SWS_LANCZOS
)
367 sizeFactor
= param
[0] != SWS_PARAM_DEFAULT
? ceil(2 * param
[0]) : 6;
368 av_assert0(sizeFactor
> 0);
371 filterSize
= 1 + sizeFactor
; // upscale
373 filterSize
= 1 + (sizeFactor
* srcW
+ dstW
- 1) / dstW
;
375 filterSize
= FFMIN(filterSize
, srcW
- 2);
376 filterSize
= FFMAX(filterSize
, 1);
378 FF_ALLOC_ARRAY_OR_GOTO(NULL
, filter
,
379 dstW
, sizeof(*filter
) * filterSize
, fail
);
381 xDstInSrc
= ((dstPos
*(int64_t)xInc
)>>7) - ((srcPos
*0x10000LL
)>>7);
382 for (i
= 0; i
< dstW
; i
++) {
383 int xx
= (xDstInSrc
- ((int64_t)(filterSize
- 2) << 16)) / (1 << 17);
385 (*filterPos
)[i
] = xx
;
386 for (j
= 0; j
< filterSize
; j
++) {
387 int64_t d
= (FFABS(((int64_t)xx
<< 17) - xDstInSrc
)) << 13;
393 floatd
= d
* (1.0 / (1 << 30));
395 if (flags
& SWS_BICUBIC
) {
396 int64_t B
= (param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 0) * (1 << 24);
397 int64_t C
= (param
[1] != SWS_PARAM_DEFAULT
? param
[1] : 0.6) * (1 << 24);
399 if (d
>= 1LL << 31) {
402 int64_t dd
= (d
* d
) >> 30;
403 int64_t ddd
= (dd
* d
) >> 30;
406 coeff
= (12 * (1 << 24) - 9 * B
- 6 * C
) * ddd
+
407 (-18 * (1 << 24) + 12 * B
+ 6 * C
) * dd
+
408 (6 * (1 << 24) - 2 * B
) * (1 << 30);
410 coeff
= (-B
- 6 * C
) * ddd
+
411 (6 * B
+ 30 * C
) * dd
+
412 (-12 * B
- 48 * C
) * d
+
413 (8 * B
+ 24 * C
) * (1 << 30);
415 coeff
/= (1LL<<54)/fone
;
418 else if (flags
& SWS_X
) {
419 double p
= param
? param
* 0.01 : 0.3;
420 coeff
= d
? sin(d
* M_PI
) / (d
* M_PI
) : 1.0;
421 coeff
*= pow(2.0, -p
* d
* d
);
424 else if (flags
& SWS_X
) {
425 double A
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 1.0;
429 c
= cos(floatd
* M_PI
);
436 coeff
= (c
* 0.5 + 0.5) * fone
;
437 } else if (flags
& SWS_AREA
) {
438 int64_t d2
= d
- (1 << 29);
439 if (d2
* xInc
< -(1LL << (29 + 16)))
440 coeff
= 1.0 * (1LL << (30 + 16));
441 else if (d2
* xInc
< (1LL << (29 + 16)))
442 coeff
= -d2
* xInc
+ (1LL << (29 + 16));
445 coeff
*= fone
>> (30 + 16);
446 } else if (flags
& SWS_GAUSS
) {
447 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
448 coeff
= (pow(2.0, -p
* floatd
* floatd
)) * fone
;
449 } else if (flags
& SWS_SINC
) {
450 coeff
= (d
? sin(floatd
* M_PI
) / (floatd
* M_PI
) : 1.0) * fone
;
451 } else if (flags
& SWS_LANCZOS
) {
452 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
453 coeff
= (d
? sin(floatd
* M_PI
) * sin(floatd
* M_PI
/ p
) /
454 (floatd
* floatd
* M_PI
* M_PI
/ p
) : 1.0) * fone
;
457 } else if (flags
& SWS_BILINEAR
) {
458 coeff
= (1 << 30) - d
;
462 } else if (flags
& SWS_SPLINE
) {
463 double p
= -2.196152422706632;
464 coeff
= getSplineCoeff(1.0, 0.0, p
, -p
- 1.0, floatd
) * fone
;
469 filter
[i
* filterSize
+ j
] = coeff
;
472 xDstInSrc
+= 2 * xInc
;
476 /* apply src & dst Filter to filter -> filter2
479 av_assert0(filterSize
> 0);
480 filter2Size
= filterSize
;
482 filter2Size
+= srcFilter
->length
- 1;
484 filter2Size
+= dstFilter
->length
- 1;
485 av_assert0(filter2Size
> 0);
486 FF_ALLOCZ_ARRAY_OR_GOTO(NULL
, filter2
, dstW
, filter2Size
* sizeof(*filter2
), fail
);
488 for (i
= 0; i
< dstW
; i
++) {
492 for (k
= 0; k
< srcFilter
->length
; k
++) {
493 for (j
= 0; j
< filterSize
; j
++)
494 filter2
[i
* filter2Size
+ k
+ j
] +=
495 srcFilter
->coeff
[k
] * filter
[i
* filterSize
+ j
];
498 for (j
= 0; j
< filterSize
; j
++)
499 filter2
[i
* filter2Size
+ j
] = filter
[i
* filterSize
+ j
];
503 (*filterPos
)[i
] += (filterSize
- 1) / 2 - (filter2Size
- 1) / 2;
507 /* try to reduce the filter-size (step1 find size and shift left) */
508 // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
510 for (i
= dstW
- 1; i
>= 0; i
--) {
511 int min
= filter2Size
;
513 int64_t cutOff
= 0.0;
515 /* get rid of near zero elements on the left by shifting left */
516 for (j
= 0; j
< filter2Size
; j
++) {
518 cutOff
+= FFABS(filter2
[i
* filter2Size
]);
520 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
* fone
)
523 /* preserve monotonicity because the core can't handle the
524 * filter otherwise */
525 if (i
< dstW
- 1 && (*filterPos
)[i
] >= (*filterPos
)[i
+ 1])
528 // move filter coefficients left
529 for (k
= 1; k
< filter2Size
; k
++)
530 filter2
[i
* filter2Size
+ k
- 1] = filter2
[i
* filter2Size
+ k
];
531 filter2
[i
* filter2Size
+ k
- 1] = 0;
536 /* count near zeros on the right */
537 for (j
= filter2Size
- 1; j
> 0; j
--) {
538 cutOff
+= FFABS(filter2
[i
* filter2Size
+ j
]);
540 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
* fone
)
545 if (min
> minFilterSize
)
549 if (PPC_ALTIVEC(cpu_flags
)) {
550 // we can handle the special case 4, so we don't want to go the full 8
551 if (minFilterSize
< 5)
554 /* We really don't want to waste our time doing useless computation, so
555 * fall back on the scalar C code for very small filters.
556 * Vectorizing is worth it only if you have a decent-sized vector. */
557 if (minFilterSize
< 3)
561 if (HAVE_MMX
&& cpu_flags
& AV_CPU_FLAG_MMX
) {
562 // special case for unscaled vertical filtering
563 if (minFilterSize
== 1 && filterAlign
== 2)
567 av_assert0(minFilterSize
> 0);
568 filterSize
= (minFilterSize
+ (filterAlign
- 1)) & (~(filterAlign
- 1));
569 av_assert0(filterSize
> 0);
570 filter
= av_malloc_array(dstW
, filterSize
* sizeof(*filter
));
573 if (filterSize
>= MAX_FILTER_SIZE
* 16 /
574 ((flags
& SWS_ACCURATE_RND
) ? APCK_SIZE
: 16)) {
575 ret
= RETCODE_USE_CASCADE
;
578 *outFilterSize
= filterSize
;
580 if (flags
& SWS_PRINT_INFO
)
581 av_log(NULL
, AV_LOG_VERBOSE
,
582 "SwScaler: reducing / aligning filtersize %d -> %d\n",
583 filter2Size
, filterSize
);
584 /* try to reduce the filter-size (step2 reduce it) */
585 for (i
= 0; i
< dstW
; i
++) {
588 for (j
= 0; j
< filterSize
; j
++) {
589 if (j
>= filter2Size
)
590 filter
[i
* filterSize
+ j
] = 0;
592 filter
[i
* filterSize
+ j
] = filter2
[i
* filter2Size
+ j
];
593 if ((flags
& SWS_BITEXACT
) && j
>= minFilterSize
)
594 filter
[i
* filterSize
+ j
] = 0;
598 // FIXME try to align filterPos if possible
601 for (i
= 0; i
< dstW
; i
++) {
603 if ((*filterPos
)[i
] < 0) {
604 // move filter coefficients left to compensate for filterPos
605 for (j
= 1; j
< filterSize
; j
++) {
606 int left
= FFMAX(j
+ (*filterPos
)[i
], 0);
607 filter
[i
* filterSize
+ left
] += filter
[i
* filterSize
+ j
];
608 filter
[i
* filterSize
+ j
] = 0;
613 if ((*filterPos
)[i
] + filterSize
> srcW
) {
614 int shift
= (*filterPos
)[i
] + filterSize
- srcW
;
615 // move filter coefficients right to compensate for filterPos
616 for (j
= filterSize
- 2; j
>= 0; j
--) {
617 int right
= FFMIN(j
+ shift
, filterSize
- 1);
618 filter
[i
* filterSize
+ right
] += filter
[i
* filterSize
+ j
];
619 filter
[i
* filterSize
+ j
] = 0;
621 (*filterPos
)[i
]= srcW
- filterSize
;
625 // Note the +1 is for the MMX scaler which reads over the end
626 /* align at 16 for AltiVec (needed by hScale_altivec_real) */
627 FF_ALLOCZ_ARRAY_OR_GOTO(NULL
, *outFilter
,
628 (dstW
+ 3), *outFilterSize
* sizeof(int16_t), fail
);
630 /* normalize & store in outFilter */
631 for (i
= 0; i
< dstW
; i
++) {
636 for (j
= 0; j
< filterSize
; j
++) {
637 sum
+= filter
[i
* filterSize
+ j
];
639 sum
= (sum
+ one
/ 2) / one
;
641 av_log(NULL
, AV_LOG_WARNING
, "SwScaler: zero vector in scaling\n");
644 for (j
= 0; j
< *outFilterSize
; j
++) {
645 int64_t v
= filter
[i
* filterSize
+ j
] + error
;
646 int intV
= ROUNDED_DIV(v
, sum
);
647 (*outFilter
)[i
* (*outFilterSize
) + j
] = intV
;
648 error
= v
- intV
* sum
;
652 (*filterPos
)[dstW
+ 0] =
653 (*filterPos
)[dstW
+ 1] =
654 (*filterPos
)[dstW
+ 2] = (*filterPos
)[dstW
- 1]; /* the MMX/SSE scaler will
655 * read over the end */
656 for (i
= 0; i
< *outFilterSize
; i
++) {
657 int k
= (dstW
- 1) * (*outFilterSize
) + i
;
658 (*outFilter
)[k
+ 1 * (*outFilterSize
)] =
659 (*outFilter
)[k
+ 2 * (*outFilterSize
)] =
660 (*outFilter
)[k
+ 3 * (*outFilterSize
)] = (*outFilter
)[k
];
667 av_log(NULL
, ret
== RETCODE_USE_CASCADE
? AV_LOG_DEBUG
: AV_LOG_ERROR
, "sws: initFilter failed\n");
673 static void fill_rgb2yuv_table(SwsContext
*c
, const int table
[4], int dstRange
)
675 int64_t W
, V
, Z
, Cy
, Cu
, Cv
;
676 int64_t vr
= table
[0];
677 int64_t ub
= table
[1];
678 int64_t ug
= -table
[2];
679 int64_t vg
= -table
[3];
682 uint8_t *p
= (uint8_t*)c
->input_rgb2yuv_table
;
684 static const int8_t map
[] = {
685 BY_IDX
, GY_IDX
, -1 , BY_IDX
, BY_IDX
, GY_IDX
, -1 , BY_IDX
,
686 RY_IDX
, -1 , GY_IDX
, RY_IDX
, RY_IDX
, -1 , GY_IDX
, RY_IDX
,
687 RY_IDX
, GY_IDX
, -1 , RY_IDX
, RY_IDX
, GY_IDX
, -1 , RY_IDX
,
688 BY_IDX
, -1 , GY_IDX
, BY_IDX
, BY_IDX
, -1 , GY_IDX
, BY_IDX
,
689 BU_IDX
, GU_IDX
, -1 , BU_IDX
, BU_IDX
, GU_IDX
, -1 , BU_IDX
,
690 RU_IDX
, -1 , GU_IDX
, RU_IDX
, RU_IDX
, -1 , GU_IDX
, RU_IDX
,
691 RU_IDX
, GU_IDX
, -1 , RU_IDX
, RU_IDX
, GU_IDX
, -1 , RU_IDX
,
692 BU_IDX
, -1 , GU_IDX
, BU_IDX
, BU_IDX
, -1 , GU_IDX
, BU_IDX
,
693 BV_IDX
, GV_IDX
, -1 , BV_IDX
, BV_IDX
, GV_IDX
, -1 , BV_IDX
,
694 RV_IDX
, -1 , GV_IDX
, RV_IDX
, RV_IDX
, -1 , GV_IDX
, RV_IDX
,
695 RV_IDX
, GV_IDX
, -1 , RV_IDX
, RV_IDX
, GV_IDX
, -1 , RV_IDX
,
696 BV_IDX
, -1 , GV_IDX
, BV_IDX
, BV_IDX
, -1 , GV_IDX
, BV_IDX
,
697 RY_IDX
, BY_IDX
, RY_IDX
, BY_IDX
, RY_IDX
, BY_IDX
, RY_IDX
, BY_IDX
,
698 BY_IDX
, RY_IDX
, BY_IDX
, RY_IDX
, BY_IDX
, RY_IDX
, BY_IDX
, RY_IDX
,
699 GY_IDX
, -1 , GY_IDX
, -1 , GY_IDX
, -1 , GY_IDX
, -1 ,
700 -1 , GY_IDX
, -1 , GY_IDX
, -1 , GY_IDX
, -1 , GY_IDX
,
701 RU_IDX
, BU_IDX
, RU_IDX
, BU_IDX
, RU_IDX
, BU_IDX
, RU_IDX
, BU_IDX
,
702 BU_IDX
, RU_IDX
, BU_IDX
, RU_IDX
, BU_IDX
, RU_IDX
, BU_IDX
, RU_IDX
,
703 GU_IDX
, -1 , GU_IDX
, -1 , GU_IDX
, -1 , GU_IDX
, -1 ,
704 -1 , GU_IDX
, -1 , GU_IDX
, -1 , GU_IDX
, -1 , GU_IDX
,
705 RV_IDX
, BV_IDX
, RV_IDX
, BV_IDX
, RV_IDX
, BV_IDX
, RV_IDX
, BV_IDX
,
706 BV_IDX
, RV_IDX
, BV_IDX
, RV_IDX
, BV_IDX
, RV_IDX
, BV_IDX
, RV_IDX
,
707 GV_IDX
, -1 , GV_IDX
, -1 , GV_IDX
, -1 , GV_IDX
, -1 ,
708 -1 , GV_IDX
, -1 , GV_IDX
, -1 , GV_IDX
, -1 , GV_IDX
, //23
709 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //24
710 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //25
711 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //26
712 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //27
713 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //28
714 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //29
715 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //30
716 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //31
717 BY_IDX
, GY_IDX
, RY_IDX
, -1 , -1 , -1 , -1 , -1 , //32
718 BU_IDX
, GU_IDX
, RU_IDX
, -1 , -1 , -1 , -1 , -1 , //33
719 BV_IDX
, GV_IDX
, RV_IDX
, -1 , -1 , -1 , -1 , -1 , //34
722 dstRange
= 0; //FIXME range = 1 is handled elsewhere
732 W
= ROUNDED_DIV(ONE
*ONE
*ug
, ub
);
733 V
= ROUNDED_DIV(ONE
*ONE
*vg
, vr
);
736 Cy
= ROUNDED_DIV(cy
*Z
, ONE
);
737 Cu
= ROUNDED_DIV(ub
*Z
, ONE
);
738 Cv
= ROUNDED_DIV(vr
*Z
, ONE
);
740 c
->input_rgb2yuv_table
[RY_IDX
] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*V
, Cy
);
741 c
->input_rgb2yuv_table
[GY_IDX
] = ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*ONE
*ONE
, Cy
);
742 c
->input_rgb2yuv_table
[BY_IDX
] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*W
, Cy
);
744 c
->input_rgb2yuv_table
[RU_IDX
] = ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*V
, Cu
);
745 c
->input_rgb2yuv_table
[GU_IDX
] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*ONE
*ONE
, Cu
);
746 c
->input_rgb2yuv_table
[BU_IDX
] = ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*(Z
+W
) , Cu
);
748 c
->input_rgb2yuv_table
[RV_IDX
] = ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*(V
+Z
) , Cv
);
749 c
->input_rgb2yuv_table
[GV_IDX
] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*ONE
*ONE
, Cv
);
750 c
->input_rgb2yuv_table
[BV_IDX
] = ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*W
, Cv
);
752 if(/*!dstRange && */!memcmp(table
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
], sizeof(ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
]))) {
753 c
->input_rgb2yuv_table
[BY_IDX
] = ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
754 c
->input_rgb2yuv_table
[BV_IDX
] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
755 c
->input_rgb2yuv_table
[BU_IDX
] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
756 c
->input_rgb2yuv_table
[GY_IDX
] = ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
757 c
->input_rgb2yuv_table
[GV_IDX
] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
758 c
->input_rgb2yuv_table
[GU_IDX
] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
759 c
->input_rgb2yuv_table
[RY_IDX
] = ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
760 c
->input_rgb2yuv_table
[RV_IDX
] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
761 c
->input_rgb2yuv_table
[RU_IDX
] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
763 for(i
=0; i
<FF_ARRAY_ELEMS(map
); i
++)
764 AV_WL16(p
+ 16*4 + 2*i
, map
[i
] >= 0 ? c
->input_rgb2yuv_table
[map
[i
]] : 0);
767 static void fill_xyztables(struct SwsContext
*c
)
770 double xyzgamma
= XYZ_GAMMA
;
771 double rgbgamma
= 1.0 / RGB_GAMMA
;
772 double xyzgammainv
= 1.0 / XYZ_GAMMA
;
773 double rgbgammainv
= RGB_GAMMA
;
774 static const int16_t xyz2rgb_matrix
[3][4] = {
775 {13270, -6295, -2041},
777 { 228, -835, 4329} };
778 static const int16_t rgb2xyz_matrix
[3][4] = {
782 static int16_t xyzgamma_tab
[4096], rgbgamma_tab
[4096], xyzgammainv_tab
[4096], rgbgammainv_tab
[4096];
784 memcpy(c
->xyz2rgb_matrix
, xyz2rgb_matrix
, sizeof(c
->xyz2rgb_matrix
));
785 memcpy(c
->rgb2xyz_matrix
, rgb2xyz_matrix
, sizeof(c
->rgb2xyz_matrix
));
786 c
->xyzgamma
= xyzgamma_tab
;
787 c
->rgbgamma
= rgbgamma_tab
;
788 c
->xyzgammainv
= xyzgammainv_tab
;
789 c
->rgbgammainv
= rgbgammainv_tab
;
791 if (rgbgamma_tab
[4095])
794 /* set gamma vectors */
795 for (i
= 0; i
< 4096; i
++) {
796 xyzgamma_tab
[i
] = lrint(pow(i
/ 4095.0, xyzgamma
) * 4095.0);
797 rgbgamma_tab
[i
] = lrint(pow(i
/ 4095.0, rgbgamma
) * 4095.0);
798 xyzgammainv_tab
[i
] = lrint(pow(i
/ 4095.0, xyzgammainv
) * 4095.0);
799 rgbgammainv_tab
[i
] = lrint(pow(i
/ 4095.0, rgbgammainv
) * 4095.0);
803 int sws_setColorspaceDetails(struct SwsContext
*c
, const int inv_table
[4],
804 int srcRange
, const int table
[4], int dstRange
,
805 int brightness
, int contrast
, int saturation
)
807 const AVPixFmtDescriptor
*desc_dst
;
808 const AVPixFmtDescriptor
*desc_src
;
810 memmove(c
->srcColorspaceTable
, inv_table
, sizeof(int) * 4);
811 memmove(c
->dstColorspaceTable
, table
, sizeof(int) * 4);
814 desc_dst
= av_pix_fmt_desc_get(c
->dstFormat
);
815 desc_src
= av_pix_fmt_desc_get(c
->srcFormat
);
817 if(!isYUV(c
->dstFormat
) && !isGray(c
->dstFormat
))
819 if(!isYUV(c
->srcFormat
) && !isGray(c
->srcFormat
))
822 c
->brightness
= brightness
;
823 c
->contrast
= contrast
;
824 c
->saturation
= saturation
;
825 if (c
->srcRange
!= srcRange
|| c
->dstRange
!= dstRange
)
827 c
->srcRange
= srcRange
;
828 c
->dstRange
= dstRange
;
830 //The srcBpc check is possibly wrong but we seem to lack a definitive reference to test this
831 //and what we have in ticket 2939 looks better with this check
832 if (need_reinit
&& (c
->srcBpc
== 8 || !isYUV(c
->srcFormat
)))
833 ff_sws_init_range_convert(c
);
835 if ((isYUV(c
->dstFormat
) || isGray(c
->dstFormat
)) && (isYUV(c
->srcFormat
) || isGray(c
->srcFormat
)))
838 c
->dstFormatBpp
= av_get_bits_per_pixel(desc_dst
);
839 c
->srcFormatBpp
= av_get_bits_per_pixel(desc_src
);
841 if (!isYUV(c
->dstFormat
) && !isGray(c
->dstFormat
)) {
842 ff_yuv2rgb_c_init_tables(c
, inv_table
, srcRange
, brightness
,
843 contrast
, saturation
);
847 ff_yuv2rgb_init_tables_ppc(c
, inv_table
, brightness
,
848 contrast
, saturation
);
851 fill_rgb2yuv_table(c
, table
, dstRange
);
856 int sws_getColorspaceDetails(struct SwsContext
*c
, int **inv_table
,
857 int *srcRange
, int **table
, int *dstRange
,
858 int *brightness
, int *contrast
, int *saturation
)
863 *inv_table
= c
->srcColorspaceTable
;
864 *table
= c
->dstColorspaceTable
;
865 *srcRange
= c
->srcRange
;
866 *dstRange
= c
->dstRange
;
867 *brightness
= c
->brightness
;
868 *contrast
= c
->contrast
;
869 *saturation
= c
->saturation
;
874 static int handle_jpeg(enum AVPixelFormat
*format
)
877 case AV_PIX_FMT_YUVJ420P
:
878 *format
= AV_PIX_FMT_YUV420P
;
880 case AV_PIX_FMT_YUVJ411P
:
881 *format
= AV_PIX_FMT_YUV411P
;
883 case AV_PIX_FMT_YUVJ422P
:
884 *format
= AV_PIX_FMT_YUV422P
;
886 case AV_PIX_FMT_YUVJ444P
:
887 *format
= AV_PIX_FMT_YUV444P
;
889 case AV_PIX_FMT_YUVJ440P
:
890 *format
= AV_PIX_FMT_YUV440P
;
892 case AV_PIX_FMT_GRAY8
:
893 case AV_PIX_FMT_GRAY16LE
:
894 case AV_PIX_FMT_GRAY16BE
:
901 static int handle_0alpha(enum AVPixelFormat
*format
)
904 case AV_PIX_FMT_0BGR
: *format
= AV_PIX_FMT_ABGR
; return 1;
905 case AV_PIX_FMT_BGR0
: *format
= AV_PIX_FMT_BGRA
; return 4;
906 case AV_PIX_FMT_0RGB
: *format
= AV_PIX_FMT_ARGB
; return 1;
907 case AV_PIX_FMT_RGB0
: *format
= AV_PIX_FMT_RGBA
; return 4;
912 static int handle_xyz(enum AVPixelFormat
*format
)
915 case AV_PIX_FMT_XYZ12BE
: *format
= AV_PIX_FMT_RGB48BE
; return 1;
916 case AV_PIX_FMT_XYZ12LE
: *format
= AV_PIX_FMT_RGB48LE
; return 1;
921 static void handle_formats(SwsContext
*c
)
923 c
->src0Alpha
|= handle_0alpha(&c
->srcFormat
);
924 c
->dst0Alpha
|= handle_0alpha(&c
->dstFormat
);
925 c
->srcXYZ
|= handle_xyz(&c
->srcFormat
);
926 c
->dstXYZ
|= handle_xyz(&c
->dstFormat
);
927 if (c
->srcXYZ
|| c
->dstXYZ
)
931 SwsContext
*sws_alloc_context(void)
933 SwsContext
*c
= av_mallocz(sizeof(SwsContext
));
935 av_assert0(offsetof(SwsContext
, redDither
) + DITHER32_INT
== offsetof(SwsContext
, dither32
));
938 c
->av_class
= &sws_context_class
;
939 av_opt_set_defaults(c
);
945 av_cold
int sws_init_context(SwsContext
*c
, SwsFilter
*srcFilter
,
946 SwsFilter
*dstFilter
)
949 int usesVFilter
, usesHFilter
;
951 SwsFilter dummyFilter
= { NULL
, NULL
, NULL
, NULL
};
956 int dst_stride
= FFALIGN(dstW
* sizeof(int16_t) + 66, 16);
957 int flags
, cpu_flags
;
958 enum AVPixelFormat srcFormat
= c
->srcFormat
;
959 enum AVPixelFormat dstFormat
= c
->dstFormat
;
960 const AVPixFmtDescriptor
*desc_src
;
961 const AVPixFmtDescriptor
*desc_dst
;
964 cpu_flags
= av_get_cpu_flags();
970 unscaled
= (srcW
== dstW
&& srcH
== dstH
);
972 c
->srcRange
|= handle_jpeg(&c
->srcFormat
);
973 c
->dstRange
|= handle_jpeg(&c
->dstFormat
);
975 if(srcFormat
!=c
->srcFormat
|| dstFormat
!=c
->dstFormat
)
976 av_log(c
, AV_LOG_WARNING
, "deprecated pixel format used, make sure you did set range correctly\n");
978 if (!c
->contrast
&& !c
->saturation
&& !c
->dstFormatBpp
)
979 sws_setColorspaceDetails(c
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
], c
->srcRange
,
980 ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
],
981 c
->dstRange
, 0, 1 << 16, 1 << 16);
984 srcFormat
= c
->srcFormat
;
985 dstFormat
= c
->dstFormat
;
986 desc_src
= av_pix_fmt_desc_get(srcFormat
);
987 desc_dst
= av_pix_fmt_desc_get(dstFormat
);
989 if (!(unscaled
&& sws_isSupportedEndiannessConversion(srcFormat
) &&
990 av_pix_fmt_swap_endianness(srcFormat
) == dstFormat
)) {
991 if (!sws_isSupportedInput(srcFormat
)) {
992 av_log(c
, AV_LOG_ERROR
, "%s is not supported as input pixel format\n",
993 av_get_pix_fmt_name(srcFormat
));
994 return AVERROR(EINVAL
);
996 if (!sws_isSupportedOutput(dstFormat
)) {
997 av_log(c
, AV_LOG_ERROR
, "%s is not supported as output pixel format\n",
998 av_get_pix_fmt_name(dstFormat
));
999 return AVERROR(EINVAL
);
1003 i
= flags
& (SWS_POINT
|
1015 /* provide a default scaler if not set by caller */
1017 if (dstW
< srcW
&& dstH
< srcH
)
1018 flags
|= SWS_BICUBIC
;
1019 else if (dstW
> srcW
&& dstH
> srcH
)
1020 flags
|= SWS_BICUBIC
;
1022 flags
|= SWS_BICUBIC
;
1024 } else if (i
& (i
- 1)) {
1025 av_log(c
, AV_LOG_ERROR
,
1026 "Exactly one scaler algorithm must be chosen, got %X\n", i
);
1027 return AVERROR(EINVAL
);
1030 if (srcW
< 1 || srcH
< 1 || dstW
< 1 || dstH
< 1) {
1031 /* FIXME check if these are enough and try to lower them after
1032 * fixing the relevant parts of the code */
1033 av_log(c
, AV_LOG_ERROR
, "%dx%d -> %dx%d is invalid scaling dimension\n",
1034 srcW
, srcH
, dstW
, dstH
);
1035 return AVERROR(EINVAL
);
1039 dstFilter
= &dummyFilter
;
1041 srcFilter
= &dummyFilter
;
1043 c
->lumXInc
= (((int64_t)srcW
<< 16) + (dstW
>> 1)) / dstW
;
1044 c
->lumYInc
= (((int64_t)srcH
<< 16) + (dstH
>> 1)) / dstH
;
1045 c
->dstFormatBpp
= av_get_bits_per_pixel(desc_dst
);
1046 c
->srcFormatBpp
= av_get_bits_per_pixel(desc_src
);
1047 c
->vRounder
= 4 * 0x0001000100010001ULL
;
1049 usesVFilter
= (srcFilter
->lumV
&& srcFilter
->lumV
->length
> 1) ||
1050 (srcFilter
->chrV
&& srcFilter
->chrV
->length
> 1) ||
1051 (dstFilter
->lumV
&& dstFilter
->lumV
->length
> 1) ||
1052 (dstFilter
->chrV
&& dstFilter
->chrV
->length
> 1);
1053 usesHFilter
= (srcFilter
->lumH
&& srcFilter
->lumH
->length
> 1) ||
1054 (srcFilter
->chrH
&& srcFilter
->chrH
->length
> 1) ||
1055 (dstFilter
->lumH
&& dstFilter
->lumH
->length
> 1) ||
1056 (dstFilter
->chrH
&& dstFilter
->chrH
->length
> 1);
1058 av_pix_fmt_get_chroma_sub_sample(srcFormat
, &c
->chrSrcHSubSample
, &c
->chrSrcVSubSample
);
1059 av_pix_fmt_get_chroma_sub_sample(dstFormat
, &c
->chrDstHSubSample
, &c
->chrDstVSubSample
);
1061 if (isAnyRGB(dstFormat
) && !(flags
&SWS_FULL_CHR_H_INT
)) {
1063 av_log(c
, AV_LOG_DEBUG
, "Forcing full internal H chroma due to odd output size\n");
1064 flags
|= SWS_FULL_CHR_H_INT
;
1068 if ( c
->chrSrcHSubSample
== 0
1069 && c
->chrSrcVSubSample
== 0
1070 && c
->dither
!= SWS_DITHER_BAYER
//SWS_FULL_CHR_H_INT is currently not supported with SWS_DITHER_BAYER
1071 && !(c
->flags
& SWS_FAST_BILINEAR
)
1073 av_log(c
, AV_LOG_DEBUG
, "Forcing full internal H chroma due to input having non subsampled chroma\n");
1074 flags
|= SWS_FULL_CHR_H_INT
;
1079 if (c
->dither
== SWS_DITHER_AUTO
) {
1080 if (flags
& SWS_ERROR_DIFFUSION
)
1081 c
->dither
= SWS_DITHER_ED
;
1084 if(dstFormat
== AV_PIX_FMT_BGR4_BYTE
||
1085 dstFormat
== AV_PIX_FMT_RGB4_BYTE
||
1086 dstFormat
== AV_PIX_FMT_BGR8
||
1087 dstFormat
== AV_PIX_FMT_RGB8
) {
1088 if (c
->dither
== SWS_DITHER_AUTO
)
1089 c
->dither
= (flags
& SWS_FULL_CHR_H_INT
) ? SWS_DITHER_ED
: SWS_DITHER_BAYER
;
1090 if (!(flags
& SWS_FULL_CHR_H_INT
)) {
1091 if (c
->dither
== SWS_DITHER_ED
|| c
->dither
== SWS_DITHER_A_DITHER
|| c
->dither
== SWS_DITHER_X_DITHER
) {
1092 av_log(c
, AV_LOG_DEBUG
,
1093 "Desired dithering only supported in full chroma interpolation for destination format '%s'\n",
1094 av_get_pix_fmt_name(dstFormat
));
1095 flags
|= SWS_FULL_CHR_H_INT
;
1099 if (flags
& SWS_FULL_CHR_H_INT
) {
1100 if (c
->dither
== SWS_DITHER_BAYER
) {
1101 av_log(c
, AV_LOG_DEBUG
,
1102 "Ordered dither is not supported in full chroma interpolation for destination format '%s'\n",
1103 av_get_pix_fmt_name(dstFormat
));
1104 c
->dither
= SWS_DITHER_ED
;
1108 if (isPlanarRGB(dstFormat
)) {
1109 if (!(flags
& SWS_FULL_CHR_H_INT
)) {
1110 av_log(c
, AV_LOG_DEBUG
,
1111 "%s output is not supported with half chroma resolution, switching to full\n",
1112 av_get_pix_fmt_name(dstFormat
));
1113 flags
|= SWS_FULL_CHR_H_INT
;
1118 /* reuse chroma for 2 pixels RGB/BGR unless user wants full
1119 * chroma interpolation */
1120 if (flags
& SWS_FULL_CHR_H_INT
&&
1121 isAnyRGB(dstFormat
) &&
1122 !isPlanarRGB(dstFormat
) &&
1123 dstFormat
!= AV_PIX_FMT_RGBA
&&
1124 dstFormat
!= AV_PIX_FMT_ARGB
&&
1125 dstFormat
!= AV_PIX_FMT_BGRA
&&
1126 dstFormat
!= AV_PIX_FMT_ABGR
&&
1127 dstFormat
!= AV_PIX_FMT_RGB24
&&
1128 dstFormat
!= AV_PIX_FMT_BGR24
&&
1129 dstFormat
!= AV_PIX_FMT_BGR4_BYTE
&&
1130 dstFormat
!= AV_PIX_FMT_RGB4_BYTE
&&
1131 dstFormat
!= AV_PIX_FMT_BGR8
&&
1132 dstFormat
!= AV_PIX_FMT_RGB8
1134 av_log(c
, AV_LOG_WARNING
,
1135 "full chroma interpolation for destination format '%s' not yet implemented\n",
1136 av_get_pix_fmt_name(dstFormat
));
1137 flags
&= ~SWS_FULL_CHR_H_INT
;
1140 if (isAnyRGB(dstFormat
) && !(flags
& SWS_FULL_CHR_H_INT
))
1141 c
->chrDstHSubSample
= 1;
1143 // drop some chroma lines if the user wants it
1144 c
->vChrDrop
= (flags
& SWS_SRC_V_CHR_DROP_MASK
) >>
1145 SWS_SRC_V_CHR_DROP_SHIFT
;
1146 c
->chrSrcVSubSample
+= c
->vChrDrop
;
1148 /* drop every other pixel for chroma calculation unless user
1149 * wants full chroma */
1150 if (isAnyRGB(srcFormat
) && !(flags
& SWS_FULL_CHR_H_INP
) &&
1151 srcFormat
!= AV_PIX_FMT_RGB8
&& srcFormat
!= AV_PIX_FMT_BGR8
&&
1152 srcFormat
!= AV_PIX_FMT_RGB4
&& srcFormat
!= AV_PIX_FMT_BGR4
&&
1153 srcFormat
!= AV_PIX_FMT_RGB4_BYTE
&& srcFormat
!= AV_PIX_FMT_BGR4_BYTE
&&
1154 srcFormat
!= AV_PIX_FMT_GBRP9BE
&& srcFormat
!= AV_PIX_FMT_GBRP9LE
&&
1155 srcFormat
!= AV_PIX_FMT_GBRP10BE
&& srcFormat
!= AV_PIX_FMT_GBRP10LE
&&
1156 srcFormat
!= AV_PIX_FMT_GBRP12BE
&& srcFormat
!= AV_PIX_FMT_GBRP12LE
&&
1157 srcFormat
!= AV_PIX_FMT_GBRP14BE
&& srcFormat
!= AV_PIX_FMT_GBRP14LE
&&
1158 srcFormat
!= AV_PIX_FMT_GBRP16BE
&& srcFormat
!= AV_PIX_FMT_GBRP16LE
&&
1159 ((dstW
>> c
->chrDstHSubSample
) <= (srcW
>> 1) ||
1160 (flags
& SWS_FAST_BILINEAR
)))
1161 c
->chrSrcHSubSample
= 1;
1163 // Note the FF_CEIL_RSHIFT is so that we always round toward +inf.
1164 c
->chrSrcW
= FF_CEIL_RSHIFT(srcW
, c
->chrSrcHSubSample
);
1165 c
->chrSrcH
= FF_CEIL_RSHIFT(srcH
, c
->chrSrcVSubSample
);
1166 c
->chrDstW
= FF_CEIL_RSHIFT(dstW
, c
->chrDstHSubSample
);
1167 c
->chrDstH
= FF_CEIL_RSHIFT(dstH
, c
->chrDstVSubSample
);
1169 FF_ALLOC_OR_GOTO(c
, c
->formatConvBuffer
, FFALIGN(srcW
*2+78, 16) * 2, fail
);
1171 c
->srcBpc
= 1 + desc_src
->comp
[0].depth_minus1
;
1174 c
->dstBpc
= 1 + desc_dst
->comp
[0].depth_minus1
;
1177 if (isAnyRGB(srcFormat
) || srcFormat
== AV_PIX_FMT_PAL8
)
1179 if (c
->dstBpc
== 16)
1182 if (INLINE_MMXEXT(cpu_flags
) && c
->srcBpc
== 8 && c
->dstBpc
<= 14) {
1183 c
->canMMXEXTBeUsed
= dstW
>= srcW
&& (dstW
& 31) == 0 &&
1184 c
->chrDstW
>= c
->chrSrcW
&&
1186 if (!c
->canMMXEXTBeUsed
&& dstW
>= srcW
&& c
->chrDstW
>= c
->chrSrcW
&& (srcW
& 15) == 0
1188 && (flags
& SWS_FAST_BILINEAR
)) {
1189 if (flags
& SWS_PRINT_INFO
)
1190 av_log(c
, AV_LOG_INFO
,
1191 "output width is not a multiple of 32 -> no MMXEXT scaler\n");
1193 if (usesHFilter
|| isNBPS(c
->srcFormat
) || is16BPS(c
->srcFormat
) || isAnyRGB(c
->srcFormat
))
1194 c
->canMMXEXTBeUsed
= 0;
1196 c
->canMMXEXTBeUsed
= 0;
1198 c
->chrXInc
= (((int64_t)c
->chrSrcW
<< 16) + (c
->chrDstW
>> 1)) / c
->chrDstW
;
1199 c
->chrYInc
= (((int64_t)c
->chrSrcH
<< 16) + (c
->chrDstH
>> 1)) / c
->chrDstH
;
1201 /* Match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src
1202 * to pixel n-2 of dst, but only for the FAST_BILINEAR mode otherwise do
1204 * n-2 is the last chrominance sample available.
1205 * This is not perfect, but no one should notice the difference, the more
1206 * correct variant would be like the vertical one, but that would require
1207 * some special code for the first and last pixel */
1208 if (flags
& SWS_FAST_BILINEAR
) {
1209 if (c
->canMMXEXTBeUsed
) {
1213 // we don't use the x86 asm scaler if MMX is available
1214 else if (INLINE_MMX(cpu_flags
) && c
->dstBpc
<= 14) {
1215 c
->lumXInc
= ((int64_t)(srcW
- 2) << 16) / (dstW
- 2) - 20;
1216 c
->chrXInc
= ((int64_t)(c
->chrSrcW
- 2) << 16) / (c
->chrDstW
- 2) - 20;
1220 if (isBayer(srcFormat
)) {
1222 (dstFormat
!= AV_PIX_FMT_RGB24
&& dstFormat
!= AV_PIX_FMT_YUV420P
)) {
1223 enum AVPixelFormat tmpFormat
= AV_PIX_FMT_RGB24
;
1225 ret
= av_image_alloc(c
->cascaded_tmp
, c
->cascaded_tmpStride
,
1226 srcW
, srcH
, tmpFormat
, 64);
1230 c
->cascaded_context
[0] = sws_getContext(srcW
, srcH
, srcFormat
,
1231 srcW
, srcH
, tmpFormat
,
1232 flags
, srcFilter
, NULL
, c
->param
);
1233 if (!c
->cascaded_context
[0])
1236 c
->cascaded_context
[1] = sws_getContext(srcW
, srcH
, tmpFormat
,
1237 dstW
, dstH
, dstFormat
,
1238 flags
, NULL
, dstFilter
, c
->param
);
1239 if (!c
->cascaded_context
[1])
1245 #define USE_MMAP (HAVE_MMAP && HAVE_MPROTECT && defined MAP_ANONYMOUS)
1247 /* precalculate horizontal scaler filter coefficients */
1249 #if HAVE_MMXEXT_INLINE
1250 // can't downscale !!!
1251 if (c
->canMMXEXTBeUsed
&& (flags
& SWS_FAST_BILINEAR
)) {
1252 c
->lumMmxextFilterCodeSize
= ff_init_hscaler_mmxext(dstW
, c
->lumXInc
, NULL
,
1254 c
->chrMmxextFilterCodeSize
= ff_init_hscaler_mmxext(c
->chrDstW
, c
->chrXInc
,
1255 NULL
, NULL
, NULL
, 4);
1258 c
->lumMmxextFilterCode
= mmap(NULL
, c
->lumMmxextFilterCodeSize
,
1259 PROT_READ
| PROT_WRITE
,
1260 MAP_PRIVATE
| MAP_ANONYMOUS
,
1262 c
->chrMmxextFilterCode
= mmap(NULL
, c
->chrMmxextFilterCodeSize
,
1263 PROT_READ
| PROT_WRITE
,
1264 MAP_PRIVATE
| MAP_ANONYMOUS
,
1266 #elif HAVE_VIRTUALALLOC
1267 c
->lumMmxextFilterCode
= VirtualAlloc(NULL
,
1268 c
->lumMmxextFilterCodeSize
,
1270 PAGE_EXECUTE_READWRITE
);
1271 c
->chrMmxextFilterCode
= VirtualAlloc(NULL
,
1272 c
->chrMmxextFilterCodeSize
,
1274 PAGE_EXECUTE_READWRITE
);
1276 c
->lumMmxextFilterCode
= av_malloc(c
->lumMmxextFilterCodeSize
);
1277 c
->chrMmxextFilterCode
= av_malloc(c
->chrMmxextFilterCodeSize
);
1280 #ifdef MAP_ANONYMOUS
1281 if (c
->lumMmxextFilterCode
== MAP_FAILED
|| c
->chrMmxextFilterCode
== MAP_FAILED
)
1283 if (!c
->lumMmxextFilterCode
|| !c
->chrMmxextFilterCode
)
1286 av_log(c
, AV_LOG_ERROR
, "Failed to allocate MMX2FilterCode\n");
1287 return AVERROR(ENOMEM
);
1290 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilter
, (dstW
/ 8 + 8) * sizeof(int16_t), fail
);
1291 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilter
, (c
->chrDstW
/ 4 + 8) * sizeof(int16_t), fail
);
1292 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilterPos
, (dstW
/ 2 / 8 + 8) * sizeof(int32_t), fail
);
1293 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilterPos
, (c
->chrDstW
/ 2 / 4 + 8) * sizeof(int32_t), fail
);
1295 ff_init_hscaler_mmxext( dstW
, c
->lumXInc
, c
->lumMmxextFilterCode
,
1296 c
->hLumFilter
, (uint32_t*)c
->hLumFilterPos
, 8);
1297 ff_init_hscaler_mmxext(c
->chrDstW
, c
->chrXInc
, c
->chrMmxextFilterCode
,
1298 c
->hChrFilter
, (uint32_t*)c
->hChrFilterPos
, 4);
1301 if ( mprotect(c
->lumMmxextFilterCode
, c
->lumMmxextFilterCodeSize
, PROT_EXEC
| PROT_READ
) == -1
1302 || mprotect(c
->chrMmxextFilterCode
, c
->chrMmxextFilterCodeSize
, PROT_EXEC
| PROT_READ
) == -1) {
1303 av_log(c
, AV_LOG_ERROR
, "mprotect failed, cannot use fast bilinear scaler\n");
1308 #endif /* HAVE_MMXEXT_INLINE */
1310 const int filterAlign
= X86_MMX(cpu_flags
) ? 4 :
1311 PPC_ALTIVEC(cpu_flags
) ? 8 : 1;
1313 if ((ret
= initFilter(&c
->hLumFilter
, &c
->hLumFilterPos
,
1314 &c
->hLumFilterSize
, c
->lumXInc
,
1315 srcW
, dstW
, filterAlign
, 1 << 14,
1316 (flags
& SWS_BICUBLIN
) ? (flags
| SWS_BICUBIC
) : flags
,
1317 cpu_flags
, srcFilter
->lumH
, dstFilter
->lumH
,
1319 get_local_pos(c
, 0, 0, 0),
1320 get_local_pos(c
, 0, 0, 0))) < 0)
1322 if ((ret
= initFilter(&c
->hChrFilter
, &c
->hChrFilterPos
,
1323 &c
->hChrFilterSize
, c
->chrXInc
,
1324 c
->chrSrcW
, c
->chrDstW
, filterAlign
, 1 << 14,
1325 (flags
& SWS_BICUBLIN
) ? (flags
| SWS_BILINEAR
) : flags
,
1326 cpu_flags
, srcFilter
->chrH
, dstFilter
->chrH
,
1328 get_local_pos(c
, c
->chrSrcHSubSample
, c
->src_h_chr_pos
, 0),
1329 get_local_pos(c
, c
->chrDstHSubSample
, c
->dst_h_chr_pos
, 0))) < 0)
1332 } // initialize horizontal stuff
1334 /* precalculate vertical scaler filter coefficients */
1336 const int filterAlign
= X86_MMX(cpu_flags
) ? 2 :
1337 PPC_ALTIVEC(cpu_flags
) ? 8 : 1;
1339 if ((ret
= initFilter(&c
->vLumFilter
, &c
->vLumFilterPos
, &c
->vLumFilterSize
,
1340 c
->lumYInc
, srcH
, dstH
, filterAlign
, (1 << 12),
1341 (flags
& SWS_BICUBLIN
) ? (flags
| SWS_BICUBIC
) : flags
,
1342 cpu_flags
, srcFilter
->lumV
, dstFilter
->lumV
,
1344 get_local_pos(c
, 0, 0, 1),
1345 get_local_pos(c
, 0, 0, 1))) < 0)
1347 if ((ret
= initFilter(&c
->vChrFilter
, &c
->vChrFilterPos
, &c
->vChrFilterSize
,
1348 c
->chrYInc
, c
->chrSrcH
, c
->chrDstH
,
1349 filterAlign
, (1 << 12),
1350 (flags
& SWS_BICUBLIN
) ? (flags
| SWS_BILINEAR
) : flags
,
1351 cpu_flags
, srcFilter
->chrV
, dstFilter
->chrV
,
1353 get_local_pos(c
, c
->chrSrcVSubSample
, c
->src_v_chr_pos
, 1),
1354 get_local_pos(c
, c
->chrDstVSubSample
, c
->dst_v_chr_pos
, 1))) < 0)
1359 FF_ALLOC_OR_GOTO(c
, c
->vYCoeffsBank
, sizeof(vector
signed short) * c
->vLumFilterSize
* c
->dstH
, fail
);
1360 FF_ALLOC_OR_GOTO(c
, c
->vCCoeffsBank
, sizeof(vector
signed short) * c
->vChrFilterSize
* c
->chrDstH
, fail
);
1362 for (i
= 0; i
< c
->vLumFilterSize
* c
->dstH
; i
++) {
1364 short *p
= (short *)&c
->vYCoeffsBank
[i
];
1365 for (j
= 0; j
< 8; j
++)
1366 p
[j
] = c
->vLumFilter
[i
];
1369 for (i
= 0; i
< c
->vChrFilterSize
* c
->chrDstH
; i
++) {
1371 short *p
= (short *)&c
->vCCoeffsBank
[i
];
1372 for (j
= 0; j
< 8; j
++)
1373 p
[j
] = c
->vChrFilter
[i
];
1378 // calculate buffer sizes so that they won't run out while handling these damn slices
1379 c
->vLumBufSize
= c
->vLumFilterSize
;
1380 c
->vChrBufSize
= c
->vChrFilterSize
;
1381 for (i
= 0; i
< dstH
; i
++) {
1382 int chrI
= (int64_t)i
* c
->chrDstH
/ dstH
;
1383 int nextSlice
= FFMAX(c
->vLumFilterPos
[i
] + c
->vLumFilterSize
- 1,
1384 ((c
->vChrFilterPos
[chrI
] + c
->vChrFilterSize
- 1)
1385 << c
->chrSrcVSubSample
));
1387 nextSlice
>>= c
->chrSrcVSubSample
;
1388 nextSlice
<<= c
->chrSrcVSubSample
;
1389 if (c
->vLumFilterPos
[i
] + c
->vLumBufSize
< nextSlice
)
1390 c
->vLumBufSize
= nextSlice
- c
->vLumFilterPos
[i
];
1391 if (c
->vChrFilterPos
[chrI
] + c
->vChrBufSize
<
1392 (nextSlice
>> c
->chrSrcVSubSample
))
1393 c
->vChrBufSize
= (nextSlice
>> c
->chrSrcVSubSample
) -
1394 c
->vChrFilterPos
[chrI
];
1397 for (i
= 0; i
< 4; i
++)
1398 FF_ALLOCZ_OR_GOTO(c
, c
->dither_error
[i
], (c
->dstW
+2) * sizeof(int), fail
);
1400 /* Allocate pixbufs (we use dynamic allocation because otherwise we would
1401 * need to allocate several megabytes to handle all possible cases) */
1402 FF_ALLOC_OR_GOTO(c
, c
->lumPixBuf
, c
->vLumBufSize
* 3 * sizeof(int16_t *), fail
);
1403 FF_ALLOC_OR_GOTO(c
, c
->chrUPixBuf
, c
->vChrBufSize
* 3 * sizeof(int16_t *), fail
);
1404 FF_ALLOC_OR_GOTO(c
, c
->chrVPixBuf
, c
->vChrBufSize
* 3 * sizeof(int16_t *), fail
);
1405 if (CONFIG_SWSCALE_ALPHA
&& isALPHA(c
->srcFormat
) && isALPHA(c
->dstFormat
))
1406 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
, c
->vLumBufSize
* 3 * sizeof(int16_t *), fail
);
1407 /* Note we need at least one pixel more at the end because of the MMX code
1408 * (just in case someone wants to replace the 4000/8000). */
1409 /* align at 16 bytes for AltiVec */
1410 for (i
= 0; i
< c
->vLumBufSize
; i
++) {
1411 FF_ALLOCZ_OR_GOTO(c
, c
->lumPixBuf
[i
+ c
->vLumBufSize
],
1412 dst_stride
+ 16, fail
);
1413 c
->lumPixBuf
[i
] = c
->lumPixBuf
[i
+ c
->vLumBufSize
];
1415 // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate)
1416 c
->uv_off
= (dst_stride
>>1) + 64 / (c
->dstBpc
&~ 7);
1417 c
->uv_offx2
= dst_stride
+ 16;
1418 for (i
= 0; i
< c
->vChrBufSize
; i
++) {
1419 FF_ALLOC_OR_GOTO(c
, c
->chrUPixBuf
[i
+ c
->vChrBufSize
],
1420 dst_stride
* 2 + 32, fail
);
1421 c
->chrUPixBuf
[i
] = c
->chrUPixBuf
[i
+ c
->vChrBufSize
];
1422 c
->chrVPixBuf
[i
] = c
->chrVPixBuf
[i
+ c
->vChrBufSize
]
1423 = c
->chrUPixBuf
[i
] + (dst_stride
>> 1) + 8;
1425 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
)
1426 for (i
= 0; i
< c
->vLumBufSize
; i
++) {
1427 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
[i
+ c
->vLumBufSize
],
1428 dst_stride
+ 16, fail
);
1429 c
->alpPixBuf
[i
] = c
->alpPixBuf
[i
+ c
->vLumBufSize
];
1432 // try to avoid drawing green stuff between the right end and the stride end
1433 for (i
= 0; i
< c
->vChrBufSize
; i
++)
1434 if(desc_dst
->comp
[0].depth_minus1
== 15){
1435 av_assert0(c
->dstBpc
> 14);
1436 for(j
=0; j
<dst_stride
/2+1; j
++)
1437 ((int32_t*)(c
->chrUPixBuf
[i
]))[j
] = 1<<18;
1439 for(j
=0; j
<dst_stride
+1; j
++)
1440 ((int16_t*)(c
->chrUPixBuf
[i
]))[j
] = 1<<14;
1442 av_assert0(c
->chrDstH
<= dstH
);
1444 if (flags
& SWS_PRINT_INFO
) {
1445 const char *scaler
= NULL
, *cpucaps
;
1447 for (i
= 0; i
< FF_ARRAY_ELEMS(scale_algorithms
); i
++) {
1448 if (flags
& scale_algorithms
[i
].flag
) {
1449 scaler
= scale_algorithms
[i
].description
;
1454 scaler
= "ehh flags invalid?!";
1455 av_log(c
, AV_LOG_INFO
, "%s scaler, from %s to %s%s ",
1457 av_get_pix_fmt_name(srcFormat
),
1459 dstFormat
== AV_PIX_FMT_BGR555
|| dstFormat
== AV_PIX_FMT_BGR565
||
1460 dstFormat
== AV_PIX_FMT_RGB444BE
|| dstFormat
== AV_PIX_FMT_RGB444LE
||
1461 dstFormat
== AV_PIX_FMT_BGR444BE
|| dstFormat
== AV_PIX_FMT_BGR444LE
?
1466 av_get_pix_fmt_name(dstFormat
));
1468 if (INLINE_MMXEXT(cpu_flags
))
1470 else if (INLINE_AMD3DNOW(cpu_flags
))
1472 else if (INLINE_MMX(cpu_flags
))
1474 else if (PPC_ALTIVEC(cpu_flags
))
1475 cpucaps
= "AltiVec";
1479 av_log(c
, AV_LOG_INFO
, "using %s\n", cpucaps
);
1481 av_log(c
, AV_LOG_VERBOSE
, "%dx%d -> %dx%d\n", srcW
, srcH
, dstW
, dstH
);
1482 av_log(c
, AV_LOG_DEBUG
,
1483 "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1484 c
->srcW
, c
->srcH
, c
->dstW
, c
->dstH
, c
->lumXInc
, c
->lumYInc
);
1485 av_log(c
, AV_LOG_DEBUG
,
1486 "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1487 c
->chrSrcW
, c
->chrSrcH
, c
->chrDstW
, c
->chrDstH
,
1488 c
->chrXInc
, c
->chrYInc
);
1491 /* unscaled special cases */
1492 if (unscaled
&& !usesHFilter
&& !usesVFilter
&&
1493 (c
->srcRange
== c
->dstRange
|| isAnyRGB(dstFormat
))) {
1494 ff_get_unscaled_swscale(c
);
1497 if (flags
& SWS_PRINT_INFO
)
1498 av_log(c
, AV_LOG_INFO
,
1499 "using unscaled %s -> %s special converter\n",
1500 av_get_pix_fmt_name(srcFormat
), av_get_pix_fmt_name(dstFormat
));
1505 c
->swscale
= ff_getSwsFunc(c
);
1507 fail
: // FIXME replace things by appropriate error codes
1508 if (ret
== RETCODE_USE_CASCADE
) {
1509 int tmpW
= sqrt(srcW
* (int64_t)dstW
);
1510 int tmpH
= sqrt(srcH
* (int64_t)dstH
);
1511 enum AVPixelFormat tmpFormat
= AV_PIX_FMT_YUV420P
;
1513 if (srcW
*(int64_t)srcH
<= 4LL*dstW
*dstH
)
1514 return AVERROR(EINVAL
);
1516 ret
= av_image_alloc(c
->cascaded_tmp
, c
->cascaded_tmpStride
,
1517 tmpW
, tmpH
, tmpFormat
, 64);
1521 c
->cascaded_context
[0] = sws_getContext(srcW
, srcH
, srcFormat
,
1522 tmpW
, tmpH
, tmpFormat
,
1523 flags
, srcFilter
, NULL
, c
->param
);
1524 if (!c
->cascaded_context
[0])
1527 c
->cascaded_context
[1] = sws_getContext(tmpW
, tmpH
, tmpFormat
,
1528 dstW
, dstH
, dstFormat
,
1529 flags
, NULL
, dstFilter
, c
->param
);
1530 if (!c
->cascaded_context
[1])
/*
 * Create a scaler context for the given source/destination geometry and
 * pixel formats.
 *
 * The context is obtained from sws_alloc_context(), the source and
 * destination formats and the two entries of param[] are copied into it,
 * and sws_init_context() is then run with the caller-supplied filters.
 * A negative result from sws_init_context() is treated as failure.
 *
 * NOTE(review): this excerpt does not show every statement of the body
 * (remaining field assignments, any guard around the param[] reads, the
 * failure clean-up and the return statements) - confirm against the full
 * file before relying on details not listed above.
 */
1537 SwsContext
*sws_getContext(int srcW
, int srcH
, enum AVPixelFormat srcFormat
,
1538 int dstW
, int dstH
, enum AVPixelFormat dstFormat
,
1539 int flags
, SwsFilter
*srcFilter
,
1540 SwsFilter
*dstFilter
, const double *param
)
/* allocation of the context; the failure branch is not visible here */
1544 if (!(c
= sws_alloc_context()))
1552 c
->srcFormat
= srcFormat
;
1553 c
->dstFormat
= dstFormat
;
/* copy the two algorithm tuning parameters into the context */
1556 c
->param
[0] = param
[0];
1557 c
->param
[1] = param
[1];
/* full initialisation; a negative return value means failure */
1560 if (sws_init_context(c
, srcFilter
, dstFilter
) < 0) {
/*
 * Build a SwsFilter from blur, sharpen and chroma-shift amounts.
 *
 * Visible steps, in order:
 *  - the SwsFilter is allocated with av_malloc();
 *  - luma and chroma H/V vectors are set from sws_getGaussianVec(blur, 3.0)
 *    when the corresponding blur amount is non-zero; the identity-vector
 *    assignments that follow are presumably the alternative branch (the
 *    separating "else" is not visible in this excerpt - confirm);
 *  - a non-zero sharpen amount scales the H/V vectors by -sharpen and adds
 *    an identity vector to each;
 *  - non-zero chroma shifts are rounded with (int)(x + 0.5) and applied to
 *    chrH / chrV through sws_shiftVec();
 *  - all four vectors are normalised with height 1.0;
 *  - chrH and lumH are printed at AV_LOG_DEBUG level.
 *
 * NOTE(review): the parameter list continues past the lines shown here, and
 * the allocation check, the release of the temporary identity vectors and
 * the return statement are not visible - confirm.
 */
1568 SwsFilter
*sws_getDefaultFilter(float lumaGBlur
, float chromaGBlur
,
1569 float lumaSharpen
, float chromaSharpen
,
1570 float chromaHShift
, float chromaVShift
,
1573 SwsFilter
*filter
= av_malloc(sizeof(SwsFilter
));
/* luma blur: Gaussian with quality factor 3.0, otherwise identity */
1577 if (lumaGBlur
!= 0.0) {
1578 filter
->lumH
= sws_getGaussianVec(lumaGBlur
, 3.0);
1579 filter
->lumV
= sws_getGaussianVec(lumaGBlur
, 3.0);
1581 filter
->lumH
= sws_getIdentityVec();
1582 filter
->lumV
= sws_getIdentityVec();
/* chroma blur: same scheme as luma */
1585 if (chromaGBlur
!= 0.0) {
1586 filter
->chrH
= sws_getGaussianVec(chromaGBlur
, 3.0);
1587 filter
->chrV
= sws_getGaussianVec(chromaGBlur
, 3.0);
1589 filter
->chrH
= sws_getIdentityVec();
1590 filter
->chrV
= sws_getIdentityVec();
/* chroma sharpening: vec = identity - sharpen * vec */
1593 if (chromaSharpen
!= 0.0) {
1594 SwsVector
*id
= sws_getIdentityVec();
1595 sws_scaleVec(filter
->chrH
, -chromaSharpen
);
1596 sws_scaleVec(filter
->chrV
, -chromaSharpen
);
1597 sws_addVec(filter
->chrH
, id
);
1598 sws_addVec(filter
->chrV
, id
);
/* luma sharpening: same construction as for chroma */
1602 if (lumaSharpen
!= 0.0) {
1603 SwsVector
*id
= sws_getIdentityVec();
1604 sws_scaleVec(filter
->lumH
, -lumaSharpen
);
1605 sws_scaleVec(filter
->lumV
, -lumaSharpen
);
1606 sws_addVec(filter
->lumH
, id
);
1607 sws_addVec(filter
->lumV
, id
);
/* chroma shifts are converted to int by adding 0.5 and truncating */
1611 if (chromaHShift
!= 0.0)
1612 sws_shiftVec(filter
->chrH
, (int)(chromaHShift
+ 0.5));
1614 if (chromaVShift
!= 0.0)
1615 sws_shiftVec(filter
->chrV
, (int)(chromaVShift
+ 0.5));
/* rescale every vector with target height 1.0 */
1617 sws_normalizeVec(filter
->chrH
, 1.0);
1618 sws_normalizeVec(filter
->chrV
, 1.0);
1619 sws_normalizeVec(filter
->lumH
, 1.0);
1620 sws_normalizeVec(filter
->lumV
, 1.0);
/* debug dump of the horizontal vectors; NULL is passed as log context */
1623 sws_printVec2(filter
->chrH
, NULL
, AV_LOG_DEBUG
);
1625 sws_printVec2(filter
->lumH
, NULL
, AV_LOG_DEBUG
);
/*
 * Allocate a SwsVector with room for `length` double coefficients.
 *
 * The length is range-checked first: values <= 0 or greater than
 * INT_MAX / sizeof(double) are rejected, so the byte count
 * sizeof(double) * length used below cannot exceed INT_MAX.
 * Both the vector struct and its coefficient array come from av_malloc();
 * no initialisation of the coefficients is visible here.
 *
 * NOTE(review): the statements executed on rejection or allocation failure
 * and the final return are not visible in this excerpt - confirm.
 */
1630 SwsVector
*sws_allocVec(int length
)
1634 if(length
<= 0 || length
> INT_MAX
/ sizeof(double))
1637 vec
= av_malloc(sizeof(SwsVector
));
1640 vec
->length
= length
;
1641 vec
->coeff
= av_malloc(sizeof(double) * length
);
/*
 * Build a Gaussian-shaped coefficient vector.
 *
 * The tap count is (int)(variance * quality + 0.5) with the lowest bit
 * forced on, so it is always odd and `middle` = (length - 1) * 0.5 is the
 * index of the centre tap. Negative variance or quality is checked for
 * before the vector is allocated through sws_allocVec().
 * Each tap is exp(-dist^2 / (2 * variance^2)) / sqrt(2 * variance * M_PI),
 * where dist is the distance from the centre tap; the vector is afterwards
 * passed to sws_normalizeVec() with height 1.0.
 *
 * NOTE(review): the result of the argument check, the handling of a failed
 * allocation and the return statement are not visible here. A variance of
 * exactly 0 passes the visible check and makes both divisors zero - confirm
 * how callers avoid that.
 */
1647 SwsVector
*sws_getGaussianVec(double variance
, double quality
)
/* "| 1" makes the length odd so that a single centre tap exists */
1649 const int length
= (int)(variance
* quality
+ 0.5) | 1;
1651 double middle
= (length
- 1) * 0.5;
1654 if(variance
< 0 || quality
< 0)
1657 vec
= sws_allocVec(length
);
1662 for (i
= 0; i
< length
; i
++) {
1663 double dist
= i
- middle
;
1664 vec
->coeff
[i
] = exp(-dist
* dist
/ (2 * variance
* variance
)) /
1665 sqrt(2 * variance
* M_PI
);
1668 sws_normalizeVec(vec
, 1.0);
/*
 * Allocate a vector of `length` taps through sws_allocVec() and iterate
 * over every tap.
 *
 * NOTE(review): the loop body, the allocation check and the return are not
 * visible in this excerpt; presumably each coefficient is set to `c` -
 * confirm.
 */
1673 SwsVector
*sws_getConstVec(double c
, int length
)
1676 SwsVector
*vec
= sws_allocVec(length
);
1681 for (i
= 0; i
< length
; i
++)
/*
 * Return the vector produced by sws_getConstVec(1.0, 1), i.e. a request for
 * a single tap with constant 1.0. The result is passed through unchanged,
 * including whatever sws_getConstVec() yields on failure.
 */
1687 SwsVector
*sws_getIdentityVec(void)
1689 return sws_getConstVec(1.0, 1);
/*
 * Iterate over all a->length coefficients of `a` and return a double.
 *
 * NOTE(review): the accumulator and the loop body are not visible in this
 * excerpt; presumably the coefficients are summed. The result is used as a
 * divisor by sws_normalizeVec() - confirm.
 */
1692 static double sws_dcVec(SwsVector
*a
)
1697 for (i
= 0; i
< a
->length
; i
++)
/*
 * Multiply every coefficient of `a` by `scalar`, in place.
 * The vector length is left untouched; `a` is dereferenced without a check
 * in the visible code.
 */
1703 void sws_scaleVec(SwsVector
*a
, double scalar
)
1707 for (i
= 0; i
< a
->length
; i
++)
1708 a
->coeff
[i
] *= scalar
;
/*
 * Rescale `a` in place by the factor height / sws_dcVec(a).
 * No check of the divisor is made here, so a zero result from sws_dcVec()
 * yields a non-finite scale factor.
 */
1711 void sws_normalizeVec(SwsVector
*a
, double height
)
1713 sws_scaleVec(a
, height
/ sws_dcVec(a
));
/*
 * Return a new vector holding the discrete convolution of `a` and `b`.
 *
 * The result has a->length + b->length - 1 taps, is requested from
 * sws_getConstVec(0.0, length) and receives the product a[i] * b[j]
 * accumulated at index i + j for every pair of taps.
 *
 * NOTE(review): the allocation check and the return statement are not
 * visible in this excerpt - confirm.
 */
1716 static SwsVector
*sws_getConvVec(SwsVector
*a
, SwsVector
*b
)
1718 int length
= a
->length
+ b
->length
- 1;
1720 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1725 for (i
= 0; i
< a
->length
; i
++) {
1726 for (j
= 0; j
< b
->length
; j
++) {
1727 vec
->coeff
[i
+ j
] += a
->coeff
[i
] * b
->coeff
[j
];
/*
 * Return a new vector holding the tap-wise sum of `a` and `b`.
 *
 * The result is as long as the longer input and is requested from
 * sws_getConstVec(0.0, length). Each input is added starting at offset
 * (length - 1) / 2 - (input_length - 1) / 2, which lines up the centre taps
 * of the inputs with the centre of the result.
 *
 * NOTE(review): the allocation check and the return statement are not
 * visible in this excerpt - confirm.
 */
1734 static SwsVector
*sws_sumVec(SwsVector
*a
, SwsVector
*b
)
1736 int length
= FFMAX(a
->length
, b
->length
);
1738 SwsVector
*vec
= sws_getConstVec(0.0, length
);
/* add a, centred inside the result */
1743 for (i
= 0; i
< a
->length
; i
++)
1744 vec
->coeff
[i
+ (length
- 1) / 2 - (a
->length
- 1) / 2] += a
->coeff
[i
];
/* add b, centred inside the result */
1745 for (i
= 0; i
< b
->length
; i
++)
1746 vec
->coeff
[i
+ (length
- 1) / 2 - (b
->length
- 1) / 2] += b
->coeff
[i
];
/*
 * Return a new vector holding the tap-wise difference a - b.
 *
 * The result is as long as the longer input and is requested from
 * sws_getConstVec(0.0, length). `a` is added and `b` is subtracted, each
 * starting at offset (length - 1) / 2 - (input_length - 1) / 2 so that the
 * centre taps line up.
 *
 * NOTE(review): the allocation check and the return statement are not
 * visible in this excerpt - confirm.
 */
1751 static SwsVector
*sws_diffVec(SwsVector
*a
, SwsVector
*b
)
1753 int length
= FFMAX(a
->length
, b
->length
);
1755 SwsVector
*vec
= sws_getConstVec(0.0, length
);
/* add a, centred inside the result */
1760 for (i
= 0; i
< a
->length
; i
++)
1761 vec
->coeff
[i
+ (length
- 1) / 2 - (a
->length
- 1) / 2] += a
->coeff
[i
];
/* subtract b, centred inside the result */
1762 for (i
= 0; i
< b
->length
; i
++)
1763 vec
->coeff
[i
+ (length
- 1) / 2 - (b
->length
- 1) / 2] -= b
->coeff
[i
];
1768 /* shift left / or right if "shift" is negative */
/*
 * Return a new vector containing the taps of `a` displaced by `shift`.
 *
 * The result has a->length + 2 * FFABS(shift) taps and is requested from
 * sws_getConstVec(0.0, length). Tap i of `a` is stored at index
 * i + (length - 1) / 2 - (a->length - 1) / 2 - shift; because the two
 * lengths differ by 2 * |shift| this is i + |shift| - shift, so a positive
 * shift places the taps at the low end of the result and a negative shift
 * at the high end.
 *
 * NOTE(review): the allocation check and the return statement are not
 * visible in this excerpt - confirm.
 */
1769 static SwsVector
*sws_getShiftedVec(SwsVector
*a
, int shift
)
1771 int length
= a
->length
+ FFABS(shift
) * 2;
1773 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1778 for (i
= 0; i
< a
->length
; i
++) {
1779 vec
->coeff
[i
+ (length
- 1) / 2 -
1780 (a
->length
- 1) / 2 - shift
] = a
->coeff
[i
];
/*
 * Shift `a` in place: a shifted copy is built with sws_getShiftedVec() and
 * its coefficient array and length are then taken over by `a`.
 *
 * NOTE(review): no test of `shifted` against NULL is visible before it is
 * dereferenced, and the release of the previous coefficient array and of
 * the temporary vector struct is not visible in this excerpt - confirm.
 */
1786 void sws_shiftVec(SwsVector
*a
, int shift
)
1788 SwsVector
*shifted
= sws_getShiftedVec(a
, shift
);
/* `a` adopts the storage owned by the temporary vector */
1790 a
->coeff
= shifted
->coeff
;
1791 a
->length
= shifted
->length
;
/*
 * Add `b` to `a` in place: the sum is built with sws_sumVec() and its
 * coefficient array and length are then taken over by `a`. `b` is only
 * read.
 *
 * NOTE(review): no test of `sum` against NULL is visible before it is
 * dereferenced, and the release of the previous coefficient array and of
 * the temporary vector struct is not visible in this excerpt - confirm.
 */
1795 void sws_addVec(SwsVector
*a
, SwsVector
*b
)
1797 SwsVector
*sum
= sws_sumVec(a
, b
);
/* `a` adopts the storage owned by the temporary vector */
1799 a
->coeff
= sum
->coeff
;
1800 a
->length
= sum
->length
;
/*
 * Subtract `b` from `a` in place: the difference is built with
 * sws_diffVec() and its coefficient array and length are then taken over
 * by `a`. `b` is only read.
 *
 * NOTE(review): no test of `diff` against NULL is visible before it is
 * dereferenced, and the release of the previous coefficient array and of
 * the temporary vector struct is not visible in this excerpt - confirm.
 */
1804 void sws_subVec(SwsVector
*a
, SwsVector
*b
)
1806 SwsVector
*diff
= sws_diffVec(a
, b
);
/* `a` adopts the storage owned by the temporary vector */
1808 a
->coeff
= diff
->coeff
;
1809 a
->length
= diff
->length
;
/*
 * Convolve `a` with `b` in place: the convolution is built with
 * sws_getConvVec() and its coefficient array and length are then taken
 * over by `a`. `b` is only read.
 *
 * NOTE(review): no test of `conv` against NULL is visible before it is
 * dereferenced, and the release of the previous coefficient array and of
 * the temporary vector struct is not visible in this excerpt - confirm.
 */
1813 void sws_convVec(SwsVector
*a
, SwsVector
*b
)
1815 SwsVector
*conv
= sws_getConvVec(a
, b
);
/* `a` adopts the storage owned by the temporary vector */
1817 a
->coeff
= conv
->coeff
;
1818 a
->length
= conv
->length
;
/*
 * Return a copy of `a`: a vector of the same length is obtained from
 * sws_allocVec() and all a->length coefficients are copied into it with
 * memcpy().
 *
 * NOTE(review): the allocation check and the return statement are not
 * visible in this excerpt - confirm.
 */
1822 SwsVector
*sws_cloneVec(SwsVector
*a
)
1824 SwsVector
*vec
= sws_allocVec(a
->length
);
/* byte count is derived from the element type of a->coeff */
1829 memcpy(vec
->coeff
, a
->coeff
, a
->length
* sizeof(*a
->coeff
));
/*
 * Log the coefficients of `a`, one line per tap, through av_log() using
 * the given log context and level.
 *
 * Two passes compare every coefficient against `max` and `min`; the third
 * pass computes, per tap, x = (coeff - min) * 60.0 / range rounded by
 * adding 0.5 and truncating, logs the value with "%1.3f ", logs " " and
 * finally "|\n".
 *
 * NOTE(review): the initial values and updates of max/min, the computation
 * of `range` and the loop that presumably repeats the " " output x times
 * are not visible in this excerpt. If `range` can be 0 the division below
 * produces a non-finite value before the int conversion - confirm.
 */
1834 void sws_printVec2(SwsVector
*a
, AVClass
*log_ctx
, int log_level
)
/* pass 1: look for the largest coefficient */
1841 for (i
= 0; i
< a
->length
; i
++)
1842 if (a
->coeff
[i
] > max
)
/* pass 2: look for the smallest coefficient */
1845 for (i
= 0; i
< a
->length
; i
++)
1846 if (a
->coeff
[i
] < min
)
/* pass 3: print each value followed by its marker */
1851 for (i
= 0; i
< a
->length
; i
++) {
1852 int x
= (int)((a
->coeff
[i
] - min
) * 60.0 / range
+ 0.5);
1853 av_log(log_ctx
, log_level
, "%1.3f ", a
->coeff
[i
]);
1855 av_log(log_ctx
, log_level
, " ");
1856 av_log(log_ctx
, log_level
, "|\n");
/*
 * Release the coefficient array of `a` with av_freep(), which also resets
 * a->coeff.
 *
 * NOTE(review): any guard for a NULL argument and the release of the
 * vector struct itself are not visible in this excerpt - confirm.
 */
1860 void sws_freeVec(SwsVector
*a
)
1864 av_freep(&a
->coeff
);
/*
 * Release the four vectors of a filter (lumH, lumV, chrH, chrV) through
 * sws_freeVec().
 *
 * NOTE(review): any guard for a NULL argument and the release of the
 * SwsFilter struct itself are not visible in this excerpt - confirm.
 */
1869 void sws_freeFilter(SwsFilter
*filter
)
1874 sws_freeVec(filter
->lumH
);
1875 sws_freeVec(filter
->lumV
);
1876 sws_freeVec(filter
->chrH
);
1877 sws_freeVec(filter
->chrV
);
/*
 * Release every buffer owned by a scaler context: the line buffers, the
 * dither error rows, the filter coefficient and position tables, the
 * generated MMXEXT scaler code, the YUV table, the format conversion
 * buffer and the cascaded sub-contexts with their temporary image.
 *
 * NOTE(review): the guard for a NULL context, the conditions wrapping some
 * of the sections below (including the opening preprocessor conditionals)
 * and the release of the context struct itself are not visible in this
 * excerpt - confirm.
 */
1881 void sws_freeContext(SwsContext
*c
)
/*
 * Only the first vLumBufSize row pointers are freed: sws_init_context()
 * stores each allocated row at index i + vLumBufSize and copies the same
 * pointer to index i, so this releases every row exactly once.
 */
1888 for (i
= 0; i
< c
->vLumBufSize
; i
++)
1889 av_freep(&c
->lumPixBuf
[i
]);
1890 av_freep(&c
->lumPixBuf
);
/*
 * Chroma rows are allocated through chrUPixBuf only; sws_init_context()
 * makes the chrVPixBuf entries point into those same rows, so for
 * chrVPixBuf just the pointer array is released.
 */
1893 if (c
->chrUPixBuf
) {
1894 for (i
= 0; i
< c
->vChrBufSize
; i
++)
1895 av_freep(&c
->chrUPixBuf
[i
]);
1896 av_freep(&c
->chrUPixBuf
);
1897 av_freep(&c
->chrVPixBuf
);
/* alpha rows follow the same duplicated-pointer layout as the luma rows */
1900 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
) {
1901 for (i
= 0; i
< c
->vLumBufSize
; i
++)
1902 av_freep(&c
->alpPixBuf
[i
]);
1903 av_freep(&c
->alpPixBuf
);
/* the four dither error rows */
1906 for (i
= 0; i
< 4; i
++)
1907 av_freep(&c
->dither_error
[i
]);
/* filter coefficient tables */
1909 av_freep(&c
->vLumFilter
);
1910 av_freep(&c
->vChrFilter
);
1911 av_freep(&c
->hLumFilter
);
1912 av_freep(&c
->hChrFilter
);
1914 av_freep(&c
->vYCoeffsBank
);
1915 av_freep(&c
->vCCoeffsBank
);
/* filter position tables */
1918 av_freep(&c
->vLumFilterPos
);
1919 av_freep(&c
->vChrFilterPos
);
1920 av_freep(&c
->hLumFilterPos
);
1921 av_freep(&c
->hChrFilterPos
);
/*
 * Generated scaler code is released with the counterpart of the allocator
 * used in sws_init_context(): munmap() for mmap(), VirtualFree() for
 * VirtualAlloc(), av_free() for av_malloc().
 */
1925 if (c
->lumMmxextFilterCode
)
1926 munmap(c
->lumMmxextFilterCode
, c
->lumMmxextFilterCodeSize
);
1927 if (c
->chrMmxextFilterCode
)
1928 munmap(c
->chrMmxextFilterCode
, c
->chrMmxextFilterCodeSize
);
1929 #elif HAVE_VIRTUALALLOC
1930 if (c
->lumMmxextFilterCode
)
1931 VirtualFree(c
->lumMmxextFilterCode
, 0, MEM_RELEASE
);
1932 if (c
->chrMmxextFilterCode
)
1933 VirtualFree(c
->chrMmxextFilterCode
, 0, MEM_RELEASE
);
1935 av_free(c
->lumMmxextFilterCode
);
1936 av_free(c
->chrMmxextFilterCode
);
/* clear the code pointers after release */
1938 c
->lumMmxextFilterCode
= NULL
;
1939 c
->chrMmxextFilterCode
= NULL
;
1940 #endif /* HAVE_MMX_INLINE */
1942 av_freep(&c
->yuvTable
);
1943 av_freep(&c
->formatConvBuffer
);
/*
 * Cascaded sub-contexts are released recursively and their slots zeroed;
 * only plane 0 of the temporary image is passed to av_freep() here.
 */
1945 sws_freeContext(c
->cascaded_context
[0]);
1946 sws_freeContext(c
->cascaded_context
[1]);
1947 memset(c
->cascaded_context
, 0, sizeof(c
->cascaded_context
));
1948 av_freep(&c
->cascaded_tmp
[0]);
1953 struct SwsContext
*sws_getCachedContext(struct SwsContext
*context
, int srcW
,
1954 int srcH
, enum AVPixelFormat srcFormat
,
1956 enum AVPixelFormat dstFormat
, int flags
,
1957 SwsFilter
*srcFilter
,
1958 SwsFilter
*dstFilter
,
1959 const double *param
)
1961 static const double default_param
[2] = { SWS_PARAM_DEFAULT
,
1962 SWS_PARAM_DEFAULT
};
1965 param
= default_param
;
1968 (context
->srcW
!= srcW
||
1969 context
->srcH
!= srcH
||
1970 context
->srcFormat
!= srcFormat
||
1971 context
->dstW
!= dstW
||
1972 context
->dstH
!= dstH
||
1973 context
->dstFormat
!= dstFormat
||
1974 context
->flags
!= flags
||
1975 context
->param
[0] != param
[0] ||
1976 context
->param
[1] != param
[1])) {
1977 sws_freeContext(context
);
1982 if (!(context
= sws_alloc_context()))
1984 context
->srcW
= srcW
;
1985 context
->srcH
= srcH
;
1986 context
->srcFormat
= srcFormat
;
1987 context
->dstW
= dstW
;
1988 context
->dstH
= dstH
;
1989 context
->dstFormat
= dstFormat
;
1990 context
->flags
= flags
;
1991 context
->param
[0] = param
[0];
1992 context
->param
[1] = param
[1];
1993 if (sws_init_context(context
, srcFilter
, dstFilter
) < 0) {
1994 sws_freeContext(context
);