2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #define _SVID_SOURCE // needed for MAP_ANONYMOUS
24 #define _DARWIN_C_SOURCE // needed for MAP_ANON
31 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
32 #define MAP_ANONYMOUS MAP_ANON
36 #define WIN32_LEAN_AND_MEAN
40 #include "libavutil/attributes.h"
41 #include "libavutil/avassert.h"
42 #include "libavutil/avutil.h"
43 #include "libavutil/bswap.h"
44 #include "libavutil/cpu.h"
45 #include "libavutil/intreadwrite.h"
46 #include "libavutil/mathematics.h"
47 #include "libavutil/opt.h"
48 #include "libavutil/pixdesc.h"
49 #include "libavutil/ppc/cpu.h"
50 #include "libavutil/x86/asm.h"
51 #include "libavutil/x86/cpu.h"
54 #include "swscale_internal.h"
56 static void handle_formats(SwsContext
*c
);
58 unsigned swscale_version(void)
60 av_assert0(LIBSWSCALE_VERSION_MICRO
>= 100);
61 return LIBSWSCALE_VERSION_INT
;
64 const char *swscale_configuration(void)
66 return FFMPEG_CONFIGURATION
;
69 const char *swscale_license(void)
71 #define LICENSE_PREFIX "libswscale license: "
72 return LICENSE_PREFIX FFMPEG_LICENSE
+ sizeof(LICENSE_PREFIX
) - 1;
/**
 * Per-pixel-format capability flags, one bit each.
 *
 * Positional initializers fill the fields in declaration order, so
 * { 1, 0 } means: supported as input, not as output, and (implicitly zero)
 * no endianness-conversion support.
 */
typedef struct FormatEntry {
    uint8_t is_supported_in         :1; /* read by sws_isSupportedInput() */
    uint8_t is_supported_out        :1; /* read by sws_isSupportedOutput() */
    uint8_t is_supported_endianness :1; /* read by sws_isSupportedEndiannessConversion() */
} FormatEntry;
81 static const FormatEntry format_entries
[AV_PIX_FMT_NB
] = {
82 [AV_PIX_FMT_YUV420P
] = { 1, 1 },
83 [AV_PIX_FMT_YUYV422
] = { 1, 1 },
84 [AV_PIX_FMT_RGB24
] = { 1, 1 },
85 [AV_PIX_FMT_BGR24
] = { 1, 1 },
86 [AV_PIX_FMT_YUV422P
] = { 1, 1 },
87 [AV_PIX_FMT_YUV444P
] = { 1, 1 },
88 [AV_PIX_FMT_YUV410P
] = { 1, 1 },
89 [AV_PIX_FMT_YUV411P
] = { 1, 1 },
90 [AV_PIX_FMT_GRAY8
] = { 1, 1 },
91 [AV_PIX_FMT_MONOWHITE
] = { 1, 1 },
92 [AV_PIX_FMT_MONOBLACK
] = { 1, 1 },
93 [AV_PIX_FMT_PAL8
] = { 1, 0 },
94 [AV_PIX_FMT_YUVJ420P
] = { 1, 1 },
95 [AV_PIX_FMT_YUVJ411P
] = { 1, 1 },
96 [AV_PIX_FMT_YUVJ422P
] = { 1, 1 },
97 [AV_PIX_FMT_YUVJ444P
] = { 1, 1 },
98 [AV_PIX_FMT_YVYU422
] = { 1, 1 },
99 [AV_PIX_FMT_UYVY422
] = { 1, 1 },
100 [AV_PIX_FMT_UYYVYY411
] = { 0, 0 },
101 [AV_PIX_FMT_BGR8
] = { 1, 1 },
102 [AV_PIX_FMT_BGR4
] = { 0, 1 },
103 [AV_PIX_FMT_BGR4_BYTE
] = { 1, 1 },
104 [AV_PIX_FMT_RGB8
] = { 1, 1 },
105 [AV_PIX_FMT_RGB4
] = { 0, 1 },
106 [AV_PIX_FMT_RGB4_BYTE
] = { 1, 1 },
107 [AV_PIX_FMT_NV12
] = { 1, 1 },
108 [AV_PIX_FMT_NV21
] = { 1, 1 },
109 [AV_PIX_FMT_ARGB
] = { 1, 1 },
110 [AV_PIX_FMT_RGBA
] = { 1, 1 },
111 [AV_PIX_FMT_ABGR
] = { 1, 1 },
112 [AV_PIX_FMT_BGRA
] = { 1, 1 },
113 [AV_PIX_FMT_0RGB
] = { 1, 1 },
114 [AV_PIX_FMT_RGB0
] = { 1, 1 },
115 [AV_PIX_FMT_0BGR
] = { 1, 1 },
116 [AV_PIX_FMT_BGR0
] = { 1, 1 },
117 [AV_PIX_FMT_GRAY16BE
] = { 1, 1 },
118 [AV_PIX_FMT_GRAY16LE
] = { 1, 1 },
119 [AV_PIX_FMT_YUV440P
] = { 1, 1 },
120 [AV_PIX_FMT_YUVJ440P
] = { 1, 1 },
121 [AV_PIX_FMT_YUVA420P
] = { 1, 1 },
122 [AV_PIX_FMT_YUVA422P
] = { 1, 1 },
123 [AV_PIX_FMT_YUVA444P
] = { 1, 1 },
124 [AV_PIX_FMT_YUVA420P9BE
] = { 1, 1 },
125 [AV_PIX_FMT_YUVA420P9LE
] = { 1, 1 },
126 [AV_PIX_FMT_YUVA422P9BE
] = { 1, 1 },
127 [AV_PIX_FMT_YUVA422P9LE
] = { 1, 1 },
128 [AV_PIX_FMT_YUVA444P9BE
] = { 1, 1 },
129 [AV_PIX_FMT_YUVA444P9LE
] = { 1, 1 },
130 [AV_PIX_FMT_YUVA420P10BE
]= { 1, 1 },
131 [AV_PIX_FMT_YUVA420P10LE
]= { 1, 1 },
132 [AV_PIX_FMT_YUVA422P10BE
]= { 1, 1 },
133 [AV_PIX_FMT_YUVA422P10LE
]= { 1, 1 },
134 [AV_PIX_FMT_YUVA444P10BE
]= { 1, 1 },
135 [AV_PIX_FMT_YUVA444P10LE
]= { 1, 1 },
136 [AV_PIX_FMT_YUVA420P16BE
]= { 1, 1 },
137 [AV_PIX_FMT_YUVA420P16LE
]= { 1, 1 },
138 [AV_PIX_FMT_YUVA422P16BE
]= { 1, 1 },
139 [AV_PIX_FMT_YUVA422P16LE
]= { 1, 1 },
140 [AV_PIX_FMT_YUVA444P16BE
]= { 1, 1 },
141 [AV_PIX_FMT_YUVA444P16LE
]= { 1, 1 },
142 [AV_PIX_FMT_RGB48BE
] = { 1, 1 },
143 [AV_PIX_FMT_RGB48LE
] = { 1, 1 },
144 [AV_PIX_FMT_RGBA64BE
] = { 1, 1, 1 },
145 [AV_PIX_FMT_RGBA64LE
] = { 1, 1, 1 },
146 [AV_PIX_FMT_RGB565BE
] = { 1, 1 },
147 [AV_PIX_FMT_RGB565LE
] = { 1, 1 },
148 [AV_PIX_FMT_RGB555BE
] = { 1, 1 },
149 [AV_PIX_FMT_RGB555LE
] = { 1, 1 },
150 [AV_PIX_FMT_BGR565BE
] = { 1, 1 },
151 [AV_PIX_FMT_BGR565LE
] = { 1, 1 },
152 [AV_PIX_FMT_BGR555BE
] = { 1, 1 },
153 [AV_PIX_FMT_BGR555LE
] = { 1, 1 },
154 [AV_PIX_FMT_YUV420P16LE
] = { 1, 1 },
155 [AV_PIX_FMT_YUV420P16BE
] = { 1, 1 },
156 [AV_PIX_FMT_YUV422P16LE
] = { 1, 1 },
157 [AV_PIX_FMT_YUV422P16BE
] = { 1, 1 },
158 [AV_PIX_FMT_YUV444P16LE
] = { 1, 1 },
159 [AV_PIX_FMT_YUV444P16BE
] = { 1, 1 },
160 [AV_PIX_FMT_RGB444LE
] = { 1, 1 },
161 [AV_PIX_FMT_RGB444BE
] = { 1, 1 },
162 [AV_PIX_FMT_BGR444LE
] = { 1, 1 },
163 [AV_PIX_FMT_BGR444BE
] = { 1, 1 },
164 [AV_PIX_FMT_YA8
] = { 1, 0 },
165 [AV_PIX_FMT_YA16BE
] = { 1, 0 },
166 [AV_PIX_FMT_YA16LE
] = { 1, 0 },
167 [AV_PIX_FMT_BGR48BE
] = { 1, 1 },
168 [AV_PIX_FMT_BGR48LE
] = { 1, 1 },
169 [AV_PIX_FMT_BGRA64BE
] = { 1, 1, 1 },
170 [AV_PIX_FMT_BGRA64LE
] = { 1, 1, 1 },
171 [AV_PIX_FMT_YUV420P9BE
] = { 1, 1 },
172 [AV_PIX_FMT_YUV420P9LE
] = { 1, 1 },
173 [AV_PIX_FMT_YUV420P10BE
] = { 1, 1 },
174 [AV_PIX_FMT_YUV420P10LE
] = { 1, 1 },
175 [AV_PIX_FMT_YUV420P12BE
] = { 1, 1 },
176 [AV_PIX_FMT_YUV420P12LE
] = { 1, 1 },
177 [AV_PIX_FMT_YUV420P14BE
] = { 1, 1 },
178 [AV_PIX_FMT_YUV420P14LE
] = { 1, 1 },
179 [AV_PIX_FMT_YUV422P9BE
] = { 1, 1 },
180 [AV_PIX_FMT_YUV422P9LE
] = { 1, 1 },
181 [AV_PIX_FMT_YUV422P10BE
] = { 1, 1 },
182 [AV_PIX_FMT_YUV422P10LE
] = { 1, 1 },
183 [AV_PIX_FMT_YUV422P12BE
] = { 1, 1 },
184 [AV_PIX_FMT_YUV422P12LE
] = { 1, 1 },
185 [AV_PIX_FMT_YUV422P14BE
] = { 1, 1 },
186 [AV_PIX_FMT_YUV422P14LE
] = { 1, 1 },
187 [AV_PIX_FMT_YUV444P9BE
] = { 1, 1 },
188 [AV_PIX_FMT_YUV444P9LE
] = { 1, 1 },
189 [AV_PIX_FMT_YUV444P10BE
] = { 1, 1 },
190 [AV_PIX_FMT_YUV444P10LE
] = { 1, 1 },
191 [AV_PIX_FMT_YUV444P12BE
] = { 1, 1 },
192 [AV_PIX_FMT_YUV444P12LE
] = { 1, 1 },
193 [AV_PIX_FMT_YUV444P14BE
] = { 1, 1 },
194 [AV_PIX_FMT_YUV444P14LE
] = { 1, 1 },
195 [AV_PIX_FMT_GBRP
] = { 1, 1 },
196 [AV_PIX_FMT_GBRP9LE
] = { 1, 1 },
197 [AV_PIX_FMT_GBRP9BE
] = { 1, 1 },
198 [AV_PIX_FMT_GBRP10LE
] = { 1, 1 },
199 [AV_PIX_FMT_GBRP10BE
] = { 1, 1 },
200 [AV_PIX_FMT_GBRP12LE
] = { 1, 1 },
201 [AV_PIX_FMT_GBRP12BE
] = { 1, 1 },
202 [AV_PIX_FMT_GBRP14LE
] = { 1, 1 },
203 [AV_PIX_FMT_GBRP14BE
] = { 1, 1 },
204 [AV_PIX_FMT_GBRP16LE
] = { 1, 0 },
205 [AV_PIX_FMT_GBRP16BE
] = { 1, 0 },
206 [AV_PIX_FMT_XYZ12BE
] = { 1, 1, 1 },
207 [AV_PIX_FMT_XYZ12LE
] = { 1, 1, 1 },
208 [AV_PIX_FMT_GBRAP
] = { 1, 1 },
209 [AV_PIX_FMT_GBRAP16LE
] = { 1, 0 },
210 [AV_PIX_FMT_GBRAP16BE
] = { 1, 0 },
211 [AV_PIX_FMT_BAYER_BGGR8
] = { 1, 0 },
212 [AV_PIX_FMT_BAYER_RGGB8
] = { 1, 0 },
213 [AV_PIX_FMT_BAYER_GBRG8
] = { 1, 0 },
214 [AV_PIX_FMT_BAYER_GRBG8
] = { 1, 0 },
215 [AV_PIX_FMT_BAYER_BGGR16LE
] = { 1, 0 },
216 [AV_PIX_FMT_BAYER_BGGR16BE
] = { 1, 0 },
217 [AV_PIX_FMT_BAYER_RGGB16LE
] = { 1, 0 },
218 [AV_PIX_FMT_BAYER_RGGB16BE
] = { 1, 0 },
219 [AV_PIX_FMT_BAYER_GBRG16LE
] = { 1, 0 },
220 [AV_PIX_FMT_BAYER_GBRG16BE
] = { 1, 0 },
221 [AV_PIX_FMT_BAYER_GRBG16LE
] = { 1, 0 },
222 [AV_PIX_FMT_BAYER_GRBG16BE
] = { 1, 0 },
225 int sws_isSupportedInput(enum AVPixelFormat pix_fmt
)
227 return (unsigned)pix_fmt
< AV_PIX_FMT_NB
?
228 format_entries
[pix_fmt
].is_supported_in
: 0;
231 int sws_isSupportedOutput(enum AVPixelFormat pix_fmt
)
233 return (unsigned)pix_fmt
< AV_PIX_FMT_NB
?
234 format_entries
[pix_fmt
].is_supported_out
: 0;
237 int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt
)
239 return (unsigned)pix_fmt
< AV_PIX_FMT_NB
?
240 format_entries
[pix_fmt
].is_supported_endianness
: 0;
/*
 * Name lookup for a pixel format, guarded by FF_API_SWS_FORMAT_NAME.
 * The visible code fetches the descriptor for `format` with
 * av_pix_fmt_desc_get() and has a fallback return of the literal
 * Unknown format.
 * NOTE(review): the lines between the descriptor lookup and the fallback
 * return, and the end of the preprocessor guard, are missing from this
 * listing - the success path is not shown here; confirm against the
 * complete file.
 */
243 #if FF_API_SWS_FORMAT_NAME
244 const char *sws_format_name(enum AVPixelFormat format
)
246 const AVPixFmtDescriptor
*desc
= av_pix_fmt_desc_get(format
);
250 return "Unknown format";
/*
 * Spline helper called from the SWS_SPLINE branch of initFilter.
 * The first visible return evaluates a + b*dist + c*dist^2 + d*dist^3 in
 * Horner form. The second visible return recurses with a constant term of
 * 0.0 and new coefficients built from b, c and d.
 * NOTE(review): the declaration of dist (presumably the fifth parameter),
 * the condition that selects between the two returns, and two arguments of
 * the recursive call are missing from this listing - verify against the
 * complete source before relying on this description.
 */
254 static double getSplineCoeff(double a
, double b
, double c
, double d
,
258 return ((d
* dist
+ c
) * dist
+ b
) * dist
+ a
;
260 return getSplineCoeff(0.0,
261 b
+ 2.0 * c
+ 3.0 * d
,
263 -b
- 3.0 * c
- 6.0 * d
,
267 static av_cold
int get_local_pos(SwsContext
*s
, int chr_subsample
, int pos
, int dir
)
269 if (pos
== -1 || pos
<= -513) {
270 pos
= (128 << chr_subsample
) - 128;
272 pos
+= 128; // relative to ideal left edge
273 return pos
>> chr_subsample
;
/*
 * Fields of the record type used by the scale_algorithms table that follows.
 * NOTE(review): the opening and closing lines of this type definition are
 * missing from the listing; the type name ScaleAlgorithm is taken from the
 * table declaration - confirm against the complete file.
 */
277 int flag
; ///< flag associated to the algorithm
278 const char *description
; ///< human-readable description
279 int size_factor
; ///< size factor used when initing the filters
282 static const ScaleAlgorithm scale_algorithms
[] = {
283 { SWS_AREA
, "area averaging", 1 /* downscale only, for upscale it is bilinear */ },
284 { SWS_BICUBIC
, "bicubic", 4 },
285 { SWS_BICUBLIN
, "luma bicubic / chroma bilinear", -1 },
286 { SWS_BILINEAR
, "bilinear", 2 },
287 { SWS_FAST_BILINEAR
, "fast bilinear", -1 },
288 { SWS_GAUSS
, "Gaussian", 8 /* infinite ;) */ },
289 { SWS_LANCZOS
, "Lanczos", -1 /* custom */ },
290 { SWS_POINT
, "nearest neighbor / point", -1 },
291 { SWS_SINC
, "sinc", 20 /* infinite ;) */ },
292 { SWS_SPLINE
, "bicubic spline", 20 /* infinite :)*/ },
293 { SWS_X
, "experimental", 8 },
/*
 * Compute a table of filter coefficients (*outFilter) and source start
 * positions (*filterPos) for dstW output samples.
 *
 * Stages visible in this listing, in order:
 *  - allocate *filterPos with dstW + 3 entries;
 *  - fill the 64-bit work array `filter` through one of four paths:
 *    unscaled, SWS_POINT, bilinear (SWS_FAST_BILINEAR, or SWS_AREA with
 *    xInc <= 1 << 16), or the generic per-flag kernel evaluation;
 *  - combine `filter` with srcFilter into `filter2` (the dstFilter step is
 *    only partly visible);
 *  - find minFilterSize and round it up to a multiple of filterAlign;
 *  - pull filterPos back into the range 0 .. srcW - filterSize, adding the
 *    displaced coefficients onto the edge taps;
 *  - divide each row by its sum scaled by `one`, carrying the rounding
 *    remainder from tap to tap, and store the result in *outFilter;
 *  - copy the last position and the last row into the three padding slots.
 *
 * The failure path logs the sws: initFilter failed message.
 *
 * NOTE(review): many lines of this function (local declarations, braces,
 * several statements and the return paths) are missing from this listing,
 * so this description covers only what is visible - confirm against the
 * complete source.
 */
296 static av_cold
int initFilter(int16_t **outFilter
, int32_t **filterPos
,
297 int *outFilterSize
, int xInc
, int srcW
,
298 int dstW
, int filterAlign
, int one
,
299 int flags
, int cpu_flags
,
300 SwsVector
*srcFilter
, SwsVector
*dstFilter
,
301 double param
[2], int srcPos
, int dstPos
)
307 int64_t *filter
= NULL
;
308 int64_t *filter2
= NULL
;
309 const int64_t fone
= 1LL << (54 - FFMIN(av_log2(srcW
/dstW
), 8));
312 emms_c(); // FIXME should not be required but IS (even for non-MMX versions)
314 // NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
315 FF_ALLOC_ARRAY_OR_GOTO(NULL
, *filterPos
, (dstW
+ 3), sizeof(**filterPos
), fail
);
317 if (FFABS(xInc
- 0x10000) < 10 && srcPos
== dstPos
) { // unscaled
320 FF_ALLOCZ_ARRAY_OR_GOTO(NULL
, filter
,
321 dstW
, sizeof(*filter
) * filterSize
, fail
);
323 for (i
= 0; i
< dstW
; i
++) {
324 filter
[i
* filterSize
] = fone
;
327 } else if (flags
& SWS_POINT
) { // lame looking point sampling mode
331 FF_ALLOC_ARRAY_OR_GOTO(NULL
, filter
,
332 dstW
, sizeof(*filter
) * filterSize
, fail
);
334 xDstInSrc
= ((dstPos
*(int64_t)xInc
)>>8) - ((srcPos
*0x8000LL
)>>7);
335 for (i
= 0; i
< dstW
; i
++) {
336 int xx
= (xDstInSrc
- ((filterSize
- 1) << 15) + (1 << 15)) >> 16;
338 (*filterPos
)[i
] = xx
;
342 } else if ((xInc
<= (1 << 16) && (flags
& SWS_AREA
)) ||
343 (flags
& SWS_FAST_BILINEAR
)) { // bilinear upscale
347 FF_ALLOC_ARRAY_OR_GOTO(NULL
, filter
,
348 dstW
, sizeof(*filter
) * filterSize
, fail
);
350 xDstInSrc
= ((dstPos
*(int64_t)xInc
)>>8) - ((srcPos
*0x8000LL
)>>7);
351 for (i
= 0; i
< dstW
; i
++) {
352 int xx
= (xDstInSrc
- ((filterSize
- 1) << 15) + (1 << 15)) >> 16;
355 (*filterPos
)[i
] = xx
;
356 // bilinear upscale / linear interpolate / area averaging
357 for (j
= 0; j
< filterSize
; j
++) {
358 int64_t coeff
= fone
- FFABS(((int64_t)xx
<<16) - xDstInSrc
)*(fone
>>16);
361 filter
[i
* filterSize
+ j
] = coeff
;
/* Generic path: take sizeFactor from the scale_algorithms entry whose flag
 * is set in flags and whose size_factor is positive. */
370 for (i
= 0; i
< FF_ARRAY_ELEMS(scale_algorithms
); i
++) {
371 if (flags
& scale_algorithms
[i
].flag
&& scale_algorithms
[i
].size_factor
> 0) {
372 sizeFactor
= scale_algorithms
[i
].size_factor
;
376 if (flags
& SWS_LANCZOS
)
377 sizeFactor
= param
[0] != SWS_PARAM_DEFAULT
? ceil(2 * param
[0]) : 6;
378 av_assert0(sizeFactor
> 0);
381 filterSize
= 1 + sizeFactor
; // upscale
383 filterSize
= 1 + (sizeFactor
* srcW
+ dstW
- 1) / dstW
;
385 filterSize
= FFMIN(filterSize
, srcW
- 2);
386 filterSize
= FFMAX(filterSize
, 1);
388 FF_ALLOC_ARRAY_OR_GOTO(NULL
, filter
,
389 dstW
, sizeof(*filter
) * filterSize
, fail
);
391 xDstInSrc
= ((dstPos
*(int64_t)xInc
)>>7) - ((srcPos
*0x10000LL
)>>7);
392 for (i
= 0; i
< dstW
; i
++) {
393 int xx
= (xDstInSrc
- ((int64_t)(filterSize
- 2) << 16)) / (1 << 17);
395 (*filterPos
)[i
] = xx
;
396 for (j
= 0; j
< filterSize
; j
++) {
397 int64_t d
= (FFABS(((int64_t)xx
<< 17) - xDstInSrc
)) << 13;
403 floatd
= d
* (1.0 / (1 << 30));
405 if (flags
& SWS_BICUBIC
) {
406 int64_t B
= (param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 0) * (1 << 24);
407 int64_t C
= (param
[1] != SWS_PARAM_DEFAULT
? param
[1] : 0.6) * (1 << 24);
409 if (d
>= 1LL << 31) {
412 int64_t dd
= (d
* d
) >> 30;
413 int64_t ddd
= (dd
* d
) >> 30;
416 coeff
= (12 * (1 << 24) - 9 * B
- 6 * C
) * ddd
+
417 (-18 * (1 << 24) + 12 * B
+ 6 * C
) * dd
+
418 (6 * (1 << 24) - 2 * B
) * (1 << 30);
420 coeff
= (-B
- 6 * C
) * ddd
+
421 (6 * B
+ 30 * C
) * dd
+
422 (-12 * B
- 48 * C
) * d
+
423 (8 * B
+ 24 * C
) * (1 << 30);
425 coeff
/= (1LL<<54)/fone
;
/* NOTE(review): two consecutive SWS_X branches are visible here. The first
 * multiplies param itself by 0.01 although param is declared as double[2];
 * presumably it is disabled by preprocessor lines missing from this
 * listing - confirm against the complete source. */
428 else if (flags
& SWS_X
) {
429 double p
= param
? param
* 0.01 : 0.3;
430 coeff
= d
? sin(d
* M_PI
) / (d
* M_PI
) : 1.0;
431 coeff
*= pow(2.0, -p
* d
* d
);
434 else if (flags
& SWS_X
) {
435 double A
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 1.0;
439 c
= cos(floatd
* M_PI
);
446 coeff
= (c
* 0.5 + 0.5) * fone
;
447 } else if (flags
& SWS_AREA
) {
448 int64_t d2
= d
- (1 << 29);
449 if (d2
* xInc
< -(1LL << (29 + 16)))
450 coeff
= 1.0 * (1LL << (30 + 16));
451 else if (d2
* xInc
< (1LL << (29 + 16)))
452 coeff
= -d2
* xInc
+ (1LL << (29 + 16));
455 coeff
*= fone
>> (30 + 16);
456 } else if (flags
& SWS_GAUSS
) {
457 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
458 coeff
= (pow(2.0, -p
* floatd
* floatd
)) * fone
;
459 } else if (flags
& SWS_SINC
) {
460 coeff
= (d
? sin(floatd
* M_PI
) / (floatd
* M_PI
) : 1.0) * fone
;
461 } else if (flags
& SWS_LANCZOS
) {
462 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
463 coeff
= (d
? sin(floatd
* M_PI
) * sin(floatd
* M_PI
/ p
) /
464 (floatd
* floatd
* M_PI
* M_PI
/ p
) : 1.0) * fone
;
467 } else if (flags
& SWS_BILINEAR
) {
468 coeff
= (1 << 30) - d
;
472 } else if (flags
& SWS_SPLINE
) {
473 double p
= -2.196152422706632;
474 coeff
= getSplineCoeff(1.0, 0.0, p
, -p
- 1.0, floatd
) * fone
;
479 filter
[i
* filterSize
+ j
] = coeff
;
482 xDstInSrc
+= 2 * xInc
;
486 /* apply src & dst Filter to filter -> filter2
489 av_assert0(filterSize
> 0);
490 filter2Size
= filterSize
;
492 filter2Size
+= srcFilter
->length
- 1;
494 filter2Size
+= dstFilter
->length
- 1;
495 av_assert0(filter2Size
> 0);
496 FF_ALLOCZ_ARRAY_OR_GOTO(NULL
, filter2
, dstW
, filter2Size
* sizeof(*filter2
), fail
);
498 for (i
= 0; i
< dstW
; i
++) {
502 for (k
= 0; k
< srcFilter
->length
; k
++) {
503 for (j
= 0; j
< filterSize
; j
++)
504 filter2
[i
* filter2Size
+ k
+ j
] +=
505 srcFilter
->coeff
[k
] * filter
[i
* filterSize
+ j
];
508 for (j
= 0; j
< filterSize
; j
++)
509 filter2
[i
* filter2Size
+ j
] = filter
[i
* filterSize
+ j
];
513 (*filterPos
)[i
] += (filterSize
- 1) / 2 - (filter2Size
- 1) / 2;
517 /* try to reduce the filter-size (step1 find size and shift left) */
518 // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
520 for (i
= dstW
- 1; i
>= 0; i
--) {
521 int min
= filter2Size
;
523 int64_t cutOff
= 0.0;
525 /* get rid of near zero elements on the left by shifting left */
526 for (j
= 0; j
< filter2Size
; j
++) {
528 cutOff
+= FFABS(filter2
[i
* filter2Size
]);
530 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
* fone
)
533 /* preserve monotonicity because the core can't handle the
534 * filter otherwise */
535 if (i
< dstW
- 1 && (*filterPos
)[i
] >= (*filterPos
)[i
+ 1])
538 // move filter coefficients left
539 for (k
= 1; k
< filter2Size
; k
++)
540 filter2
[i
* filter2Size
+ k
- 1] = filter2
[i
* filter2Size
+ k
];
541 filter2
[i
* filter2Size
+ k
- 1] = 0;
546 /* count near zeros on the right */
547 for (j
= filter2Size
- 1; j
> 0; j
--) {
548 cutOff
+= FFABS(filter2
[i
* filter2Size
+ j
]);
550 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
* fone
)
555 if (min
> minFilterSize
)
/* CPU-specific checks on minFilterSize (AltiVec, MMX); the values assigned
 * in each case are on lines missing from this listing. */
559 if (PPC_ALTIVEC(cpu_flags
)) {
560 // we can handle the special case 4, so we don't want to go the full 8
561 if (minFilterSize
< 5)
564 /* We really don't want to waste our time doing useless computation, so
565 * fall back on the scalar C code for very small filters.
566 * Vectorizing is worth it only if you have a decent-sized vector. */
567 if (minFilterSize
< 3)
571 if (HAVE_MMX
&& cpu_flags
& AV_CPU_FLAG_MMX
) {
572 // special case for unscaled vertical filtering
573 if (minFilterSize
== 1 && filterAlign
== 2)
577 av_assert0(minFilterSize
> 0);
578 filterSize
= (minFilterSize
+ (filterAlign
- 1)) & (~(filterAlign
- 1));
579 av_assert0(filterSize
> 0);
580 filter
= av_malloc_array(dstW
, filterSize
* sizeof(*filter
));
583 if (filterSize
>= MAX_FILTER_SIZE
* 16 /
584 ((flags
& SWS_ACCURATE_RND
) ? APCK_SIZE
: 16)) {
585 av_log(NULL
, AV_LOG_ERROR
, "sws: filterSize %d is too large, try less extreme scaling or set --sws-max-filter-size and recompile\n",
586 FF_CEIL_RSHIFT((filterSize
+1) * ((flags
& SWS_ACCURATE_RND
) ? APCK_SIZE
: 16), 4));
589 *outFilterSize
= filterSize
;
591 if (flags
& SWS_PRINT_INFO
)
592 av_log(NULL
, AV_LOG_VERBOSE
,
593 "SwScaler: reducing / aligning filtersize %d -> %d\n",
594 filter2Size
, filterSize
);
595 /* try to reduce the filter-size (step2 reduce it) */
596 for (i
= 0; i
< dstW
; i
++) {
599 for (j
= 0; j
< filterSize
; j
++) {
600 if (j
>= filter2Size
)
601 filter
[i
* filterSize
+ j
] = 0;
603 filter
[i
* filterSize
+ j
] = filter2
[i
* filter2Size
+ j
];
604 if ((flags
& SWS_BITEXACT
) && j
>= minFilterSize
)
605 filter
[i
* filterSize
+ j
] = 0;
609 // FIXME try to align filterPos if possible
/* Keep every row inside the source: rows starting before 0 or ending past
 * srcW have their coefficients accumulated onto the nearest valid tap. */
612 for (i
= 0; i
< dstW
; i
++) {
614 if ((*filterPos
)[i
] < 0) {
615 // move filter coefficients left to compensate for filterPos
616 for (j
= 1; j
< filterSize
; j
++) {
617 int left
= FFMAX(j
+ (*filterPos
)[i
], 0);
618 filter
[i
* filterSize
+ left
] += filter
[i
* filterSize
+ j
];
619 filter
[i
* filterSize
+ j
] = 0;
624 if ((*filterPos
)[i
] + filterSize
> srcW
) {
625 int shift
= (*filterPos
)[i
] + filterSize
- srcW
;
626 // move filter coefficients right to compensate for filterPos
627 for (j
= filterSize
- 2; j
>= 0; j
--) {
628 int right
= FFMIN(j
+ shift
, filterSize
- 1);
629 filter
[i
* filterSize
+ right
] += filter
[i
* filterSize
+ j
];
630 filter
[i
* filterSize
+ j
] = 0;
632 (*filterPos
)[i
]= srcW
- filterSize
;
636 // Note the +1 is for the MMX scaler which reads over the end
637 /* align at 16 for AltiVec (needed by hScale_altivec_real) */
638 FF_ALLOCZ_ARRAY_OR_GOTO(NULL
, *outFilter
,
639 (dstW
+ 3), *outFilterSize
* sizeof(int16_t), fail
);
641 /* normalize & store in outFilter */
642 for (i
= 0; i
< dstW
; i
++) {
647 for (j
= 0; j
< filterSize
; j
++) {
648 sum
+= filter
[i
* filterSize
+ j
];
650 sum
= (sum
+ one
/ 2) / one
;
652 av_log(NULL
, AV_LOG_WARNING
, "SwScaler: zero vector in scaling\n");
/* Each tap is divided by sum with rounding; the remainder is carried into
 * the next tap through error. */
655 for (j
= 0; j
< *outFilterSize
; j
++) {
656 int64_t v
= filter
[i
* filterSize
+ j
] + error
;
657 int intV
= ROUNDED_DIV(v
, sum
);
658 (*outFilter
)[i
* (*outFilterSize
) + j
] = intV
;
659 error
= v
- intV
* sum
;
663 (*filterPos
)[dstW
+ 0] =
664 (*filterPos
)[dstW
+ 1] =
665 (*filterPos
)[dstW
+ 2] = (*filterPos
)[dstW
- 1]; /* the MMX/SSE scaler will
666 * read over the end */
667 for (i
= 0; i
< *outFilterSize
; i
++) {
668 int k
= (dstW
- 1) * (*outFilterSize
) + i
;
669 (*outFilter
)[k
+ 1 * (*outFilterSize
)] =
670 (*outFilter
)[k
+ 2 * (*outFilterSize
)] =
671 (*outFilter
)[k
+ 3 * (*outFilterSize
)] = (*outFilter
)[k
];
678 av_log(NULL
, AV_LOG_ERROR
, "sws: initFilter failed\n");
/*
 * Fill c->input_rgb2yuv_table from a four-entry coefficient table.
 *
 * Visible behaviour:
 *  - vr, ub, ug, vg are read from table[0..3], with ug and vg negated;
 *  - dstRange is overwritten with 0 (see the FIXME on that line);
 *  - the nine R/G/B to Y/U/V entries are derived with ROUNDED_DIV;
 *  - if table is bytewise equal to ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], all
 *    nine entries are replaced by fixed constants;
 *  - finally, for every index of map, AV_WL16 stores the mapped table entry
 *    (or 0 where map holds -1) at byte offset 16*4 + 2*i of the table.
 *
 * NOTE(review): several lines are missing from this listing, including the
 * declarations of cy, ONE and i and the computation of Z - confirm against
 * the complete source.
 */
684 static void fill_rgb2yuv_table(SwsContext
*c
, const int table
[4], int dstRange
)
686 int64_t W
, V
, Z
, Cy
, Cu
, Cv
;
687 int64_t vr
= table
[0];
688 int64_t ub
= table
[1];
689 int64_t ug
= -table
[2];
690 int64_t vg
= -table
[3];
693 uint8_t *p
= (uint8_t*)c
->input_rgb2yuv_table
;
/* Index map used by the final store loop; -1 means a zero is stored. */
695 static const int8_t map
[] = {
696 BY_IDX
, GY_IDX
, -1 , BY_IDX
, BY_IDX
, GY_IDX
, -1 , BY_IDX
,
697 RY_IDX
, -1 , GY_IDX
, RY_IDX
, RY_IDX
, -1 , GY_IDX
, RY_IDX
,
698 RY_IDX
, GY_IDX
, -1 , RY_IDX
, RY_IDX
, GY_IDX
, -1 , RY_IDX
,
699 BY_IDX
, -1 , GY_IDX
, BY_IDX
, BY_IDX
, -1 , GY_IDX
, BY_IDX
,
700 BU_IDX
, GU_IDX
, -1 , BU_IDX
, BU_IDX
, GU_IDX
, -1 , BU_IDX
,
701 RU_IDX
, -1 , GU_IDX
, RU_IDX
, RU_IDX
, -1 , GU_IDX
, RU_IDX
,
702 RU_IDX
, GU_IDX
, -1 , RU_IDX
, RU_IDX
, GU_IDX
, -1 , RU_IDX
,
703 BU_IDX
, -1 , GU_IDX
, BU_IDX
, BU_IDX
, -1 , GU_IDX
, BU_IDX
,
704 BV_IDX
, GV_IDX
, -1 , BV_IDX
, BV_IDX
, GV_IDX
, -1 , BV_IDX
,
705 RV_IDX
, -1 , GV_IDX
, RV_IDX
, RV_IDX
, -1 , GV_IDX
, RV_IDX
,
706 RV_IDX
, GV_IDX
, -1 , RV_IDX
, RV_IDX
, GV_IDX
, -1 , RV_IDX
,
707 BV_IDX
, -1 , GV_IDX
, BV_IDX
, BV_IDX
, -1 , GV_IDX
, BV_IDX
,
708 RY_IDX
, BY_IDX
, RY_IDX
, BY_IDX
, RY_IDX
, BY_IDX
, RY_IDX
, BY_IDX
,
709 BY_IDX
, RY_IDX
, BY_IDX
, RY_IDX
, BY_IDX
, RY_IDX
, BY_IDX
, RY_IDX
,
710 GY_IDX
, -1 , GY_IDX
, -1 , GY_IDX
, -1 , GY_IDX
, -1 ,
711 -1 , GY_IDX
, -1 , GY_IDX
, -1 , GY_IDX
, -1 , GY_IDX
,
712 RU_IDX
, BU_IDX
, RU_IDX
, BU_IDX
, RU_IDX
, BU_IDX
, RU_IDX
, BU_IDX
,
713 BU_IDX
, RU_IDX
, BU_IDX
, RU_IDX
, BU_IDX
, RU_IDX
, BU_IDX
, RU_IDX
,
714 GU_IDX
, -1 , GU_IDX
, -1 , GU_IDX
, -1 , GU_IDX
, -1 ,
715 -1 , GU_IDX
, -1 , GU_IDX
, -1 , GU_IDX
, -1 , GU_IDX
,
716 RV_IDX
, BV_IDX
, RV_IDX
, BV_IDX
, RV_IDX
, BV_IDX
, RV_IDX
, BV_IDX
,
717 BV_IDX
, RV_IDX
, BV_IDX
, RV_IDX
, BV_IDX
, RV_IDX
, BV_IDX
, RV_IDX
,
718 GV_IDX
, -1 , GV_IDX
, -1 , GV_IDX
, -1 , GV_IDX
, -1 ,
719 -1 , GV_IDX
, -1 , GV_IDX
, -1 , GV_IDX
, -1 , GV_IDX
, //23
720 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //24
721 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //25
722 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //26
723 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //27
724 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //28
725 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //29
726 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //30
727 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //31
728 BY_IDX
, GY_IDX
, RY_IDX
, -1 , -1 , -1 , -1 , -1 , //32
729 BU_IDX
, GU_IDX
, RU_IDX
, -1 , -1 , -1 , -1 , -1 , //33
730 BV_IDX
, GV_IDX
, RV_IDX
, -1 , -1 , -1 , -1 , -1 , //34
733 dstRange
= 0; //FIXME range = 1 is handled elsewhere
743 W
= ROUNDED_DIV(ONE
*ONE
*ug
, ub
);
744 V
= ROUNDED_DIV(ONE
*ONE
*vg
, vr
);
747 Cy
= ROUNDED_DIV(cy
*Z
, ONE
);
748 Cu
= ROUNDED_DIV(ub
*Z
, ONE
);
749 Cv
= ROUNDED_DIV(vr
*Z
, ONE
);
751 c
->input_rgb2yuv_table
[RY_IDX
] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*V
, Cy
);
752 c
->input_rgb2yuv_table
[GY_IDX
] = ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*ONE
*ONE
, Cy
);
753 c
->input_rgb2yuv_table
[BY_IDX
] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*W
, Cy
);
755 c
->input_rgb2yuv_table
[RU_IDX
] = ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*V
, Cu
);
756 c
->input_rgb2yuv_table
[GU_IDX
] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*ONE
*ONE
, Cu
);
757 c
->input_rgb2yuv_table
[BU_IDX
] = ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*(Z
+W
) , Cu
);
759 c
->input_rgb2yuv_table
[RV_IDX
] = ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*(V
+Z
) , Cv
);
760 c
->input_rgb2yuv_table
[GV_IDX
] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*ONE
*ONE
, Cv
);
761 c
->input_rgb2yuv_table
[BV_IDX
] = ROUNDED_DIV((1 << RGB2YUV_SHIFT
)*W
, Cv
);
/* When the caller passed the default coefficient set, the derived values
 * above are replaced by these fixed constants. */
763 if(/*!dstRange && */!memcmp(table
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
], sizeof(ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
]))) {
764 c
->input_rgb2yuv_table
[BY_IDX
] = ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
765 c
->input_rgb2yuv_table
[BV_IDX
] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
766 c
->input_rgb2yuv_table
[BU_IDX
] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
767 c
->input_rgb2yuv_table
[GY_IDX
] = ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
768 c
->input_rgb2yuv_table
[GV_IDX
] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
769 c
->input_rgb2yuv_table
[GU_IDX
] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
770 c
->input_rgb2yuv_table
[RY_IDX
] = ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
771 c
->input_rgb2yuv_table
[RV_IDX
] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
772 c
->input_rgb2yuv_table
[RU_IDX
] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT
) + 0.5));
/* Store one 16-bit value per map entry behind the first 16*4 bytes of the
 * table (AV_WL16 is presumably a little-endian 16-bit store - confirm). */
774 for(i
=0; i
<FF_ARRAY_ELEMS(map
); i
++)
775 AV_WL16(p
+ 16*4 + 2*i
, map
[i
] >= 0 ? c
->input_rgb2yuv_table
[map
[i
]] : 0);
/*
 * Install the XYZ/RGB conversion matrices and gamma lookup tables in c.
 *
 * The two 3x4 matrices are copied into the context. The four gamma pointers
 * of the context are set to function-static 4096-entry tables, so every
 * context that goes through here refers to the same storage. Each table
 * entry is lrint(pow(i / 4095.0, gamma) * 4095.0) for its gamma exponent.
 *
 * NOTE(review): the statement guarded by the rgbgamma_tab[4095] test is
 * missing from this listing (presumably an early return once the tables are
 * filled), as are two rows of the first matrix and all rows of the second.
 * No synchronization around the static tables is visible here - confirm
 * against the complete source.
 */
778 static void fill_xyztables(struct SwsContext
*c
)
781 double xyzgamma
= XYZ_GAMMA
;
782 double rgbgamma
= 1.0 / RGB_GAMMA
;
783 double xyzgammainv
= 1.0 / XYZ_GAMMA
;
784 double rgbgammainv
= RGB_GAMMA
;
785 static const int16_t xyz2rgb_matrix
[3][4] = {
786 {13270, -6295, -2041},
788 { 228, -835, 4329} };
789 static const int16_t rgb2xyz_matrix
[3][4] = {
793 static int16_t xyzgamma_tab
[4096], rgbgamma_tab
[4096], xyzgammainv_tab
[4096], rgbgammainv_tab
[4096];
795 memcpy(c
->xyz2rgb_matrix
, xyz2rgb_matrix
, sizeof(c
->xyz2rgb_matrix
));
796 memcpy(c
->rgb2xyz_matrix
, rgb2xyz_matrix
, sizeof(c
->rgb2xyz_matrix
));
797 c
->xyzgamma
= xyzgamma_tab
;
798 c
->rgbgamma
= rgbgamma_tab
;
799 c
->xyzgammainv
= xyzgammainv_tab
;
800 c
->rgbgammainv
= rgbgammainv_tab
;
802 if (rgbgamma_tab
[4095])
805 /* set gamma vectors */
806 for (i
= 0; i
< 4096; i
++) {
807 xyzgamma_tab
[i
] = lrint(pow(i
/ 4095.0, xyzgamma
) * 4095.0);
808 rgbgamma_tab
[i
] = lrint(pow(i
/ 4095.0, rgbgamma
) * 4095.0);
809 xyzgammainv_tab
[i
] = lrint(pow(i
/ 4095.0, xyzgammainv
) * 4095.0);
810 rgbgammainv_tab
[i
] = lrint(pow(i
/ 4095.0, rgbgammainv
) * 4095.0);
/*
 * Store colorspace parameters in the context and refresh dependent tables.
 *
 * Visible behaviour:
 *  - inv_table and table (four ints each) are copied into
 *    c->srcColorspaceTable and c->dstColorspaceTable with memmove;
 *  - brightness, contrast, saturation, srcRange and dstRange are stored;
 *  - ff_sws_init_range_convert() is called when need_reinit is set and the
 *    source is 8 bits per component or not YUV;
 *  - dstFormatBpp and srcFormatBpp are recomputed from the descriptors;
 *  - for a destination that is neither YUV nor gray,
 *    ff_yuv2rgb_c_init_tables() is called; a PPC-specific table init call is
 *    also visible;
 *  - fill_rgb2yuv_table() is called with table and dstRange.
 *
 * NOTE(review): the return statements, the declaration and assignment of
 * need_reinit, and the bodies of several if statements are missing from
 * this listing - confirm the exact control flow against the complete source.
 */
814 int sws_setColorspaceDetails(struct SwsContext
*c
, const int inv_table
[4],
815 int srcRange
, const int table
[4], int dstRange
,
816 int brightness
, int contrast
, int saturation
)
818 const AVPixFmtDescriptor
*desc_dst
;
819 const AVPixFmtDescriptor
*desc_src
;
821 memmove(c
->srcColorspaceTable
, inv_table
, sizeof(int) * 4);
822 memmove(c
->dstColorspaceTable
, table
, sizeof(int) * 4);
825 desc_dst
= av_pix_fmt_desc_get(c
->dstFormat
);
826 desc_src
= av_pix_fmt_desc_get(c
->srcFormat
);
828 if(!isYUV(c
->dstFormat
) && !isGray(c
->dstFormat
))
830 if(!isYUV(c
->srcFormat
) && !isGray(c
->srcFormat
))
833 c
->brightness
= brightness
;
834 c
->contrast
= contrast
;
835 c
->saturation
= saturation
;
836 if (c
->srcRange
!= srcRange
|| c
->dstRange
!= dstRange
)
838 c
->srcRange
= srcRange
;
839 c
->dstRange
= dstRange
;
841 //The srcBpc check is possibly wrong but we seem to lack a definitive reference to test this
842 //and what we have in ticket 2939 looks better with this check
843 if (need_reinit
&& (c
->srcBpc
== 8 || !isYUV(c
->srcFormat
)))
844 ff_sws_init_range_convert(c
);
846 if ((isYUV(c
->dstFormat
) || isGray(c
->dstFormat
)) && (isYUV(c
->srcFormat
) || isGray(c
->srcFormat
)))
849 c
->dstFormatBpp
= av_get_bits_per_pixel(desc_dst
);
850 c
->srcFormatBpp
= av_get_bits_per_pixel(desc_src
);
852 if (!isYUV(c
->dstFormat
) && !isGray(c
->dstFormat
)) {
853 ff_yuv2rgb_c_init_tables(c
, inv_table
, srcRange
, brightness
,
854 contrast
, saturation
);
858 ff_yuv2rgb_init_tables_ppc(c
, inv_table
, brightness
,
859 contrast
, saturation
);
862 fill_rgb2yuv_table(c
, table
, dstRange
);
/*
 * Read back the colorspace parameters stored in the context.
 *
 * *inv_table and *table receive pointers to the arrays inside c
 * (c->srcColorspaceTable and c->dstColorspaceTable), not copies. The
 * remaining out-parameters receive the stored range, brightness, contrast
 * and saturation values.
 *
 * NOTE(review): the lines before the first assignment and the return
 * statement are missing from this listing, so any argument validation and
 * the return value are not shown here - confirm against the complete source.
 */
867 int sws_getColorspaceDetails(struct SwsContext
*c
, int **inv_table
,
868 int *srcRange
, int **table
, int *dstRange
,
869 int *brightness
, int *contrast
, int *saturation
)
874 *inv_table
= c
->srcColorspaceTable
;
875 *table
= c
->dstColorspaceTable
;
876 *srcRange
= c
->srcRange
;
877 *dstRange
= c
->dstRange
;
878 *brightness
= c
->brightness
;
879 *contrast
= c
->contrast
;
880 *saturation
= c
->saturation
;
/*
 * Rewrite a YUVJ pixel format in *format to its YUV counterpart
 * (YUVJ420P to YUV420P, YUVJ411P to YUV411P, and so on). Case labels for
 * GRAY8, GRAY16LE and GRAY16BE are also present. sws_init_context ORs the
 * result into c->srcRange and c->dstRange.
 *
 * NOTE(review): the switch header, every return statement and the default
 * case are missing from this listing, so the returned values are not shown
 * here - confirm against the complete source.
 */
885 static int handle_jpeg(enum AVPixelFormat
*format
)
888 case AV_PIX_FMT_YUVJ420P
:
889 *format
= AV_PIX_FMT_YUV420P
;
891 case AV_PIX_FMT_YUVJ411P
:
892 *format
= AV_PIX_FMT_YUV411P
;
894 case AV_PIX_FMT_YUVJ422P
:
895 *format
= AV_PIX_FMT_YUV422P
;
897 case AV_PIX_FMT_YUVJ444P
:
898 *format
= AV_PIX_FMT_YUV444P
;
900 case AV_PIX_FMT_YUVJ440P
:
901 *format
= AV_PIX_FMT_YUV440P
;
903 case AV_PIX_FMT_GRAY8
:
904 case AV_PIX_FMT_GRAY16LE
:
905 case AV_PIX_FMT_GRAY16BE
:
/*
 * Rewrite a format with an unused fourth byte in *format to the matching
 * alpha format. 0BGR and 0RGB (rewritten to ABGR and ARGB) return 1; BGR0
 * and RGB0 (rewritten to BGRA and RGBA) return 4. handle_formats ORs the
 * result into c->src0Alpha and c->dst0Alpha.
 *
 * NOTE(review): the switch header and the path taken for all other formats
 * are missing from this listing - confirm against the complete source.
 */
912 static int handle_0alpha(enum AVPixelFormat
*format
)
915 case AV_PIX_FMT_0BGR
: *format
= AV_PIX_FMT_ABGR
; return 1;
916 case AV_PIX_FMT_BGR0
: *format
= AV_PIX_FMT_BGRA
; return 4;
917 case AV_PIX_FMT_0RGB
: *format
= AV_PIX_FMT_ARGB
; return 1;
918 case AV_PIX_FMT_RGB0
: *format
= AV_PIX_FMT_RGBA
; return 4;
/*
 * Rewrite XYZ12BE / XYZ12LE in *format to RGB48BE / RGB48LE and return 1.
 * handle_formats ORs the result into c->srcXYZ and c->dstXYZ.
 *
 * NOTE(review): the switch header and the path taken for all other formats
 * are missing from this listing - confirm against the complete source.
 */
923 static int handle_xyz(enum AVPixelFormat
*format
)
926 case AV_PIX_FMT_XYZ12BE
: *format
= AV_PIX_FMT_RGB48BE
; return 1;
927 case AV_PIX_FMT_XYZ12LE
: *format
= AV_PIX_FMT_RGB48LE
; return 1;
/*
 * Normalize the source and destination formats of c.
 * handle_0alpha() and handle_xyz() may rewrite c->srcFormat and
 * c->dstFormat in place; their results are ORed into the src0Alpha,
 * dst0Alpha, srcXYZ and dstXYZ fields.
 *
 * NOTE(review): the statement executed when srcXYZ or dstXYZ is set is
 * missing from this listing (presumably the XYZ table setup) - confirm
 * against the complete source.
 */
932 static void handle_formats(SwsContext
*c
)
934 c
->src0Alpha
|= handle_0alpha(&c
->srcFormat
);
935 c
->dst0Alpha
|= handle_0alpha(&c
->dstFormat
);
936 c
->srcXYZ
|= handle_xyz(&c
->srcFormat
);
937 c
->dstXYZ
|= handle_xyz(&c
->dstFormat
);
938 if (c
->srcXYZ
|| c
->dstXYZ
)
/*
 * Allocate a SwsContext with av_mallocz(), set its av_class to
 * sws_context_class and apply option defaults with av_opt_set_defaults().
 * An av_assert0() also checks that the dither32 field sits DITHER32_INT
 * bytes after redDither.
 *
 * NOTE(review): the lines around the av_class assignment and the return
 * statement are missing from this listing, so the handling of a failed
 * allocation is not shown here - confirm against the complete source.
 */
942 SwsContext
*sws_alloc_context(void)
944 SwsContext
*c
= av_mallocz(sizeof(SwsContext
));
946 av_assert0(offsetof(SwsContext
, redDither
) + DITHER32_INT
== offsetof(SwsContext
, dither32
));
949 c
->av_class
= &sws_context_class
;
950 av_opt_set_defaults(c
);
956 av_cold
int sws_init_context(SwsContext
*c
, SwsFilter
*srcFilter
,
957 SwsFilter
*dstFilter
)
960 int usesVFilter
, usesHFilter
;
962 SwsFilter dummyFilter
= { NULL
, NULL
, NULL
, NULL
};
967 int dst_stride
= FFALIGN(dstW
* sizeof(int16_t) + 66, 16);
968 int flags
, cpu_flags
;
969 enum AVPixelFormat srcFormat
= c
->srcFormat
;
970 enum AVPixelFormat dstFormat
= c
->dstFormat
;
971 const AVPixFmtDescriptor
*desc_src
;
972 const AVPixFmtDescriptor
*desc_dst
;
974 cpu_flags
= av_get_cpu_flags();
980 unscaled
= (srcW
== dstW
&& srcH
== dstH
);
982 c
->srcRange
|= handle_jpeg(&c
->srcFormat
);
983 c
->dstRange
|= handle_jpeg(&c
->dstFormat
);
985 if(srcFormat
!=c
->srcFormat
|| dstFormat
!=c
->dstFormat
)
986 av_log(c
, AV_LOG_WARNING
, "deprecated pixel format used, make sure you did set range correctly\n");
988 if (!c
->contrast
&& !c
->saturation
&& !c
->dstFormatBpp
)
989 sws_setColorspaceDetails(c
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
], c
->srcRange
,
990 ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
],
991 c
->dstRange
, 0, 1 << 16, 1 << 16);
994 srcFormat
= c
->srcFormat
;
995 dstFormat
= c
->dstFormat
;
996 desc_src
= av_pix_fmt_desc_get(srcFormat
);
997 desc_dst
= av_pix_fmt_desc_get(dstFormat
);
999 if (!(unscaled
&& sws_isSupportedEndiannessConversion(srcFormat
) &&
1000 av_pix_fmt_swap_endianness(srcFormat
) == dstFormat
)) {
1001 if (!sws_isSupportedInput(srcFormat
)) {
1002 av_log(c
, AV_LOG_ERROR
, "%s is not supported as input pixel format\n",
1003 av_get_pix_fmt_name(srcFormat
));
1004 return AVERROR(EINVAL
);
1006 if (!sws_isSupportedOutput(dstFormat
)) {
1007 av_log(c
, AV_LOG_ERROR
, "%s is not supported as output pixel format\n",
1008 av_get_pix_fmt_name(dstFormat
));
1009 return AVERROR(EINVAL
);
1013 i
= flags
& (SWS_POINT
|
1025 /* provide a default scaler if not set by caller */
1027 if (dstW
< srcW
&& dstH
< srcH
)
1028 flags
|= SWS_BICUBIC
;
1029 else if (dstW
> srcW
&& dstH
> srcH
)
1030 flags
|= SWS_BICUBIC
;
1032 flags
|= SWS_BICUBIC
;
1034 } else if (i
& (i
- 1)) {
1035 av_log(c
, AV_LOG_ERROR
,
1036 "Exactly one scaler algorithm must be chosen, got %X\n", i
);
1037 return AVERROR(EINVAL
);
1040 if (srcW
< 1 || srcH
< 1 || dstW
< 1 || dstH
< 1) {
1041 /* FIXME check if these are enough and try to lower them after
1042 * fixing the relevant parts of the code */
1043 av_log(c
, AV_LOG_ERROR
, "%dx%d -> %dx%d is invalid scaling dimension\n",
1044 srcW
, srcH
, dstW
, dstH
);
1045 return AVERROR(EINVAL
);
1049 dstFilter
= &dummyFilter
;
1051 srcFilter
= &dummyFilter
;
1053 c
->lumXInc
= (((int64_t)srcW
<< 16) + (dstW
>> 1)) / dstW
;
1054 c
->lumYInc
= (((int64_t)srcH
<< 16) + (dstH
>> 1)) / dstH
;
1055 c
->dstFormatBpp
= av_get_bits_per_pixel(desc_dst
);
1056 c
->srcFormatBpp
= av_get_bits_per_pixel(desc_src
);
1057 c
->vRounder
= 4 * 0x0001000100010001ULL
;
1059 usesVFilter
= (srcFilter
->lumV
&& srcFilter
->lumV
->length
> 1) ||
1060 (srcFilter
->chrV
&& srcFilter
->chrV
->length
> 1) ||
1061 (dstFilter
->lumV
&& dstFilter
->lumV
->length
> 1) ||
1062 (dstFilter
->chrV
&& dstFilter
->chrV
->length
> 1);
1063 usesHFilter
= (srcFilter
->lumH
&& srcFilter
->lumH
->length
> 1) ||
1064 (srcFilter
->chrH
&& srcFilter
->chrH
->length
> 1) ||
1065 (dstFilter
->lumH
&& dstFilter
->lumH
->length
> 1) ||
1066 (dstFilter
->chrH
&& dstFilter
->chrH
->length
> 1);
1068 av_pix_fmt_get_chroma_sub_sample(srcFormat
, &c
->chrSrcHSubSample
, &c
->chrSrcVSubSample
);
1069 av_pix_fmt_get_chroma_sub_sample(dstFormat
, &c
->chrDstHSubSample
, &c
->chrDstVSubSample
);
1071 if (isAnyRGB(dstFormat
) && !(flags
&SWS_FULL_CHR_H_INT
)) {
1073 av_log(c
, AV_LOG_DEBUG
, "Forcing full internal H chroma due to odd output size\n");
1074 flags
|= SWS_FULL_CHR_H_INT
;
1078 if ( c
->chrSrcHSubSample
== 0
1079 && c
->chrSrcVSubSample
== 0
1080 && c
->dither
!= SWS_DITHER_BAYER
//SWS_FULL_CHR_H_INT is currently not supported with SWS_DITHER_BAYER
1081 && !(c
->flags
& SWS_FAST_BILINEAR
)
1083 av_log(c
, AV_LOG_DEBUG
, "Forcing full internal H chroma due to input having non subsampled chroma\n");
1084 flags
|= SWS_FULL_CHR_H_INT
;
1089 if (c
->dither
== SWS_DITHER_AUTO
) {
1090 if (flags
& SWS_ERROR_DIFFUSION
)
1091 c
->dither
= SWS_DITHER_ED
;
1094 if(dstFormat
== AV_PIX_FMT_BGR4_BYTE
||
1095 dstFormat
== AV_PIX_FMT_RGB4_BYTE
||
1096 dstFormat
== AV_PIX_FMT_BGR8
||
1097 dstFormat
== AV_PIX_FMT_RGB8
) {
1098 if (c
->dither
== SWS_DITHER_AUTO
)
1099 c
->dither
= (flags
& SWS_FULL_CHR_H_INT
) ? SWS_DITHER_ED
: SWS_DITHER_BAYER
;
1100 if (!(flags
& SWS_FULL_CHR_H_INT
)) {
1101 if (c
->dither
== SWS_DITHER_ED
|| c
->dither
== SWS_DITHER_A_DITHER
|| c
->dither
== SWS_DITHER_X_DITHER
) {
1102 av_log(c
, AV_LOG_DEBUG
,
1103 "Desired dithering only supported in full chroma interpolation for destination format '%s'\n",
1104 av_get_pix_fmt_name(dstFormat
));
1105 flags
|= SWS_FULL_CHR_H_INT
;
1109 if (flags
& SWS_FULL_CHR_H_INT
) {
1110 if (c
->dither
== SWS_DITHER_BAYER
) {
1111 av_log(c
, AV_LOG_DEBUG
,
1112 "Ordered dither is not supported in full chroma interpolation for destination format '%s'\n",
1113 av_get_pix_fmt_name(dstFormat
));
1114 c
->dither
= SWS_DITHER_ED
;
1118 if (isPlanarRGB(dstFormat
)) {
1119 if (!(flags
& SWS_FULL_CHR_H_INT
)) {
1120 av_log(c
, AV_LOG_DEBUG
,
1121 "%s output is not supported with half chroma resolution, switching to full\n",
1122 av_get_pix_fmt_name(dstFormat
));
1123 flags
|= SWS_FULL_CHR_H_INT
;
1128 /* reuse chroma for 2 pixels RGB/BGR unless user wants full
1129 * chroma interpolation */
1130 if (flags
& SWS_FULL_CHR_H_INT
&&
1131 isAnyRGB(dstFormat
) &&
1132 !isPlanarRGB(dstFormat
) &&
1133 dstFormat
!= AV_PIX_FMT_RGBA
&&
1134 dstFormat
!= AV_PIX_FMT_ARGB
&&
1135 dstFormat
!= AV_PIX_FMT_BGRA
&&
1136 dstFormat
!= AV_PIX_FMT_ABGR
&&
1137 dstFormat
!= AV_PIX_FMT_RGB24
&&
1138 dstFormat
!= AV_PIX_FMT_BGR24
&&
1139 dstFormat
!= AV_PIX_FMT_BGR4_BYTE
&&
1140 dstFormat
!= AV_PIX_FMT_RGB4_BYTE
&&
1141 dstFormat
!= AV_PIX_FMT_BGR8
&&
1142 dstFormat
!= AV_PIX_FMT_RGB8
1144 av_log(c
, AV_LOG_WARNING
,
1145 "full chroma interpolation for destination format '%s' not yet implemented\n",
1146 av_get_pix_fmt_name(dstFormat
));
1147 flags
&= ~SWS_FULL_CHR_H_INT
;
1150 if (isAnyRGB(dstFormat
) && !(flags
& SWS_FULL_CHR_H_INT
))
1151 c
->chrDstHSubSample
= 1;
1153 // drop some chroma lines if the user wants it
1154 c
->vChrDrop
= (flags
& SWS_SRC_V_CHR_DROP_MASK
) >>
1155 SWS_SRC_V_CHR_DROP_SHIFT
;
1156 c
->chrSrcVSubSample
+= c
->vChrDrop
;
1158 /* drop every other pixel for chroma calculation unless user
1159 * wants full chroma */
1160 if (isAnyRGB(srcFormat
) && !(flags
& SWS_FULL_CHR_H_INP
) &&
1161 srcFormat
!= AV_PIX_FMT_RGB8
&& srcFormat
!= AV_PIX_FMT_BGR8
&&
1162 srcFormat
!= AV_PIX_FMT_RGB4
&& srcFormat
!= AV_PIX_FMT_BGR4
&&
1163 srcFormat
!= AV_PIX_FMT_RGB4_BYTE
&& srcFormat
!= AV_PIX_FMT_BGR4_BYTE
&&
1164 srcFormat
!= AV_PIX_FMT_GBRP9BE
&& srcFormat
!= AV_PIX_FMT_GBRP9LE
&&
1165 srcFormat
!= AV_PIX_FMT_GBRP10BE
&& srcFormat
!= AV_PIX_FMT_GBRP10LE
&&
1166 srcFormat
!= AV_PIX_FMT_GBRP12BE
&& srcFormat
!= AV_PIX_FMT_GBRP12LE
&&
1167 srcFormat
!= AV_PIX_FMT_GBRP14BE
&& srcFormat
!= AV_PIX_FMT_GBRP14LE
&&
1168 srcFormat
!= AV_PIX_FMT_GBRP16BE
&& srcFormat
!= AV_PIX_FMT_GBRP16LE
&&
1169 ((dstW
>> c
->chrDstHSubSample
) <= (srcW
>> 1) ||
1170 (flags
& SWS_FAST_BILINEAR
)))
1171 c
->chrSrcHSubSample
= 1;
1173 // Note the FF_CEIL_RSHIFT is so that we always round toward +inf.
1174 c
->chrSrcW
= FF_CEIL_RSHIFT(srcW
, c
->chrSrcHSubSample
);
1175 c
->chrSrcH
= FF_CEIL_RSHIFT(srcH
, c
->chrSrcVSubSample
);
1176 c
->chrDstW
= FF_CEIL_RSHIFT(dstW
, c
->chrDstHSubSample
);
1177 c
->chrDstH
= FF_CEIL_RSHIFT(dstH
, c
->chrDstVSubSample
);
1179 FF_ALLOC_OR_GOTO(c
, c
->formatConvBuffer
, FFALIGN(srcW
*2+78, 16) * 2, fail
);
1181 c
->srcBpc
= 1 + desc_src
->comp
[0].depth_minus1
;
1184 c
->dstBpc
= 1 + desc_dst
->comp
[0].depth_minus1
;
1187 if (isAnyRGB(srcFormat
) || srcFormat
== AV_PIX_FMT_PAL8
)
1189 if (c
->dstBpc
== 16)
1192 if (INLINE_MMXEXT(cpu_flags
) && c
->srcBpc
== 8 && c
->dstBpc
<= 14) {
1193 c
->canMMXEXTBeUsed
= dstW
>= srcW
&& (dstW
& 31) == 0 &&
1194 c
->chrDstW
>= c
->chrSrcW
&&
1196 if (!c
->canMMXEXTBeUsed
&& dstW
>= srcW
&& c
->chrDstW
>= c
->chrSrcW
&& (srcW
& 15) == 0
1198 && (flags
& SWS_FAST_BILINEAR
)) {
1199 if (flags
& SWS_PRINT_INFO
)
1200 av_log(c
, AV_LOG_INFO
,
1201 "output width is not a multiple of 32 -> no MMXEXT scaler\n");
1203 if (usesHFilter
|| isNBPS(c
->srcFormat
) || is16BPS(c
->srcFormat
) || isAnyRGB(c
->srcFormat
))
1204 c
->canMMXEXTBeUsed
= 0;
1206 c
->canMMXEXTBeUsed
= 0;
1208 c
->chrXInc
= (((int64_t)c
->chrSrcW
<< 16) + (c
->chrDstW
>> 1)) / c
->chrDstW
;
1209 c
->chrYInc
= (((int64_t)c
->chrSrcH
<< 16) + (c
->chrDstH
>> 1)) / c
->chrDstH
;
1211 /* Match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src
1212 * to pixel n-2 of dst, but only for the FAST_BILINEAR mode otherwise do
1214 * n-2 is the last chrominance sample available.
1215 * This is not perfect, but no one should notice the difference, the more
1216 * correct variant would be like the vertical one, but that would require
1217 * some special code for the first and last pixel */
1218 if (flags
& SWS_FAST_BILINEAR
) {
1219 if (c
->canMMXEXTBeUsed
) {
1223 // we don't use the x86 asm scaler if MMX is available
1224 else if (INLINE_MMX(cpu_flags
) && c
->dstBpc
<= 14) {
1225 c
->lumXInc
= ((int64_t)(srcW
- 2) << 16) / (dstW
- 2) - 20;
1226 c
->chrXInc
= ((int64_t)(c
->chrSrcW
- 2) << 16) / (c
->chrDstW
- 2) - 20;
1230 #define USE_MMAP (HAVE_MMAP && HAVE_MPROTECT && defined MAP_ANONYMOUS)
1232 /* precalculate horizontal scaler filter coefficients */
1234 #if HAVE_MMXEXT_INLINE
1235 // can't downscale !!!
1236 if (c
->canMMXEXTBeUsed
&& (flags
& SWS_FAST_BILINEAR
)) {
1237 c
->lumMmxextFilterCodeSize
= ff_init_hscaler_mmxext(dstW
, c
->lumXInc
, NULL
,
1239 c
->chrMmxextFilterCodeSize
= ff_init_hscaler_mmxext(c
->chrDstW
, c
->chrXInc
,
1240 NULL
, NULL
, NULL
, 4);
1243 c
->lumMmxextFilterCode
= mmap(NULL
, c
->lumMmxextFilterCodeSize
,
1244 PROT_READ
| PROT_WRITE
,
1245 MAP_PRIVATE
| MAP_ANONYMOUS
,
1247 c
->chrMmxextFilterCode
= mmap(NULL
, c
->chrMmxextFilterCodeSize
,
1248 PROT_READ
| PROT_WRITE
,
1249 MAP_PRIVATE
| MAP_ANONYMOUS
,
1251 #elif HAVE_VIRTUALALLOC
1252 c
->lumMmxextFilterCode
= VirtualAlloc(NULL
,
1253 c
->lumMmxextFilterCodeSize
,
1255 PAGE_EXECUTE_READWRITE
);
1256 c
->chrMmxextFilterCode
= VirtualAlloc(NULL
,
1257 c
->chrMmxextFilterCodeSize
,
1259 PAGE_EXECUTE_READWRITE
);
1261 c
->lumMmxextFilterCode
= av_malloc(c
->lumMmxextFilterCodeSize
);
1262 c
->chrMmxextFilterCode
= av_malloc(c
->chrMmxextFilterCodeSize
);
1265 #ifdef MAP_ANONYMOUS
1266 if (c
->lumMmxextFilterCode
== MAP_FAILED
|| c
->chrMmxextFilterCode
== MAP_FAILED
)
1268 if (!c
->lumMmxextFilterCode
|| !c
->chrMmxextFilterCode
)
1271 av_log(c
, AV_LOG_ERROR
, "Failed to allocate MMX2FilterCode\n");
1272 return AVERROR(ENOMEM
);
1275 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilter
, (dstW
/ 8 + 8) * sizeof(int16_t), fail
);
1276 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilter
, (c
->chrDstW
/ 4 + 8) * sizeof(int16_t), fail
);
1277 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilterPos
, (dstW
/ 2 / 8 + 8) * sizeof(int32_t), fail
);
1278 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilterPos
, (c
->chrDstW
/ 2 / 4 + 8) * sizeof(int32_t), fail
);
1280 ff_init_hscaler_mmxext( dstW
, c
->lumXInc
, c
->lumMmxextFilterCode
,
1281 c
->hLumFilter
, (uint32_t*)c
->hLumFilterPos
, 8);
1282 ff_init_hscaler_mmxext(c
->chrDstW
, c
->chrXInc
, c
->chrMmxextFilterCode
,
1283 c
->hChrFilter
, (uint32_t*)c
->hChrFilterPos
, 4);
1286 if ( mprotect(c
->lumMmxextFilterCode
, c
->lumMmxextFilterCodeSize
, PROT_EXEC
| PROT_READ
) == -1
1287 || mprotect(c
->chrMmxextFilterCode
, c
->chrMmxextFilterCodeSize
, PROT_EXEC
| PROT_READ
) == -1) {
1288 av_log(c
, AV_LOG_ERROR
, "mprotect failed, cannot use fast bilinear scaler\n");
1293 #endif /* HAVE_MMXEXT_INLINE */
1295 const int filterAlign
= X86_MMX(cpu_flags
) ? 4 :
1296 PPC_ALTIVEC(cpu_flags
) ? 8 : 1;
1298 if (initFilter(&c
->hLumFilter
, &c
->hLumFilterPos
,
1299 &c
->hLumFilterSize
, c
->lumXInc
,
1300 srcW
, dstW
, filterAlign
, 1 << 14,
1301 (flags
& SWS_BICUBLIN
) ? (flags
| SWS_BICUBIC
) : flags
,
1302 cpu_flags
, srcFilter
->lumH
, dstFilter
->lumH
,
1304 get_local_pos(c
, 0, 0, 0),
1305 get_local_pos(c
, 0, 0, 0)) < 0)
1307 if (initFilter(&c
->hChrFilter
, &c
->hChrFilterPos
,
1308 &c
->hChrFilterSize
, c
->chrXInc
,
1309 c
->chrSrcW
, c
->chrDstW
, filterAlign
, 1 << 14,
1310 (flags
& SWS_BICUBLIN
) ? (flags
| SWS_BILINEAR
) : flags
,
1311 cpu_flags
, srcFilter
->chrH
, dstFilter
->chrH
,
1313 get_local_pos(c
, c
->chrSrcHSubSample
, c
->src_h_chr_pos
, 0),
1314 get_local_pos(c
, c
->chrDstHSubSample
, c
->dst_h_chr_pos
, 0)) < 0)
1317 } // initialize horizontal stuff
1319 /* precalculate vertical scaler filter coefficients */
1321 const int filterAlign
= X86_MMX(cpu_flags
) ? 2 :
1322 PPC_ALTIVEC(cpu_flags
) ? 8 : 1;
1324 if (initFilter(&c
->vLumFilter
, &c
->vLumFilterPos
, &c
->vLumFilterSize
,
1325 c
->lumYInc
, srcH
, dstH
, filterAlign
, (1 << 12),
1326 (flags
& SWS_BICUBLIN
) ? (flags
| SWS_BICUBIC
) : flags
,
1327 cpu_flags
, srcFilter
->lumV
, dstFilter
->lumV
,
1329 get_local_pos(c
, 0, 0, 1),
1330 get_local_pos(c
, 0, 0, 1)) < 0)
1332 if (initFilter(&c
->vChrFilter
, &c
->vChrFilterPos
, &c
->vChrFilterSize
,
1333 c
->chrYInc
, c
->chrSrcH
, c
->chrDstH
,
1334 filterAlign
, (1 << 12),
1335 (flags
& SWS_BICUBLIN
) ? (flags
| SWS_BILINEAR
) : flags
,
1336 cpu_flags
, srcFilter
->chrV
, dstFilter
->chrV
,
1338 get_local_pos(c
, c
->chrSrcVSubSample
, c
->src_v_chr_pos
, 1),
1339 get_local_pos(c
, c
->chrDstVSubSample
, c
->dst_v_chr_pos
, 1)) < 0)
1344 FF_ALLOC_OR_GOTO(c
, c
->vYCoeffsBank
, sizeof(vector
signed short) * c
->vLumFilterSize
* c
->dstH
, fail
);
1345 FF_ALLOC_OR_GOTO(c
, c
->vCCoeffsBank
, sizeof(vector
signed short) * c
->vChrFilterSize
* c
->chrDstH
, fail
);
1347 for (i
= 0; i
< c
->vLumFilterSize
* c
->dstH
; i
++) {
1349 short *p
= (short *)&c
->vYCoeffsBank
[i
];
1350 for (j
= 0; j
< 8; j
++)
1351 p
[j
] = c
->vLumFilter
[i
];
1354 for (i
= 0; i
< c
->vChrFilterSize
* c
->chrDstH
; i
++) {
1356 short *p
= (short *)&c
->vCCoeffsBank
[i
];
1357 for (j
= 0; j
< 8; j
++)
1358 p
[j
] = c
->vChrFilter
[i
];
1363 // calculate buffer sizes so that they won't run out while handling these damn slices
1364 c
->vLumBufSize
= c
->vLumFilterSize
;
1365 c
->vChrBufSize
= c
->vChrFilterSize
;
1366 for (i
= 0; i
< dstH
; i
++) {
1367 int chrI
= (int64_t)i
* c
->chrDstH
/ dstH
;
1368 int nextSlice
= FFMAX(c
->vLumFilterPos
[i
] + c
->vLumFilterSize
- 1,
1369 ((c
->vChrFilterPos
[chrI
] + c
->vChrFilterSize
- 1)
1370 << c
->chrSrcVSubSample
));
1372 nextSlice
>>= c
->chrSrcVSubSample
;
1373 nextSlice
<<= c
->chrSrcVSubSample
;
1374 if (c
->vLumFilterPos
[i
] + c
->vLumBufSize
< nextSlice
)
1375 c
->vLumBufSize
= nextSlice
- c
->vLumFilterPos
[i
];
1376 if (c
->vChrFilterPos
[chrI
] + c
->vChrBufSize
<
1377 (nextSlice
>> c
->chrSrcVSubSample
))
1378 c
->vChrBufSize
= (nextSlice
>> c
->chrSrcVSubSample
) -
1379 c
->vChrFilterPos
[chrI
];
1382 for (i
= 0; i
< 4; i
++)
1383 FF_ALLOCZ_OR_GOTO(c
, c
->dither_error
[i
], (c
->dstW
+2) * sizeof(int), fail
);
1385 /* Allocate pixbufs (we use dynamic allocation because otherwise we would
1386 * need to allocate several megabytes to handle all possible cases) */
1387 FF_ALLOC_OR_GOTO(c
, c
->lumPixBuf
, c
->vLumBufSize
* 3 * sizeof(int16_t *), fail
);
1388 FF_ALLOC_OR_GOTO(c
, c
->chrUPixBuf
, c
->vChrBufSize
* 3 * sizeof(int16_t *), fail
);
1389 FF_ALLOC_OR_GOTO(c
, c
->chrVPixBuf
, c
->vChrBufSize
* 3 * sizeof(int16_t *), fail
);
1390 if (CONFIG_SWSCALE_ALPHA
&& isALPHA(c
->srcFormat
) && isALPHA(c
->dstFormat
))
1391 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
, c
->vLumBufSize
* 3 * sizeof(int16_t *), fail
);
1392 /* Note we need at least one pixel more at the end because of the MMX code
1393 * (just in case someone wants to replace the 4000/8000). */
1394 /* align at 16 bytes for AltiVec */
1395 for (i
= 0; i
< c
->vLumBufSize
; i
++) {
1396 FF_ALLOCZ_OR_GOTO(c
, c
->lumPixBuf
[i
+ c
->vLumBufSize
],
1397 dst_stride
+ 16, fail
);
1398 c
->lumPixBuf
[i
] = c
->lumPixBuf
[i
+ c
->vLumBufSize
];
1400 // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate)
1401 c
->uv_off
= (dst_stride
>>1) + 64 / (c
->dstBpc
&~ 7);
1402 c
->uv_offx2
= dst_stride
+ 16;
1403 for (i
= 0; i
< c
->vChrBufSize
; i
++) {
1404 FF_ALLOC_OR_GOTO(c
, c
->chrUPixBuf
[i
+ c
->vChrBufSize
],
1405 dst_stride
* 2 + 32, fail
);
1406 c
->chrUPixBuf
[i
] = c
->chrUPixBuf
[i
+ c
->vChrBufSize
];
1407 c
->chrVPixBuf
[i
] = c
->chrVPixBuf
[i
+ c
->vChrBufSize
]
1408 = c
->chrUPixBuf
[i
] + (dst_stride
>> 1) + 8;
1410 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
)
1411 for (i
= 0; i
< c
->vLumBufSize
; i
++) {
1412 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
[i
+ c
->vLumBufSize
],
1413 dst_stride
+ 16, fail
);
1414 c
->alpPixBuf
[i
] = c
->alpPixBuf
[i
+ c
->vLumBufSize
];
1417 // try to avoid drawing green stuff between the right end and the stride end
1418 for (i
= 0; i
< c
->vChrBufSize
; i
++)
1419 if(desc_dst
->comp
[0].depth_minus1
== 15){
1420 av_assert0(c
->dstBpc
> 14);
1421 for(j
=0; j
<dst_stride
/2+1; j
++)
1422 ((int32_t*)(c
->chrUPixBuf
[i
]))[j
] = 1<<18;
1424 for(j
=0; j
<dst_stride
+1; j
++)
1425 ((int16_t*)(c
->chrUPixBuf
[i
]))[j
] = 1<<14;
1427 av_assert0(c
->chrDstH
<= dstH
);
1429 if (flags
& SWS_PRINT_INFO
) {
1430 const char *scaler
= NULL
, *cpucaps
;
1432 for (i
= 0; i
< FF_ARRAY_ELEMS(scale_algorithms
); i
++) {
1433 if (flags
& scale_algorithms
[i
].flag
) {
1434 scaler
= scale_algorithms
[i
].description
;
1439 scaler
= "ehh flags invalid?!";
1440 av_log(c
, AV_LOG_INFO
, "%s scaler, from %s to %s%s ",
1442 av_get_pix_fmt_name(srcFormat
),
1444 dstFormat
== AV_PIX_FMT_BGR555
|| dstFormat
== AV_PIX_FMT_BGR565
||
1445 dstFormat
== AV_PIX_FMT_RGB444BE
|| dstFormat
== AV_PIX_FMT_RGB444LE
||
1446 dstFormat
== AV_PIX_FMT_BGR444BE
|| dstFormat
== AV_PIX_FMT_BGR444LE
?
1451 av_get_pix_fmt_name(dstFormat
));
1453 if (INLINE_MMXEXT(cpu_flags
))
1455 else if (INLINE_AMD3DNOW(cpu_flags
))
1457 else if (INLINE_MMX(cpu_flags
))
1459 else if (PPC_ALTIVEC(cpu_flags
))
1460 cpucaps
= "AltiVec";
1464 av_log(c
, AV_LOG_INFO
, "using %s\n", cpucaps
);
1466 av_log(c
, AV_LOG_VERBOSE
, "%dx%d -> %dx%d\n", srcW
, srcH
, dstW
, dstH
);
1467 av_log(c
, AV_LOG_DEBUG
,
1468 "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1469 c
->srcW
, c
->srcH
, c
->dstW
, c
->dstH
, c
->lumXInc
, c
->lumYInc
);
1470 av_log(c
, AV_LOG_DEBUG
,
1471 "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1472 c
->chrSrcW
, c
->chrSrcH
, c
->chrDstW
, c
->chrDstH
,
1473 c
->chrXInc
, c
->chrYInc
);
1476 /* unscaled special cases */
1477 if (unscaled
&& !usesHFilter
&& !usesVFilter
&&
1478 (c
->srcRange
== c
->dstRange
|| isAnyRGB(dstFormat
))) {
1479 ff_get_unscaled_swscale(c
);
1482 if (flags
& SWS_PRINT_INFO
)
1483 av_log(c
, AV_LOG_INFO
,
1484 "using unscaled %s -> %s special converter\n",
1485 av_get_pix_fmt_name(srcFormat
), av_get_pix_fmt_name(dstFormat
));
1490 c
->swscale
= ff_getSwsFunc(c
);
1492 fail
: // FIXME replace things by appropriate error codes
1496 SwsContext
*sws_getContext(int srcW
, int srcH
, enum AVPixelFormat srcFormat
,
1497 int dstW
, int dstH
, enum AVPixelFormat dstFormat
,
1498 int flags
, SwsFilter
*srcFilter
,
1499 SwsFilter
*dstFilter
, const double *param
)
1503 if (!(c
= sws_alloc_context()))
1511 c
->srcFormat
= srcFormat
;
1512 c
->dstFormat
= dstFormat
;
1515 c
->param
[0] = param
[0];
1516 c
->param
[1] = param
[1];
1519 if (sws_init_context(c
, srcFilter
, dstFilter
) < 0) {
1527 SwsFilter
*sws_getDefaultFilter(float lumaGBlur
, float chromaGBlur
,
1528 float lumaSharpen
, float chromaSharpen
,
1529 float chromaHShift
, float chromaVShift
,
1532 SwsFilter
*filter
= av_malloc(sizeof(SwsFilter
));
1536 if (lumaGBlur
!= 0.0) {
1537 filter
->lumH
= sws_getGaussianVec(lumaGBlur
, 3.0);
1538 filter
->lumV
= sws_getGaussianVec(lumaGBlur
, 3.0);
1540 filter
->lumH
= sws_getIdentityVec();
1541 filter
->lumV
= sws_getIdentityVec();
1544 if (chromaGBlur
!= 0.0) {
1545 filter
->chrH
= sws_getGaussianVec(chromaGBlur
, 3.0);
1546 filter
->chrV
= sws_getGaussianVec(chromaGBlur
, 3.0);
1548 filter
->chrH
= sws_getIdentityVec();
1549 filter
->chrV
= sws_getIdentityVec();
1552 if (chromaSharpen
!= 0.0) {
1553 SwsVector
*id
= sws_getIdentityVec();
1554 sws_scaleVec(filter
->chrH
, -chromaSharpen
);
1555 sws_scaleVec(filter
->chrV
, -chromaSharpen
);
1556 sws_addVec(filter
->chrH
, id
);
1557 sws_addVec(filter
->chrV
, id
);
1561 if (lumaSharpen
!= 0.0) {
1562 SwsVector
*id
= sws_getIdentityVec();
1563 sws_scaleVec(filter
->lumH
, -lumaSharpen
);
1564 sws_scaleVec(filter
->lumV
, -lumaSharpen
);
1565 sws_addVec(filter
->lumH
, id
);
1566 sws_addVec(filter
->lumV
, id
);
1570 if (chromaHShift
!= 0.0)
1571 sws_shiftVec(filter
->chrH
, (int)(chromaHShift
+ 0.5));
1573 if (chromaVShift
!= 0.0)
1574 sws_shiftVec(filter
->chrV
, (int)(chromaVShift
+ 0.5));
1576 sws_normalizeVec(filter
->chrH
, 1.0);
1577 sws_normalizeVec(filter
->chrV
, 1.0);
1578 sws_normalizeVec(filter
->lumH
, 1.0);
1579 sws_normalizeVec(filter
->lumV
, 1.0);
1582 sws_printVec2(filter
->chrH
, NULL
, AV_LOG_DEBUG
);
1584 sws_printVec2(filter
->lumH
, NULL
, AV_LOG_DEBUG
);
1589 SwsVector
*sws_allocVec(int length
)
1593 if(length
<= 0 || length
> INT_MAX
/ sizeof(double))
1596 vec
= av_malloc(sizeof(SwsVector
));
1599 vec
->length
= length
;
1600 vec
->coeff
= av_malloc(sizeof(double) * length
);
1606 SwsVector
*sws_getGaussianVec(double variance
, double quality
)
1608 const int length
= (int)(variance
* quality
+ 0.5) | 1;
1610 double middle
= (length
- 1) * 0.5;
1613 if(variance
< 0 || quality
< 0)
1616 vec
= sws_allocVec(length
);
1621 for (i
= 0; i
< length
; i
++) {
1622 double dist
= i
- middle
;
1623 vec
->coeff
[i
] = exp(-dist
* dist
/ (2 * variance
* variance
)) /
1624 sqrt(2 * variance
* M_PI
);
1627 sws_normalizeVec(vec
, 1.0);
1632 SwsVector
*sws_getConstVec(double c
, int length
)
1635 SwsVector
*vec
= sws_allocVec(length
);
1640 for (i
= 0; i
< length
; i
++)
1646 SwsVector
*sws_getIdentityVec(void)
1648 return sws_getConstVec(1.0, 1);
1651 static double sws_dcVec(SwsVector
*a
)
1656 for (i
= 0; i
< a
->length
; i
++)
1662 void sws_scaleVec(SwsVector
*a
, double scalar
)
1666 for (i
= 0; i
< a
->length
; i
++)
1667 a
->coeff
[i
] *= scalar
;
1670 void sws_normalizeVec(SwsVector
*a
, double height
)
1672 sws_scaleVec(a
, height
/ sws_dcVec(a
));
1675 static SwsVector
*sws_getConvVec(SwsVector
*a
, SwsVector
*b
)
1677 int length
= a
->length
+ b
->length
- 1;
1679 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1684 for (i
= 0; i
< a
->length
; i
++) {
1685 for (j
= 0; j
< b
->length
; j
++) {
1686 vec
->coeff
[i
+ j
] += a
->coeff
[i
] * b
->coeff
[j
];
1693 static SwsVector
*sws_sumVec(SwsVector
*a
, SwsVector
*b
)
1695 int length
= FFMAX(a
->length
, b
->length
);
1697 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1702 for (i
= 0; i
< a
->length
; i
++)
1703 vec
->coeff
[i
+ (length
- 1) / 2 - (a
->length
- 1) / 2] += a
->coeff
[i
];
1704 for (i
= 0; i
< b
->length
; i
++)
1705 vec
->coeff
[i
+ (length
- 1) / 2 - (b
->length
- 1) / 2] += b
->coeff
[i
];
1710 static SwsVector
*sws_diffVec(SwsVector
*a
, SwsVector
*b
)
1712 int length
= FFMAX(a
->length
, b
->length
);
1714 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1719 for (i
= 0; i
< a
->length
; i
++)
1720 vec
->coeff
[i
+ (length
- 1) / 2 - (a
->length
- 1) / 2] += a
->coeff
[i
];
1721 for (i
= 0; i
< b
->length
; i
++)
1722 vec
->coeff
[i
+ (length
- 1) / 2 - (b
->length
- 1) / 2] -= b
->coeff
[i
];
1727 /* shift left / or right if "shift" is negative */
1728 static SwsVector
*sws_getShiftedVec(SwsVector
*a
, int shift
)
1730 int length
= a
->length
+ FFABS(shift
) * 2;
1732 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1737 for (i
= 0; i
< a
->length
; i
++) {
1738 vec
->coeff
[i
+ (length
- 1) / 2 -
1739 (a
->length
- 1) / 2 - shift
] = a
->coeff
[i
];
1745 void sws_shiftVec(SwsVector
*a
, int shift
)
1747 SwsVector
*shifted
= sws_getShiftedVec(a
, shift
);
1749 a
->coeff
= shifted
->coeff
;
1750 a
->length
= shifted
->length
;
1754 void sws_addVec(SwsVector
*a
, SwsVector
*b
)
1756 SwsVector
*sum
= sws_sumVec(a
, b
);
1758 a
->coeff
= sum
->coeff
;
1759 a
->length
= sum
->length
;
1763 void sws_subVec(SwsVector
*a
, SwsVector
*b
)
1765 SwsVector
*diff
= sws_diffVec(a
, b
);
1767 a
->coeff
= diff
->coeff
;
1768 a
->length
= diff
->length
;
1772 void sws_convVec(SwsVector
*a
, SwsVector
*b
)
1774 SwsVector
*conv
= sws_getConvVec(a
, b
);
1776 a
->coeff
= conv
->coeff
;
1777 a
->length
= conv
->length
;
1781 SwsVector
*sws_cloneVec(SwsVector
*a
)
1783 SwsVector
*vec
= sws_allocVec(a
->length
);
1788 memcpy(vec
->coeff
, a
->coeff
, a
->length
* sizeof(*a
->coeff
));
1793 void sws_printVec2(SwsVector
*a
, AVClass
*log_ctx
, int log_level
)
1800 for (i
= 0; i
< a
->length
; i
++)
1801 if (a
->coeff
[i
] > max
)
1804 for (i
= 0; i
< a
->length
; i
++)
1805 if (a
->coeff
[i
] < min
)
1810 for (i
= 0; i
< a
->length
; i
++) {
1811 int x
= (int)((a
->coeff
[i
] - min
) * 60.0 / range
+ 0.5);
1812 av_log(log_ctx
, log_level
, "%1.3f ", a
->coeff
[i
]);
1814 av_log(log_ctx
, log_level
, " ");
1815 av_log(log_ctx
, log_level
, "|\n");
1819 void sws_freeVec(SwsVector
*a
)
1823 av_freep(&a
->coeff
);
1828 void sws_freeFilter(SwsFilter
*filter
)
1833 sws_freeVec(filter
->lumH
);
1834 sws_freeVec(filter
->lumV
);
1835 sws_freeVec(filter
->chrH
);
1836 sws_freeVec(filter
->chrV
);
1840 void sws_freeContext(SwsContext
*c
)
1847 for (i
= 0; i
< c
->vLumBufSize
; i
++)
1848 av_freep(&c
->lumPixBuf
[i
]);
1849 av_freep(&c
->lumPixBuf
);
1852 if (c
->chrUPixBuf
) {
1853 for (i
= 0; i
< c
->vChrBufSize
; i
++)
1854 av_freep(&c
->chrUPixBuf
[i
]);
1855 av_freep(&c
->chrUPixBuf
);
1856 av_freep(&c
->chrVPixBuf
);
1859 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
) {
1860 for (i
= 0; i
< c
->vLumBufSize
; i
++)
1861 av_freep(&c
->alpPixBuf
[i
]);
1862 av_freep(&c
->alpPixBuf
);
1865 for (i
= 0; i
< 4; i
++)
1866 av_freep(&c
->dither_error
[i
]);
1868 av_freep(&c
->vLumFilter
);
1869 av_freep(&c
->vChrFilter
);
1870 av_freep(&c
->hLumFilter
);
1871 av_freep(&c
->hChrFilter
);
1873 av_freep(&c
->vYCoeffsBank
);
1874 av_freep(&c
->vCCoeffsBank
);
1877 av_freep(&c
->vLumFilterPos
);
1878 av_freep(&c
->vChrFilterPos
);
1879 av_freep(&c
->hLumFilterPos
);
1880 av_freep(&c
->hChrFilterPos
);
1884 if (c
->lumMmxextFilterCode
)
1885 munmap(c
->lumMmxextFilterCode
, c
->lumMmxextFilterCodeSize
);
1886 if (c
->chrMmxextFilterCode
)
1887 munmap(c
->chrMmxextFilterCode
, c
->chrMmxextFilterCodeSize
);
1888 #elif HAVE_VIRTUALALLOC
1889 if (c
->lumMmxextFilterCode
)
1890 VirtualFree(c
->lumMmxextFilterCode
, 0, MEM_RELEASE
);
1891 if (c
->chrMmxextFilterCode
)
1892 VirtualFree(c
->chrMmxextFilterCode
, 0, MEM_RELEASE
);
1894 av_free(c
->lumMmxextFilterCode
);
1895 av_free(c
->chrMmxextFilterCode
);
1897 c
->lumMmxextFilterCode
= NULL
;
1898 c
->chrMmxextFilterCode
= NULL
;
1899 #endif /* HAVE_MMX_INLINE */
1901 av_freep(&c
->yuvTable
);
1902 av_freep(&c
->formatConvBuffer
);
1907 struct SwsContext
*sws_getCachedContext(struct SwsContext
*context
, int srcW
,
1908 int srcH
, enum AVPixelFormat srcFormat
,
1910 enum AVPixelFormat dstFormat
, int flags
,
1911 SwsFilter
*srcFilter
,
1912 SwsFilter
*dstFilter
,
1913 const double *param
)
1915 static const double default_param
[2] = { SWS_PARAM_DEFAULT
,
1916 SWS_PARAM_DEFAULT
};
1919 param
= default_param
;
1922 (context
->srcW
!= srcW
||
1923 context
->srcH
!= srcH
||
1924 context
->srcFormat
!= srcFormat
||
1925 context
->dstW
!= dstW
||
1926 context
->dstH
!= dstH
||
1927 context
->dstFormat
!= dstFormat
||
1928 context
->flags
!= flags
||
1929 context
->param
[0] != param
[0] ||
1930 context
->param
[1] != param
[1])) {
1931 sws_freeContext(context
);
1936 if (!(context
= sws_alloc_context()))
1938 context
->srcW
= srcW
;
1939 context
->srcH
= srcH
;
1940 context
->srcFormat
= srcFormat
;
1941 context
->dstW
= dstW
;
1942 context
->dstH
= dstH
;
1943 context
->dstFormat
= dstFormat
;
1944 context
->flags
= flags
;
1945 context
->param
[0] = param
[0];
1946 context
->param
[1] = param
[1];
1947 if (sws_init_context(context
, srcFilter
, dstFilter
) < 0) {
1948 sws_freeContext(context
);