Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at> | |
3 | * Copyright (C) 2008 David Conrad | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | #include "libavutil/attributes.h" | |
23 | #include "libavutil/avassert.h" | |
24 | #include "libavutil/common.h" | |
25 | #include "me_cmp.h" | |
26 | #include "snow_dwt.h" | |
27 | ||
28 | int ff_slice_buffer_init(slice_buffer *buf, int line_count, | |
29 | int max_allocated_lines, int line_width, | |
30 | IDWTELEM *base_buffer) | |
31 | { | |
32 | int i; | |
33 | ||
34 | buf->base_buffer = base_buffer; | |
35 | buf->line_count = line_count; | |
36 | buf->line_width = line_width; | |
37 | buf->data_count = max_allocated_lines; | |
38 | buf->line = av_mallocz_array(line_count, sizeof(IDWTELEM *)); | |
39 | if (!buf->line) | |
40 | return AVERROR(ENOMEM); | |
41 | buf->data_stack = av_malloc_array(max_allocated_lines, sizeof(IDWTELEM *)); | |
42 | if (!buf->data_stack) { | |
43 | av_freep(&buf->line); | |
44 | return AVERROR(ENOMEM); | |
45 | } | |
46 | ||
47 | for (i = 0; i < max_allocated_lines; i++) { | |
48 | buf->data_stack[i] = av_malloc_array(line_width, sizeof(IDWTELEM)); | |
49 | if (!buf->data_stack[i]) { | |
50 | for (i--; i >=0; i--) | |
51 | av_freep(&buf->data_stack[i]); | |
52 | av_freep(&buf->data_stack); | |
53 | av_freep(&buf->line); | |
54 | return AVERROR(ENOMEM); | |
55 | } | |
56 | } | |
57 | ||
58 | buf->data_stack_top = max_allocated_lines - 1; | |
59 | return 0; | |
60 | } | |
61 | ||
62 | IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, int line) | |
63 | { | |
64 | IDWTELEM *buffer; | |
65 | ||
66 | av_assert0(buf->data_stack_top >= 0); | |
67 | // av_assert1(!buf->line[line]); | |
68 | if (buf->line[line]) | |
69 | return buf->line[line]; | |
70 | ||
71 | buffer = buf->data_stack[buf->data_stack_top]; | |
72 | buf->data_stack_top--; | |
73 | buf->line[line] = buffer; | |
74 | ||
75 | return buffer; | |
76 | } | |
77 | ||
78 | void ff_slice_buffer_release(slice_buffer *buf, int line) | |
79 | { | |
80 | IDWTELEM *buffer; | |
81 | ||
82 | av_assert1(line >= 0 && line < buf->line_count); | |
83 | av_assert1(buf->line[line]); | |
84 | ||
85 | buffer = buf->line[line]; | |
86 | buf->data_stack_top++; | |
87 | buf->data_stack[buf->data_stack_top] = buffer; | |
88 | buf->line[line] = NULL; | |
89 | } | |
90 | ||
91 | void ff_slice_buffer_flush(slice_buffer *buf) | |
92 | { | |
93 | int i; | |
94 | for (i = 0; i < buf->line_count; i++) | |
95 | if (buf->line[i]) | |
96 | ff_slice_buffer_release(buf, i); | |
97 | } | |
98 | ||
99 | void ff_slice_buffer_destroy(slice_buffer *buf) | |
100 | { | |
101 | int i; | |
102 | ff_slice_buffer_flush(buf); | |
103 | ||
104 | for (i = buf->data_count - 1; i >= 0; i--) | |
105 | av_freep(&buf->data_stack[i]); | |
106 | av_freep(&buf->data_stack); | |
107 | av_freep(&buf->line); | |
108 | } | |
109 | ||
/**
 * Reflect index v into the range [0, m] by mirroring at both ends.
 *
 * @param v index to fold, may be negative or larger than m
 * @param m largest valid index
 * @return the mirrored index
 */
static inline int mirror(int v, int m)
{
    /* With m == 0 the only valid index is 0. The reflection loop below could
     * never terminate for any other v, because adding 2 * m changes nothing. */
    if (m == 0)
        return 0;

    /* The unsigned compare is true for v < 0 as well as for v > m. */
    while ((unsigned)v > (unsigned)m) {
        v = -v;
        if (v < 0)
            v += 2 * m;
    }
    return v;
}
119 | ||
120 | static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
121 | int dst_step, int src_step, int ref_step, | |
122 | int width, int mul, int add, int shift, | |
123 | int highpass, int inverse) | |
124 | { | |
125 | const int mirror_left = !highpass; | |
126 | const int mirror_right = (width & 1) ^ highpass; | |
127 | const int w = (width >> 1) - 1 + (highpass & width); | |
128 | int i; | |
129 | ||
130 | #define LIFT(src, ref, inv) ((src) + ((inv) ? -(ref) : +(ref))) | |
131 | if (mirror_left) { | |
132 | dst[0] = LIFT(src[0], ((mul * 2 * ref[0] + add) >> shift), inverse); | |
133 | dst += dst_step; | |
134 | src += src_step; | |
135 | } | |
136 | ||
137 | for (i = 0; i < w; i++) | |
138 | dst[i * dst_step] = LIFT(src[i * src_step], | |
139 | ((mul * (ref[i * ref_step] + | |
140 | ref[(i + 1) * ref_step]) + | |
141 | add) >> shift), | |
142 | inverse); | |
143 | ||
144 | if (mirror_right) | |
145 | dst[w * dst_step] = LIFT(src[w * src_step], | |
146 | ((mul * 2 * ref[w * ref_step] + add) >> shift), | |
147 | inverse); | |
148 | } | |
149 | ||
150 | static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
151 | int dst_step, int src_step, int ref_step, | |
152 | int width, int mul, int add, int shift, | |
153 | int highpass, int inverse) | |
154 | { | |
155 | const int mirror_left = !highpass; | |
156 | const int mirror_right = (width & 1) ^ highpass; | |
157 | const int w = (width >> 1) - 1 + (highpass & width); | |
158 | int i; | |
159 | ||
160 | av_assert1(shift == 4); | |
161 | #define LIFTS(src, ref, inv) \ | |
162 | ((inv) ? (src) + (((ref) + 4 * (src)) >> shift) \ | |
163 | : -((-16 * (src) + (ref) + add / \ | |
164 | 4 + 1 + (5 << 25)) / (5 * 4) - (1 << 23))) | |
165 | if (mirror_left) { | |
166 | dst[0] = LIFTS(src[0], mul * 2 * ref[0] + add, inverse); | |
167 | dst += dst_step; | |
168 | src += src_step; | |
169 | } | |
170 | ||
171 | for (i = 0; i < w; i++) | |
172 | dst[i * dst_step] = LIFTS(src[i * src_step], | |
173 | mul * (ref[i * ref_step] + | |
174 | ref[(i + 1) * ref_step]) + add, | |
175 | inverse); | |
176 | ||
177 | if (mirror_right) | |
178 | dst[w * dst_step] = LIFTS(src[w * src_step], | |
179 | mul * 2 * ref[w * ref_step] + add, | |
180 | inverse); | |
181 | } | |
182 | ||
183 | static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width) | |
184 | { | |
185 | const int width2 = width >> 1; | |
186 | int x; | |
187 | const int w2 = (width + 1) >> 1; | |
188 | ||
189 | for (x = 0; x < width2; x++) { | |
190 | temp[x] = b[2 * x]; | |
191 | temp[x + w2] = b[2 * x + 1]; | |
192 | } | |
193 | if (width & 1) | |
194 | temp[x] = b[2 * x]; | |
195 | lift(b + w2, temp + w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); | |
196 | lift(b, temp, b + w2, 1, 1, 1, width, 1, 2, 2, 0, 0); | |
197 | } | |
198 | ||
199 | static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, | |
200 | int width) | |
201 | { | |
202 | int i; | |
203 | ||
204 | for (i = 0; i < width; i++) | |
205 | b1[i] -= (b0[i] + b2[i]) >> 1; | |
206 | } | |
207 | ||
208 | static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, | |
209 | int width) | |
210 | { | |
211 | int i; | |
212 | ||
213 | for (i = 0; i < width; i++) | |
214 | b1[i] += (b0[i] + b2[i] + 2) >> 2; | |
215 | } | |
216 | ||
217 | static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp, | |
218 | int width, int height, int stride) | |
219 | { | |
220 | int y; | |
221 | DWTELEM *b0 = buffer + mirror(-2 - 1, height - 1) * stride; | |
222 | DWTELEM *b1 = buffer + mirror(-2, height - 1) * stride; | |
223 | ||
224 | for (y = -2; y < height; y += 2) { | |
225 | DWTELEM *b2 = buffer + mirror(y + 1, height - 1) * stride; | |
226 | DWTELEM *b3 = buffer + mirror(y + 2, height - 1) * stride; | |
227 | ||
228 | if (y + 1 < (unsigned)height) | |
229 | horizontal_decompose53i(b2, temp, width); | |
230 | if (y + 2 < (unsigned)height) | |
231 | horizontal_decompose53i(b3, temp, width); | |
232 | ||
233 | if (y + 1 < (unsigned)height) | |
234 | vertical_decompose53iH0(b1, b2, b3, width); | |
235 | if (y + 0 < (unsigned)height) | |
236 | vertical_decompose53iL0(b0, b1, b2, width); | |
237 | ||
238 | b0 = b2; | |
239 | b1 = b3; | |
240 | } | |
241 | } | |
242 | ||
243 | static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width) | |
244 | { | |
245 | const int w2 = (width + 1) >> 1; | |
246 | ||
247 | lift(temp + w2, b + 1, b, 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); | |
248 | liftS(temp, b, temp + w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); | |
249 | lift(b + w2, temp + w2, temp, 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); | |
250 | lift(b, temp, b + w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); | |
251 | } | |
252 | ||
253 | static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, | |
254 | int width) | |
255 | { | |
256 | int i; | |
257 | ||
258 | for (i = 0; i < width; i++) | |
259 | b1[i] -= (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS; | |
260 | } | |
261 | ||
262 | static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, | |
263 | int width) | |
264 | { | |
265 | int i; | |
266 | ||
267 | for (i = 0; i < width; i++) | |
268 | b1[i] += (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS; | |
269 | } | |
270 | ||
271 | static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, | |
272 | int width) | |
273 | { | |
274 | int i; | |
275 | ||
276 | for (i = 0; i < width; i++) | |
277 | b1[i] = (16 * 4 * b1[i] - 4 * (b0[i] + b2[i]) + W_BO * 5 + (5 << 27)) / | |
278 | (5 * 16) - (1 << 23); | |
279 | } | |
280 | ||
281 | static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, | |
282 | int width) | |
283 | { | |
284 | int i; | |
285 | ||
286 | for (i = 0; i < width; i++) | |
287 | b1[i] += (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS; | |
288 | } | |
289 | ||
290 | static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp, | |
291 | int width, int height, int stride) | |
292 | { | |
293 | int y; | |
294 | DWTELEM *b0 = buffer + mirror(-4 - 1, height - 1) * stride; | |
295 | DWTELEM *b1 = buffer + mirror(-4, height - 1) * stride; | |
296 | DWTELEM *b2 = buffer + mirror(-4 + 1, height - 1) * stride; | |
297 | DWTELEM *b3 = buffer + mirror(-4 + 2, height - 1) * stride; | |
298 | ||
299 | for (y = -4; y < height; y += 2) { | |
300 | DWTELEM *b4 = buffer + mirror(y + 3, height - 1) * stride; | |
301 | DWTELEM *b5 = buffer + mirror(y + 4, height - 1) * stride; | |
302 | ||
303 | if (y + 3 < (unsigned)height) | |
304 | horizontal_decompose97i(b4, temp, width); | |
305 | if (y + 4 < (unsigned)height) | |
306 | horizontal_decompose97i(b5, temp, width); | |
307 | ||
308 | if (y + 3 < (unsigned)height) | |
309 | vertical_decompose97iH0(b3, b4, b5, width); | |
310 | if (y + 2 < (unsigned)height) | |
311 | vertical_decompose97iL0(b2, b3, b4, width); | |
312 | if (y + 1 < (unsigned)height) | |
313 | vertical_decompose97iH1(b1, b2, b3, width); | |
314 | if (y + 0 < (unsigned)height) | |
315 | vertical_decompose97iL1(b0, b1, b2, width); | |
316 | ||
317 | b0 = b2; | |
318 | b1 = b3; | |
319 | b2 = b4; | |
320 | b3 = b5; | |
321 | } | |
322 | } | |
323 | ||
324 | void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height, | |
325 | int stride, int type, int decomposition_count) | |
326 | { | |
327 | int level; | |
328 | ||
329 | for (level = 0; level < decomposition_count; level++) { | |
330 | switch (type) { | |
331 | case DWT_97: | |
332 | spatial_decompose97i(buffer, temp, | |
333 | width >> level, height >> level, | |
334 | stride << level); | |
335 | break; | |
336 | case DWT_53: | |
337 | spatial_decompose53i(buffer, temp, | |
338 | width >> level, height >> level, | |
339 | stride << level); | |
340 | break; | |
341 | } | |
342 | } | |
343 | } | |
344 | ||
345 | static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width) | |
346 | { | |
347 | const int width2 = width >> 1; | |
348 | const int w2 = (width + 1) >> 1; | |
349 | int x; | |
350 | ||
351 | for (x = 0; x < width2; x++) { | |
352 | temp[2 * x] = b[x]; | |
353 | temp[2 * x + 1] = b[x + w2]; | |
354 | } | |
355 | if (width & 1) | |
356 | temp[2 * x] = b[x]; | |
357 | ||
358 | b[0] = temp[0] - ((temp[1] + 1) >> 1); | |
359 | for (x = 2; x < width - 1; x += 2) { | |
360 | b[x] = temp[x] - ((temp[x - 1] + temp[x + 1] + 2) >> 2); | |
361 | b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1); | |
362 | } | |
363 | if (width & 1) { | |
364 | b[x] = temp[x] - ((temp[x - 1] + 1) >> 1); | |
365 | b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1); | |
366 | } else | |
367 | b[x - 1] = temp[x - 1] + b[x - 2]; | |
368 | } | |
369 | ||
370 | static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, | |
371 | int width) | |
372 | { | |
373 | int i; | |
374 | ||
375 | for (i = 0; i < width; i++) | |
376 | b1[i] += (b0[i] + b2[i]) >> 1; | |
377 | } | |
378 | ||
379 | static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, | |
380 | int width) | |
381 | { | |
382 | int i; | |
383 | ||
384 | for (i = 0; i < width; i++) | |
385 | b1[i] -= (b0[i] + b2[i] + 2) >> 2; | |
386 | } | |
387 | ||
388 | static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer *sb, | |
389 | int height, int stride_line) | |
390 | { | |
391 | cs->b0 = slice_buffer_get_line(sb, | |
392 | mirror(-1 - 1, height - 1) * stride_line); | |
393 | cs->b1 = slice_buffer_get_line(sb, mirror(-1, height - 1) * stride_line); | |
394 | cs->y = -1; | |
395 | } | |
396 | ||
397 | static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, | |
398 | int height, int stride) | |
399 | { | |
400 | cs->b0 = buffer + mirror(-1 - 1, height - 1) * stride; | |
401 | cs->b1 = buffer + mirror(-1, height - 1) * stride; | |
402 | cs->y = -1; | |
403 | } | |
404 | ||
405 | static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb, | |
406 | IDWTELEM *temp, | |
407 | int width, int height, | |
408 | int stride_line) | |
409 | { | |
410 | int y = cs->y; | |
411 | ||
412 | IDWTELEM *b0 = cs->b0; | |
413 | IDWTELEM *b1 = cs->b1; | |
414 | IDWTELEM *b2 = slice_buffer_get_line(sb, | |
415 | mirror(y + 1, height - 1) * | |
416 | stride_line); | |
417 | IDWTELEM *b3 = slice_buffer_get_line(sb, | |
418 | mirror(y + 2, height - 1) * | |
419 | stride_line); | |
420 | ||
421 | if (y + 1 < (unsigned)height && y < (unsigned)height) { | |
422 | int x; | |
423 | ||
424 | for (x = 0; x < width; x++) { | |
425 | b2[x] -= (b1[x] + b3[x] + 2) >> 2; | |
426 | b1[x] += (b0[x] + b2[x]) >> 1; | |
427 | } | |
428 | } else { | |
429 | if (y + 1 < (unsigned)height) | |
430 | vertical_compose53iL0(b1, b2, b3, width); | |
431 | if (y + 0 < (unsigned)height) | |
432 | vertical_compose53iH0(b0, b1, b2, width); | |
433 | } | |
434 | ||
435 | if (y - 1 < (unsigned)height) | |
436 | horizontal_compose53i(b0, temp, width); | |
437 | if (y + 0 < (unsigned)height) | |
438 | horizontal_compose53i(b1, temp, width); | |
439 | ||
440 | cs->b0 = b2; | |
441 | cs->b1 = b3; | |
442 | cs->y += 2; | |
443 | } | |
444 | ||
445 | static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, | |
446 | IDWTELEM *temp, int width, int height, | |
447 | int stride) | |
448 | { | |
449 | int y = cs->y; | |
450 | IDWTELEM *b0 = cs->b0; | |
451 | IDWTELEM *b1 = cs->b1; | |
452 | IDWTELEM *b2 = buffer + mirror(y + 1, height - 1) * stride; | |
453 | IDWTELEM *b3 = buffer + mirror(y + 2, height - 1) * stride; | |
454 | ||
455 | if (y + 1 < (unsigned)height) | |
456 | vertical_compose53iL0(b1, b2, b3, width); | |
457 | if (y + 0 < (unsigned)height) | |
458 | vertical_compose53iH0(b0, b1, b2, width); | |
459 | ||
460 | if (y - 1 < (unsigned)height) | |
461 | horizontal_compose53i(b0, temp, width); | |
462 | if (y + 0 < (unsigned)height) | |
463 | horizontal_compose53i(b1, temp, width); | |
464 | ||
465 | cs->b0 = b2; | |
466 | cs->b1 = b3; | |
467 | cs->y += 2; | |
468 | } | |
469 | ||
470 | void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width) | |
471 | { | |
472 | const int w2 = (width + 1) >> 1; | |
473 | int x; | |
474 | ||
475 | temp[0] = b[0] - ((3 * b[w2] + 2) >> 2); | |
476 | for (x = 1; x < (width >> 1); x++) { | |
477 | temp[2 * x] = b[x] - ((3 * (b[x + w2 - 1] + b[x + w2]) + 4) >> 3); | |
478 | temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x]; | |
479 | } | |
480 | if (width & 1) { | |
481 | temp[2 * x] = b[x] - ((3 * b[x + w2 - 1] + 2) >> 2); | |
482 | temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x]; | |
483 | } else | |
484 | temp[2 * x - 1] = b[x + w2 - 1] - 2 * temp[2 * x - 2]; | |
485 | ||
486 | b[0] = temp[0] + ((2 * temp[0] + temp[1] + 4) >> 3); | |
487 | for (x = 2; x < width - 1; x += 2) { | |
488 | b[x] = temp[x] + ((4 * temp[x] + temp[x - 1] + temp[x + 1] + 8) >> 4); | |
489 | b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1); | |
490 | } | |
491 | if (width & 1) { | |
492 | b[x] = temp[x] + ((2 * temp[x] + temp[x - 1] + 4) >> 3); | |
493 | b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1); | |
494 | } else | |
495 | b[x - 1] = temp[x - 1] + 3 * b[x - 2]; | |
496 | } | |
497 | ||
498 | static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, | |
499 | int width) | |
500 | { | |
501 | int i; | |
502 | ||
503 | for (i = 0; i < width; i++) | |
504 | b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS; | |
505 | } | |
506 | ||
507 | static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, | |
508 | int width) | |
509 | { | |
510 | int i; | |
511 | ||
512 | for (i = 0; i < width; i++) | |
513 | b1[i] -= (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS; | |
514 | } | |
515 | ||
516 | static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, | |
517 | int width) | |
518 | { | |
519 | int i; | |
520 | ||
521 | for (i = 0; i < width; i++) | |
522 | b1[i] += (W_BM * (b0[i] + b2[i]) + 4 * b1[i] + W_BO) >> W_BS; | |
523 | } | |
524 | ||
525 | static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, | |
526 | int width) | |
527 | { | |
528 | int i; | |
529 | ||
530 | for (i = 0; i < width; i++) | |
531 | b1[i] -= (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS; | |
532 | } | |
533 | ||
534 | void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, | |
535 | IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, | |
536 | int width) | |
537 | { | |
538 | int i; | |
539 | ||
540 | for (i = 0; i < width; i++) { | |
541 | b4[i] -= (W_DM * (b3[i] + b5[i]) + W_DO) >> W_DS; | |
542 | b3[i] -= (W_CM * (b2[i] + b4[i]) + W_CO) >> W_CS; | |
543 | b2[i] += (W_BM * (b1[i] + b3[i]) + 4 * b2[i] + W_BO) >> W_BS; | |
544 | b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS; | |
545 | } | |
546 | } | |
547 | ||
548 | static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer *sb, | |
549 | int height, int stride_line) | |
550 | { | |
551 | cs->b0 = slice_buffer_get_line(sb, mirror(-3 - 1, height - 1) * stride_line); | |
552 | cs->b1 = slice_buffer_get_line(sb, mirror(-3, height - 1) * stride_line); | |
553 | cs->b2 = slice_buffer_get_line(sb, mirror(-3 + 1, height - 1) * stride_line); | |
554 | cs->b3 = slice_buffer_get_line(sb, mirror(-3 + 2, height - 1) * stride_line); | |
555 | cs->y = -3; | |
556 | } | |
557 | ||
558 | static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, | |
559 | int stride) | |
560 | { | |
561 | cs->b0 = buffer + mirror(-3 - 1, height - 1) * stride; | |
562 | cs->b1 = buffer + mirror(-3, height - 1) * stride; | |
563 | cs->b2 = buffer + mirror(-3 + 1, height - 1) * stride; | |
564 | cs->b3 = buffer + mirror(-3 + 2, height - 1) * stride; | |
565 | cs->y = -3; | |
566 | } | |
567 | ||
568 | static void spatial_compose97i_dy_buffered(SnowDWTContext *dsp, DWTCompose *cs, | |
569 | slice_buffer * sb, IDWTELEM *temp, | |
570 | int width, int height, | |
571 | int stride_line) | |
572 | { | |
573 | int y = cs->y; | |
574 | ||
575 | IDWTELEM *b0 = cs->b0; | |
576 | IDWTELEM *b1 = cs->b1; | |
577 | IDWTELEM *b2 = cs->b2; | |
578 | IDWTELEM *b3 = cs->b3; | |
579 | IDWTELEM *b4 = slice_buffer_get_line(sb, | |
580 | mirror(y + 3, height - 1) * | |
581 | stride_line); | |
582 | IDWTELEM *b5 = slice_buffer_get_line(sb, | |
583 | mirror(y + 4, height - 1) * | |
584 | stride_line); | |
585 | ||
586 | if (y > 0 && y + 4 < height) { | |
587 | dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); | |
588 | } else { | |
589 | if (y + 3 < (unsigned)height) | |
590 | vertical_compose97iL1(b3, b4, b5, width); | |
591 | if (y + 2 < (unsigned)height) | |
592 | vertical_compose97iH1(b2, b3, b4, width); | |
593 | if (y + 1 < (unsigned)height) | |
594 | vertical_compose97iL0(b1, b2, b3, width); | |
595 | if (y + 0 < (unsigned)height) | |
596 | vertical_compose97iH0(b0, b1, b2, width); | |
597 | } | |
598 | ||
599 | if (y - 1 < (unsigned)height) | |
600 | dsp->horizontal_compose97i(b0, temp, width); | |
601 | if (y + 0 < (unsigned)height) | |
602 | dsp->horizontal_compose97i(b1, temp, width); | |
603 | ||
604 | cs->b0 = b2; | |
605 | cs->b1 = b3; | |
606 | cs->b2 = b4; | |
607 | cs->b3 = b5; | |
608 | cs->y += 2; | |
609 | } | |
610 | ||
611 | static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, | |
612 | IDWTELEM *temp, int width, int height, | |
613 | int stride) | |
614 | { | |
615 | int y = cs->y; | |
616 | IDWTELEM *b0 = cs->b0; | |
617 | IDWTELEM *b1 = cs->b1; | |
618 | IDWTELEM *b2 = cs->b2; | |
619 | IDWTELEM *b3 = cs->b3; | |
620 | IDWTELEM *b4 = buffer + mirror(y + 3, height - 1) * stride; | |
621 | IDWTELEM *b5 = buffer + mirror(y + 4, height - 1) * stride; | |
622 | ||
623 | if (y + 3 < (unsigned)height) | |
624 | vertical_compose97iL1(b3, b4, b5, width); | |
625 | if (y + 2 < (unsigned)height) | |
626 | vertical_compose97iH1(b2, b3, b4, width); | |
627 | if (y + 1 < (unsigned)height) | |
628 | vertical_compose97iL0(b1, b2, b3, width); | |
629 | if (y + 0 < (unsigned)height) | |
630 | vertical_compose97iH0(b0, b1, b2, width); | |
631 | ||
632 | if (y - 1 < (unsigned)height) | |
633 | ff_snow_horizontal_compose97i(b0, temp, width); | |
634 | if (y + 0 < (unsigned)height) | |
635 | ff_snow_horizontal_compose97i(b1, temp, width); | |
636 | ||
637 | cs->b0 = b2; | |
638 | cs->b1 = b3; | |
639 | cs->b2 = b4; | |
640 | cs->b3 = b5; | |
641 | cs->y += 2; | |
642 | } | |
643 | ||
644 | void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width, | |
645 | int height, int stride_line, int type, | |
646 | int decomposition_count) | |
647 | { | |
648 | int level; | |
649 | for (level = decomposition_count - 1; level >= 0; level--) { | |
650 | switch (type) { | |
651 | case DWT_97: | |
652 | spatial_compose97i_buffered_init(cs + level, sb, height >> level, | |
653 | stride_line << level); | |
654 | break; | |
655 | case DWT_53: | |
656 | spatial_compose53i_buffered_init(cs + level, sb, height >> level, | |
657 | stride_line << level); | |
658 | break; | |
659 | } | |
660 | } | |
661 | } | |
662 | ||
663 | void ff_spatial_idwt_buffered_slice(SnowDWTContext *dsp, DWTCompose *cs, | |
664 | slice_buffer *slice_buf, IDWTELEM *temp, | |
665 | int width, int height, int stride_line, | |
666 | int type, int decomposition_count, int y) | |
667 | { | |
668 | const int support = type == 1 ? 3 : 5; | |
669 | int level; | |
670 | if (type == 2) | |
671 | return; | |
672 | ||
673 | for (level = decomposition_count - 1; level >= 0; level--) | |
674 | while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) { | |
675 | switch (type) { | |
676 | case DWT_97: | |
677 | spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, temp, | |
678 | width >> level, | |
679 | height >> level, | |
680 | stride_line << level); | |
681 | break; | |
682 | case DWT_53: | |
683 | spatial_compose53i_dy_buffered(cs + level, slice_buf, temp, | |
684 | width >> level, | |
685 | height >> level, | |
686 | stride_line << level); | |
687 | break; | |
688 | } | |
689 | } | |
690 | } | |
691 | ||
692 | static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, | |
693 | int height, int stride, int type, | |
694 | int decomposition_count) | |
695 | { | |
696 | int level; | |
697 | for (level = decomposition_count - 1; level >= 0; level--) { | |
698 | switch (type) { | |
699 | case DWT_97: | |
700 | spatial_compose97i_init(cs + level, buffer, height >> level, | |
701 | stride << level); | |
702 | break; | |
703 | case DWT_53: | |
704 | spatial_compose53i_init(cs + level, buffer, height >> level, | |
705 | stride << level); | |
706 | break; | |
707 | } | |
708 | } | |
709 | } | |
710 | ||
711 | static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, | |
712 | IDWTELEM *temp, int width, int height, | |
713 | int stride, int type, | |
714 | int decomposition_count, int y) | |
715 | { | |
716 | const int support = type == 1 ? 3 : 5; | |
717 | int level; | |
718 | if (type == 2) | |
719 | return; | |
720 | ||
721 | for (level = decomposition_count - 1; level >= 0; level--) | |
722 | while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) { | |
723 | switch (type) { | |
724 | case DWT_97: | |
725 | spatial_compose97i_dy(cs + level, buffer, temp, width >> level, | |
726 | height >> level, stride << level); | |
727 | break; | |
728 | case DWT_53: | |
729 | spatial_compose53i_dy(cs + level, buffer, temp, width >> level, | |
730 | height >> level, stride << level); | |
731 | break; | |
732 | } | |
733 | } | |
734 | } | |
735 | ||
736 | void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height, | |
737 | int stride, int type, int decomposition_count) | |
738 | { | |
739 | DWTCompose cs[MAX_DECOMPOSITIONS]; | |
740 | int y; | |
741 | ff_spatial_idwt_init(cs, buffer, width, height, stride, type, | |
742 | decomposition_count); | |
743 | for (y = 0; y < height; y += 4) | |
744 | ff_spatial_idwt_slice(cs, buffer, temp, width, height, stride, type, | |
745 | decomposition_count, y); | |
746 | } | |
747 | ||
/**
 * Wavelet-domain comparison of two pixel blocks.
 *
 * The per-pixel difference pix1 - pix2 is scaled by 16 and written into a
 * scratch array with 32 elements per row, transformed with ff_spatial_dwt(),
 * and the absolute values of the coefficients are summed after multiplying
 * each by the weight for its level and orientation from the scale table.
 *
 * @param v         not used by this function
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows of pix1 and of pix2
 * @param w         block width; 8 selects 3 decomposition levels, anything
 *                  else 4. Must equal h (asserted) and fit the 32x32 scratch
 * @param h         block height
 * @param type      first index into the scale table (0: 9/7, 1: 5/3); also
 *                  passed on to ff_spatial_dwt()
 * @return the weighted sum, shifted right by 9
 */
static inline int w_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size,
                      int w, int h, int type)
{
    int s, i, j;
    const int dec_count = w == 8 ? 3 : 4;
    int tmp[32 * 32], tmp2[32];
    int level, ori;
    static const int scale[2][2][4][4] = {
        {
            { // 9/7 8x8 dec=3
                { 268, 239, 239, 213 },
                {   0, 224, 224, 152 },
                {   0, 135, 135, 110 },
            },
            { // 9/7 16x16 or 32x32 dec=4
                { 344, 310, 310, 280 },
                {   0, 320, 320, 228 },
                {   0, 175, 175, 136 },
                {   0, 129, 129, 102 },
            }
        },
        {
            { // 5/3 8x8 dec=3
                { 275, 245, 245, 218 },
                {   0, 230, 230, 156 },
                {   0, 138, 138, 113 },
            },
            { // 5/3 16x16 or 32x32 dec=4
                { 352, 317, 317, 286 },
                {   0, 328, 328, 233 },
                {   0, 180, 180, 140 },
                {   0, 132, 132, 105 },
            }
        }
    };

    /* Scale by multiplication rather than "<< 4": the difference can be
     * negative, and left-shifting a negative int is undefined behaviour. */
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j += 4) {
            tmp[32 * i + j + 0] = (pix1[j + 0] - pix2[j + 0]) * (1 << 4);
            tmp[32 * i + j + 1] = (pix1[j + 1] - pix2[j + 1]) * (1 << 4);
            tmp[32 * i + j + 2] = (pix1[j + 2] - pix2[j + 2]) * (1 << 4);
            tmp[32 * i + j + 3] = (pix1[j + 3] - pix2[j + 3]) * (1 << 4);
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, tmp2, w, h, 32, type, dec_count);

    s = 0;
    av_assert1(w == h);
    for (level = 0; level < dec_count; level++)
        /* Orientation 0 is visited for level 0 only. */
        for (ori = level ? 1 : 0; ori < 4; ori++) {
            int size   = w >> (dec_count - level);
            int sx     = (ori & 1) ? size : 0;
            int stride = 32 << (dec_count - level);
            int sy     = (ori & 2) ? stride >> 1 : 0;

            for (i = 0; i < size; i++)
                for (j = 0; j < size; j++) {
                    int v = tmp[sx + sy + i * stride + j] *
                            scale[type][dec_count - 3][level][ori];
                    s += FFABS(v);
                }
        }
    av_assert1(s >= 0);
    return s >> 9;
}
816 | ||
/** w_c() for 8-pixel-wide blocks with type 1 (the 5/3 weights). */
static int w53_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
{
    const int block_width = 8;
    const int dwt_type    = 1;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
821 | ||
/** w_c() for 8-pixel-wide blocks with type 0 (the 9/7 weights). */
static int w97_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
{
    const int block_width = 8;
    const int dwt_type    = 0;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
826 | ||
/** w_c() for 16-pixel-wide blocks with type 1 (the 5/3 weights). */
static int w53_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
{
    const int block_width = 16;
    const int dwt_type    = 1;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
831 | ||
/** w_c() for 16-pixel-wide blocks with type 0 (the 9/7 weights). */
static int w97_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
{
    const int block_width = 16;
    const int dwt_type    = 0;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
836 | ||
/** w_c() for 32-pixel-wide blocks with type 1 (the 5/3 weights). */
int ff_w53_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
{
    const int block_width = 32;
    const int dwt_type    = 1;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
841 | ||
/** w_c() for 32-pixel-wide blocks with type 0 (the 9/7 weights). */
int ff_w97_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
{
    const int block_width = 32;
    const int dwt_type    = 0;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
846 | ||
847 | void ff_dsputil_init_dwt(MECmpContext *c) | |
848 | { | |
849 | c->w53[0] = w53_16_c; | |
850 | c->w53[1] = w53_8_c; | |
851 | c->w97[0] = w97_16_c; | |
852 | c->w97[1] = w97_8_c; | |
853 | } | |
854 | ||
855 | void ff_dwt_init(SnowDWTContext *c) | |
856 | { | |
857 | c->vertical_compose97i = ff_snow_vertical_compose97i; | |
858 | c->horizontal_compose97i = ff_snow_horizontal_compose97i; | |
859 | c->inner_add_yblock = ff_snow_inner_add_yblock; | |
860 | ||
861 | if (HAVE_MMX) | |
862 | ff_dwt_init_x86(c); | |
863 | } | |
864 | ||
865 |