| 1 | /* |
| 2 | * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at> |
| 3 | * Copyright (C) 2008 David Conrad |
| 4 | * |
| 5 | * This file is part of FFmpeg. |
| 6 | * |
| 7 | * FFmpeg is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Lesser General Public |
| 9 | * License as published by the Free Software Foundation; either |
| 10 | * version 2.1 of the License, or (at your option) any later version. |
| 11 | * |
| 12 | * FFmpeg is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Lesser General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Lesser General Public |
| 18 | * License along with FFmpeg; if not, write to the Free Software |
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | */ |
| 21 | |
| 22 | #include "libavutil/attributes.h" |
| 23 | #include "libavutil/avassert.h" |
| 24 | #include "libavutil/common.h" |
| 25 | #include "me_cmp.h" |
| 26 | #include "snow_dwt.h" |
| 27 | |
| 28 | int ff_slice_buffer_init(slice_buffer *buf, int line_count, |
| 29 | int max_allocated_lines, int line_width, |
| 30 | IDWTELEM *base_buffer) |
| 31 | { |
| 32 | int i; |
| 33 | |
| 34 | buf->base_buffer = base_buffer; |
| 35 | buf->line_count = line_count; |
| 36 | buf->line_width = line_width; |
| 37 | buf->data_count = max_allocated_lines; |
| 38 | buf->line = av_mallocz_array(line_count, sizeof(IDWTELEM *)); |
| 39 | if (!buf->line) |
| 40 | return AVERROR(ENOMEM); |
| 41 | buf->data_stack = av_malloc_array(max_allocated_lines, sizeof(IDWTELEM *)); |
| 42 | if (!buf->data_stack) { |
| 43 | av_freep(&buf->line); |
| 44 | return AVERROR(ENOMEM); |
| 45 | } |
| 46 | |
| 47 | for (i = 0; i < max_allocated_lines; i++) { |
| 48 | buf->data_stack[i] = av_malloc_array(line_width, sizeof(IDWTELEM)); |
| 49 | if (!buf->data_stack[i]) { |
| 50 | for (i--; i >=0; i--) |
| 51 | av_freep(&buf->data_stack[i]); |
| 52 | av_freep(&buf->data_stack); |
| 53 | av_freep(&buf->line); |
| 54 | return AVERROR(ENOMEM); |
| 55 | } |
| 56 | } |
| 57 | |
| 58 | buf->data_stack_top = max_allocated_lines - 1; |
| 59 | return 0; |
| 60 | } |
| 61 | |
| 62 | IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, int line) |
| 63 | { |
| 64 | IDWTELEM *buffer; |
| 65 | |
| 66 | av_assert0(buf->data_stack_top >= 0); |
| 67 | // av_assert1(!buf->line[line]); |
| 68 | if (buf->line[line]) |
| 69 | return buf->line[line]; |
| 70 | |
| 71 | buffer = buf->data_stack[buf->data_stack_top]; |
| 72 | buf->data_stack_top--; |
| 73 | buf->line[line] = buffer; |
| 74 | |
| 75 | return buffer; |
| 76 | } |
| 77 | |
| 78 | void ff_slice_buffer_release(slice_buffer *buf, int line) |
| 79 | { |
| 80 | IDWTELEM *buffer; |
| 81 | |
| 82 | av_assert1(line >= 0 && line < buf->line_count); |
| 83 | av_assert1(buf->line[line]); |
| 84 | |
| 85 | buffer = buf->line[line]; |
| 86 | buf->data_stack_top++; |
| 87 | buf->data_stack[buf->data_stack_top] = buffer; |
| 88 | buf->line[line] = NULL; |
| 89 | } |
| 90 | |
| 91 | void ff_slice_buffer_flush(slice_buffer *buf) |
| 92 | { |
| 93 | int i; |
| 94 | for (i = 0; i < buf->line_count; i++) |
| 95 | if (buf->line[i]) |
| 96 | ff_slice_buffer_release(buf, i); |
| 97 | } |
| 98 | |
| 99 | void ff_slice_buffer_destroy(slice_buffer *buf) |
| 100 | { |
| 101 | int i; |
| 102 | ff_slice_buffer_flush(buf); |
| 103 | |
| 104 | for (i = buf->data_count - 1; i >= 0; i--) |
| 105 | av_freep(&buf->data_stack[i]); |
| 106 | av_freep(&buf->data_stack); |
| 107 | av_freep(&buf->line); |
| 108 | } |
| 109 | |
/**
 * Reflect an index into the closed range [0, m].
 *
 * Values outside the range are folded back repeatedly around 0 and m
 * (period 2 * m) until they land inside it.
 *
 * @param v index to fold, may be negative or larger than m
 * @param m largest valid index
 * @return the folded index; 0 when m is 0
 */
static inline int mirror(int v, int m)
{
    /* With m == 0 the fold below adds 2 * m == 0 and would flip v between
     * +v and -v forever; the only valid index is 0. */
    if (!m)
        return 0;

    while ((unsigned)v > (unsigned)m) {
        v = -v;
        if (v < 0)
            v += 2 * m;
    }
    return v;
}
| 119 | |
| 120 | static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, |
| 121 | int dst_step, int src_step, int ref_step, |
| 122 | int width, int mul, int add, int shift, |
| 123 | int highpass, int inverse) |
| 124 | { |
| 125 | const int mirror_left = !highpass; |
| 126 | const int mirror_right = (width & 1) ^ highpass; |
| 127 | const int w = (width >> 1) - 1 + (highpass & width); |
| 128 | int i; |
| 129 | |
| 130 | #define LIFT(src, ref, inv) ((src) + ((inv) ? -(ref) : +(ref))) |
| 131 | if (mirror_left) { |
| 132 | dst[0] = LIFT(src[0], ((mul * 2 * ref[0] + add) >> shift), inverse); |
| 133 | dst += dst_step; |
| 134 | src += src_step; |
| 135 | } |
| 136 | |
| 137 | for (i = 0; i < w; i++) |
| 138 | dst[i * dst_step] = LIFT(src[i * src_step], |
| 139 | ((mul * (ref[i * ref_step] + |
| 140 | ref[(i + 1) * ref_step]) + |
| 141 | add) >> shift), |
| 142 | inverse); |
| 143 | |
| 144 | if (mirror_right) |
| 145 | dst[w * dst_step] = LIFT(src[w * src_step], |
| 146 | ((mul * 2 * ref[w * ref_step] + add) >> shift), |
| 147 | inverse); |
| 148 | } |
| 149 | |
| 150 | static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, |
| 151 | int dst_step, int src_step, int ref_step, |
| 152 | int width, int mul, int add, int shift, |
| 153 | int highpass, int inverse) |
| 154 | { |
| 155 | const int mirror_left = !highpass; |
| 156 | const int mirror_right = (width & 1) ^ highpass; |
| 157 | const int w = (width >> 1) - 1 + (highpass & width); |
| 158 | int i; |
| 159 | |
| 160 | av_assert1(shift == 4); |
| 161 | #define LIFTS(src, ref, inv) \ |
| 162 | ((inv) ? (src) + (((ref) + 4 * (src)) >> shift) \ |
| 163 | : -((-16 * (src) + (ref) + add / \ |
| 164 | 4 + 1 + (5 << 25)) / (5 * 4) - (1 << 23))) |
| 165 | if (mirror_left) { |
| 166 | dst[0] = LIFTS(src[0], mul * 2 * ref[0] + add, inverse); |
| 167 | dst += dst_step; |
| 168 | src += src_step; |
| 169 | } |
| 170 | |
| 171 | for (i = 0; i < w; i++) |
| 172 | dst[i * dst_step] = LIFTS(src[i * src_step], |
| 173 | mul * (ref[i * ref_step] + |
| 174 | ref[(i + 1) * ref_step]) + add, |
| 175 | inverse); |
| 176 | |
| 177 | if (mirror_right) |
| 178 | dst[w * dst_step] = LIFTS(src[w * src_step], |
| 179 | mul * 2 * ref[w * ref_step] + add, |
| 180 | inverse); |
| 181 | } |
| 182 | |
| 183 | static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width) |
| 184 | { |
| 185 | const int width2 = width >> 1; |
| 186 | int x; |
| 187 | const int w2 = (width + 1) >> 1; |
| 188 | |
| 189 | for (x = 0; x < width2; x++) { |
| 190 | temp[x] = b[2 * x]; |
| 191 | temp[x + w2] = b[2 * x + 1]; |
| 192 | } |
| 193 | if (width & 1) |
| 194 | temp[x] = b[2 * x]; |
| 195 | lift(b + w2, temp + w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); |
| 196 | lift(b, temp, b + w2, 1, 1, 1, width, 1, 2, 2, 0, 0); |
| 197 | } |
| 198 | |
| 199 | static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, |
| 200 | int width) |
| 201 | { |
| 202 | int i; |
| 203 | |
| 204 | for (i = 0; i < width; i++) |
| 205 | b1[i] -= (b0[i] + b2[i]) >> 1; |
| 206 | } |
| 207 | |
| 208 | static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, |
| 209 | int width) |
| 210 | { |
| 211 | int i; |
| 212 | |
| 213 | for (i = 0; i < width; i++) |
| 214 | b1[i] += (b0[i] + b2[i] + 2) >> 2; |
| 215 | } |
| 216 | |
| 217 | static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp, |
| 218 | int width, int height, int stride) |
| 219 | { |
| 220 | int y; |
| 221 | DWTELEM *b0 = buffer + mirror(-2 - 1, height - 1) * stride; |
| 222 | DWTELEM *b1 = buffer + mirror(-2, height - 1) * stride; |
| 223 | |
| 224 | for (y = -2; y < height; y += 2) { |
| 225 | DWTELEM *b2 = buffer + mirror(y + 1, height - 1) * stride; |
| 226 | DWTELEM *b3 = buffer + mirror(y + 2, height - 1) * stride; |
| 227 | |
| 228 | if (y + 1 < (unsigned)height) |
| 229 | horizontal_decompose53i(b2, temp, width); |
| 230 | if (y + 2 < (unsigned)height) |
| 231 | horizontal_decompose53i(b3, temp, width); |
| 232 | |
| 233 | if (y + 1 < (unsigned)height) |
| 234 | vertical_decompose53iH0(b1, b2, b3, width); |
| 235 | if (y + 0 < (unsigned)height) |
| 236 | vertical_decompose53iL0(b0, b1, b2, width); |
| 237 | |
| 238 | b0 = b2; |
| 239 | b1 = b3; |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width) |
| 244 | { |
| 245 | const int w2 = (width + 1) >> 1; |
| 246 | |
| 247 | lift(temp + w2, b + 1, b, 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); |
| 248 | liftS(temp, b, temp + w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); |
| 249 | lift(b + w2, temp + w2, temp, 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); |
| 250 | lift(b, temp, b + w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); |
| 251 | } |
| 252 | |
| 253 | static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, |
| 254 | int width) |
| 255 | { |
| 256 | int i; |
| 257 | |
| 258 | for (i = 0; i < width; i++) |
| 259 | b1[i] -= (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS; |
| 260 | } |
| 261 | |
| 262 | static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, |
| 263 | int width) |
| 264 | { |
| 265 | int i; |
| 266 | |
| 267 | for (i = 0; i < width; i++) |
| 268 | b1[i] += (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS; |
| 269 | } |
| 270 | |
| 271 | static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, |
| 272 | int width) |
| 273 | { |
| 274 | int i; |
| 275 | |
| 276 | for (i = 0; i < width; i++) |
| 277 | b1[i] = (16 * 4 * b1[i] - 4 * (b0[i] + b2[i]) + W_BO * 5 + (5 << 27)) / |
| 278 | (5 * 16) - (1 << 23); |
| 279 | } |
| 280 | |
| 281 | static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, |
| 282 | int width) |
| 283 | { |
| 284 | int i; |
| 285 | |
| 286 | for (i = 0; i < width; i++) |
| 287 | b1[i] += (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS; |
| 288 | } |
| 289 | |
| 290 | static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp, |
| 291 | int width, int height, int stride) |
| 292 | { |
| 293 | int y; |
| 294 | DWTELEM *b0 = buffer + mirror(-4 - 1, height - 1) * stride; |
| 295 | DWTELEM *b1 = buffer + mirror(-4, height - 1) * stride; |
| 296 | DWTELEM *b2 = buffer + mirror(-4 + 1, height - 1) * stride; |
| 297 | DWTELEM *b3 = buffer + mirror(-4 + 2, height - 1) * stride; |
| 298 | |
| 299 | for (y = -4; y < height; y += 2) { |
| 300 | DWTELEM *b4 = buffer + mirror(y + 3, height - 1) * stride; |
| 301 | DWTELEM *b5 = buffer + mirror(y + 4, height - 1) * stride; |
| 302 | |
| 303 | if (y + 3 < (unsigned)height) |
| 304 | horizontal_decompose97i(b4, temp, width); |
| 305 | if (y + 4 < (unsigned)height) |
| 306 | horizontal_decompose97i(b5, temp, width); |
| 307 | |
| 308 | if (y + 3 < (unsigned)height) |
| 309 | vertical_decompose97iH0(b3, b4, b5, width); |
| 310 | if (y + 2 < (unsigned)height) |
| 311 | vertical_decompose97iL0(b2, b3, b4, width); |
| 312 | if (y + 1 < (unsigned)height) |
| 313 | vertical_decompose97iH1(b1, b2, b3, width); |
| 314 | if (y + 0 < (unsigned)height) |
| 315 | vertical_decompose97iL1(b0, b1, b2, width); |
| 316 | |
| 317 | b0 = b2; |
| 318 | b1 = b3; |
| 319 | b2 = b4; |
| 320 | b3 = b5; |
| 321 | } |
| 322 | } |
| 323 | |
| 324 | void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height, |
| 325 | int stride, int type, int decomposition_count) |
| 326 | { |
| 327 | int level; |
| 328 | |
| 329 | for (level = 0; level < decomposition_count; level++) { |
| 330 | switch (type) { |
| 331 | case DWT_97: |
| 332 | spatial_decompose97i(buffer, temp, |
| 333 | width >> level, height >> level, |
| 334 | stride << level); |
| 335 | break; |
| 336 | case DWT_53: |
| 337 | spatial_decompose53i(buffer, temp, |
| 338 | width >> level, height >> level, |
| 339 | stride << level); |
| 340 | break; |
| 341 | } |
| 342 | } |
| 343 | } |
| 344 | |
| 345 | static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width) |
| 346 | { |
| 347 | const int width2 = width >> 1; |
| 348 | const int w2 = (width + 1) >> 1; |
| 349 | int x; |
| 350 | |
| 351 | for (x = 0; x < width2; x++) { |
| 352 | temp[2 * x] = b[x]; |
| 353 | temp[2 * x + 1] = b[x + w2]; |
| 354 | } |
| 355 | if (width & 1) |
| 356 | temp[2 * x] = b[x]; |
| 357 | |
| 358 | b[0] = temp[0] - ((temp[1] + 1) >> 1); |
| 359 | for (x = 2; x < width - 1; x += 2) { |
| 360 | b[x] = temp[x] - ((temp[x - 1] + temp[x + 1] + 2) >> 2); |
| 361 | b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1); |
| 362 | } |
| 363 | if (width & 1) { |
| 364 | b[x] = temp[x] - ((temp[x - 1] + 1) >> 1); |
| 365 | b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1); |
| 366 | } else |
| 367 | b[x - 1] = temp[x - 1] + b[x - 2]; |
| 368 | } |
| 369 | |
| 370 | static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
| 371 | int width) |
| 372 | { |
| 373 | int i; |
| 374 | |
| 375 | for (i = 0; i < width; i++) |
| 376 | b1[i] += (b0[i] + b2[i]) >> 1; |
| 377 | } |
| 378 | |
| 379 | static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
| 380 | int width) |
| 381 | { |
| 382 | int i; |
| 383 | |
| 384 | for (i = 0; i < width; i++) |
| 385 | b1[i] -= (b0[i] + b2[i] + 2) >> 2; |
| 386 | } |
| 387 | |
| 388 | static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer *sb, |
| 389 | int height, int stride_line) |
| 390 | { |
| 391 | cs->b0 = slice_buffer_get_line(sb, |
| 392 | mirror(-1 - 1, height - 1) * stride_line); |
| 393 | cs->b1 = slice_buffer_get_line(sb, mirror(-1, height - 1) * stride_line); |
| 394 | cs->y = -1; |
| 395 | } |
| 396 | |
| 397 | static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, |
| 398 | int height, int stride) |
| 399 | { |
| 400 | cs->b0 = buffer + mirror(-1 - 1, height - 1) * stride; |
| 401 | cs->b1 = buffer + mirror(-1, height - 1) * stride; |
| 402 | cs->y = -1; |
| 403 | } |
| 404 | |
| 405 | static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb, |
| 406 | IDWTELEM *temp, |
| 407 | int width, int height, |
| 408 | int stride_line) |
| 409 | { |
| 410 | int y = cs->y; |
| 411 | |
| 412 | IDWTELEM *b0 = cs->b0; |
| 413 | IDWTELEM *b1 = cs->b1; |
| 414 | IDWTELEM *b2 = slice_buffer_get_line(sb, |
| 415 | mirror(y + 1, height - 1) * |
| 416 | stride_line); |
| 417 | IDWTELEM *b3 = slice_buffer_get_line(sb, |
| 418 | mirror(y + 2, height - 1) * |
| 419 | stride_line); |
| 420 | |
| 421 | if (y + 1 < (unsigned)height && y < (unsigned)height) { |
| 422 | int x; |
| 423 | |
| 424 | for (x = 0; x < width; x++) { |
| 425 | b2[x] -= (b1[x] + b3[x] + 2) >> 2; |
| 426 | b1[x] += (b0[x] + b2[x]) >> 1; |
| 427 | } |
| 428 | } else { |
| 429 | if (y + 1 < (unsigned)height) |
| 430 | vertical_compose53iL0(b1, b2, b3, width); |
| 431 | if (y + 0 < (unsigned)height) |
| 432 | vertical_compose53iH0(b0, b1, b2, width); |
| 433 | } |
| 434 | |
| 435 | if (y - 1 < (unsigned)height) |
| 436 | horizontal_compose53i(b0, temp, width); |
| 437 | if (y + 0 < (unsigned)height) |
| 438 | horizontal_compose53i(b1, temp, width); |
| 439 | |
| 440 | cs->b0 = b2; |
| 441 | cs->b1 = b3; |
| 442 | cs->y += 2; |
| 443 | } |
| 444 | |
| 445 | static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, |
| 446 | IDWTELEM *temp, int width, int height, |
| 447 | int stride) |
| 448 | { |
| 449 | int y = cs->y; |
| 450 | IDWTELEM *b0 = cs->b0; |
| 451 | IDWTELEM *b1 = cs->b1; |
| 452 | IDWTELEM *b2 = buffer + mirror(y + 1, height - 1) * stride; |
| 453 | IDWTELEM *b3 = buffer + mirror(y + 2, height - 1) * stride; |
| 454 | |
| 455 | if (y + 1 < (unsigned)height) |
| 456 | vertical_compose53iL0(b1, b2, b3, width); |
| 457 | if (y + 0 < (unsigned)height) |
| 458 | vertical_compose53iH0(b0, b1, b2, width); |
| 459 | |
| 460 | if (y - 1 < (unsigned)height) |
| 461 | horizontal_compose53i(b0, temp, width); |
| 462 | if (y + 0 < (unsigned)height) |
| 463 | horizontal_compose53i(b1, temp, width); |
| 464 | |
| 465 | cs->b0 = b2; |
| 466 | cs->b1 = b3; |
| 467 | cs->y += 2; |
| 468 | } |
| 469 | |
| 470 | void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width) |
| 471 | { |
| 472 | const int w2 = (width + 1) >> 1; |
| 473 | int x; |
| 474 | |
| 475 | temp[0] = b[0] - ((3 * b[w2] + 2) >> 2); |
| 476 | for (x = 1; x < (width >> 1); x++) { |
| 477 | temp[2 * x] = b[x] - ((3 * (b[x + w2 - 1] + b[x + w2]) + 4) >> 3); |
| 478 | temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x]; |
| 479 | } |
| 480 | if (width & 1) { |
| 481 | temp[2 * x] = b[x] - ((3 * b[x + w2 - 1] + 2) >> 2); |
| 482 | temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x]; |
| 483 | } else |
| 484 | temp[2 * x - 1] = b[x + w2 - 1] - 2 * temp[2 * x - 2]; |
| 485 | |
| 486 | b[0] = temp[0] + ((2 * temp[0] + temp[1] + 4) >> 3); |
| 487 | for (x = 2; x < width - 1; x += 2) { |
| 488 | b[x] = temp[x] + ((4 * temp[x] + temp[x - 1] + temp[x + 1] + 8) >> 4); |
| 489 | b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1); |
| 490 | } |
| 491 | if (width & 1) { |
| 492 | b[x] = temp[x] + ((2 * temp[x] + temp[x - 1] + 4) >> 3); |
| 493 | b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1); |
| 494 | } else |
| 495 | b[x - 1] = temp[x - 1] + 3 * b[x - 2]; |
| 496 | } |
| 497 | |
| 498 | static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
| 499 | int width) |
| 500 | { |
| 501 | int i; |
| 502 | |
| 503 | for (i = 0; i < width; i++) |
| 504 | b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS; |
| 505 | } |
| 506 | |
| 507 | static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
| 508 | int width) |
| 509 | { |
| 510 | int i; |
| 511 | |
| 512 | for (i = 0; i < width; i++) |
| 513 | b1[i] -= (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS; |
| 514 | } |
| 515 | |
| 516 | static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
| 517 | int width) |
| 518 | { |
| 519 | int i; |
| 520 | |
| 521 | for (i = 0; i < width; i++) |
| 522 | b1[i] += (W_BM * (b0[i] + b2[i]) + 4 * b1[i] + W_BO) >> W_BS; |
| 523 | } |
| 524 | |
| 525 | static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
| 526 | int width) |
| 527 | { |
| 528 | int i; |
| 529 | |
| 530 | for (i = 0; i < width; i++) |
| 531 | b1[i] -= (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS; |
| 532 | } |
| 533 | |
| 534 | void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
| 535 | IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, |
| 536 | int width) |
| 537 | { |
| 538 | int i; |
| 539 | |
| 540 | for (i = 0; i < width; i++) { |
| 541 | b4[i] -= (W_DM * (b3[i] + b5[i]) + W_DO) >> W_DS; |
| 542 | b3[i] -= (W_CM * (b2[i] + b4[i]) + W_CO) >> W_CS; |
| 543 | b2[i] += (W_BM * (b1[i] + b3[i]) + 4 * b2[i] + W_BO) >> W_BS; |
| 544 | b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS; |
| 545 | } |
| 546 | } |
| 547 | |
| 548 | static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer *sb, |
| 549 | int height, int stride_line) |
| 550 | { |
| 551 | cs->b0 = slice_buffer_get_line(sb, mirror(-3 - 1, height - 1) * stride_line); |
| 552 | cs->b1 = slice_buffer_get_line(sb, mirror(-3, height - 1) * stride_line); |
| 553 | cs->b2 = slice_buffer_get_line(sb, mirror(-3 + 1, height - 1) * stride_line); |
| 554 | cs->b3 = slice_buffer_get_line(sb, mirror(-3 + 2, height - 1) * stride_line); |
| 555 | cs->y = -3; |
| 556 | } |
| 557 | |
| 558 | static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, |
| 559 | int stride) |
| 560 | { |
| 561 | cs->b0 = buffer + mirror(-3 - 1, height - 1) * stride; |
| 562 | cs->b1 = buffer + mirror(-3, height - 1) * stride; |
| 563 | cs->b2 = buffer + mirror(-3 + 1, height - 1) * stride; |
| 564 | cs->b3 = buffer + mirror(-3 + 2, height - 1) * stride; |
| 565 | cs->y = -3; |
| 566 | } |
| 567 | |
| 568 | static void spatial_compose97i_dy_buffered(SnowDWTContext *dsp, DWTCompose *cs, |
| 569 | slice_buffer * sb, IDWTELEM *temp, |
| 570 | int width, int height, |
| 571 | int stride_line) |
| 572 | { |
| 573 | int y = cs->y; |
| 574 | |
| 575 | IDWTELEM *b0 = cs->b0; |
| 576 | IDWTELEM *b1 = cs->b1; |
| 577 | IDWTELEM *b2 = cs->b2; |
| 578 | IDWTELEM *b3 = cs->b3; |
| 579 | IDWTELEM *b4 = slice_buffer_get_line(sb, |
| 580 | mirror(y + 3, height - 1) * |
| 581 | stride_line); |
| 582 | IDWTELEM *b5 = slice_buffer_get_line(sb, |
| 583 | mirror(y + 4, height - 1) * |
| 584 | stride_line); |
| 585 | |
| 586 | if (y > 0 && y + 4 < height) { |
| 587 | dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); |
| 588 | } else { |
| 589 | if (y + 3 < (unsigned)height) |
| 590 | vertical_compose97iL1(b3, b4, b5, width); |
| 591 | if (y + 2 < (unsigned)height) |
| 592 | vertical_compose97iH1(b2, b3, b4, width); |
| 593 | if (y + 1 < (unsigned)height) |
| 594 | vertical_compose97iL0(b1, b2, b3, width); |
| 595 | if (y + 0 < (unsigned)height) |
| 596 | vertical_compose97iH0(b0, b1, b2, width); |
| 597 | } |
| 598 | |
| 599 | if (y - 1 < (unsigned)height) |
| 600 | dsp->horizontal_compose97i(b0, temp, width); |
| 601 | if (y + 0 < (unsigned)height) |
| 602 | dsp->horizontal_compose97i(b1, temp, width); |
| 603 | |
| 604 | cs->b0 = b2; |
| 605 | cs->b1 = b3; |
| 606 | cs->b2 = b4; |
| 607 | cs->b3 = b5; |
| 608 | cs->y += 2; |
| 609 | } |
| 610 | |
| 611 | static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, |
| 612 | IDWTELEM *temp, int width, int height, |
| 613 | int stride) |
| 614 | { |
| 615 | int y = cs->y; |
| 616 | IDWTELEM *b0 = cs->b0; |
| 617 | IDWTELEM *b1 = cs->b1; |
| 618 | IDWTELEM *b2 = cs->b2; |
| 619 | IDWTELEM *b3 = cs->b3; |
| 620 | IDWTELEM *b4 = buffer + mirror(y + 3, height - 1) * stride; |
| 621 | IDWTELEM *b5 = buffer + mirror(y + 4, height - 1) * stride; |
| 622 | |
| 623 | if (y + 3 < (unsigned)height) |
| 624 | vertical_compose97iL1(b3, b4, b5, width); |
| 625 | if (y + 2 < (unsigned)height) |
| 626 | vertical_compose97iH1(b2, b3, b4, width); |
| 627 | if (y + 1 < (unsigned)height) |
| 628 | vertical_compose97iL0(b1, b2, b3, width); |
| 629 | if (y + 0 < (unsigned)height) |
| 630 | vertical_compose97iH0(b0, b1, b2, width); |
| 631 | |
| 632 | if (y - 1 < (unsigned)height) |
| 633 | ff_snow_horizontal_compose97i(b0, temp, width); |
| 634 | if (y + 0 < (unsigned)height) |
| 635 | ff_snow_horizontal_compose97i(b1, temp, width); |
| 636 | |
| 637 | cs->b0 = b2; |
| 638 | cs->b1 = b3; |
| 639 | cs->b2 = b4; |
| 640 | cs->b3 = b5; |
| 641 | cs->y += 2; |
| 642 | } |
| 643 | |
| 644 | void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width, |
| 645 | int height, int stride_line, int type, |
| 646 | int decomposition_count) |
| 647 | { |
| 648 | int level; |
| 649 | for (level = decomposition_count - 1; level >= 0; level--) { |
| 650 | switch (type) { |
| 651 | case DWT_97: |
| 652 | spatial_compose97i_buffered_init(cs + level, sb, height >> level, |
| 653 | stride_line << level); |
| 654 | break; |
| 655 | case DWT_53: |
| 656 | spatial_compose53i_buffered_init(cs + level, sb, height >> level, |
| 657 | stride_line << level); |
| 658 | break; |
| 659 | } |
| 660 | } |
| 661 | } |
| 662 | |
| 663 | void ff_spatial_idwt_buffered_slice(SnowDWTContext *dsp, DWTCompose *cs, |
| 664 | slice_buffer *slice_buf, IDWTELEM *temp, |
| 665 | int width, int height, int stride_line, |
| 666 | int type, int decomposition_count, int y) |
| 667 | { |
| 668 | const int support = type == 1 ? 3 : 5; |
| 669 | int level; |
| 670 | if (type == 2) |
| 671 | return; |
| 672 | |
| 673 | for (level = decomposition_count - 1; level >= 0; level--) |
| 674 | while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) { |
| 675 | switch (type) { |
| 676 | case DWT_97: |
| 677 | spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, temp, |
| 678 | width >> level, |
| 679 | height >> level, |
| 680 | stride_line << level); |
| 681 | break; |
| 682 | case DWT_53: |
| 683 | spatial_compose53i_dy_buffered(cs + level, slice_buf, temp, |
| 684 | width >> level, |
| 685 | height >> level, |
| 686 | stride_line << level); |
| 687 | break; |
| 688 | } |
| 689 | } |
| 690 | } |
| 691 | |
| 692 | static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, |
| 693 | int height, int stride, int type, |
| 694 | int decomposition_count) |
| 695 | { |
| 696 | int level; |
| 697 | for (level = decomposition_count - 1; level >= 0; level--) { |
| 698 | switch (type) { |
| 699 | case DWT_97: |
| 700 | spatial_compose97i_init(cs + level, buffer, height >> level, |
| 701 | stride << level); |
| 702 | break; |
| 703 | case DWT_53: |
| 704 | spatial_compose53i_init(cs + level, buffer, height >> level, |
| 705 | stride << level); |
| 706 | break; |
| 707 | } |
| 708 | } |
| 709 | } |
| 710 | |
| 711 | static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, |
| 712 | IDWTELEM *temp, int width, int height, |
| 713 | int stride, int type, |
| 714 | int decomposition_count, int y) |
| 715 | { |
| 716 | const int support = type == 1 ? 3 : 5; |
| 717 | int level; |
| 718 | if (type == 2) |
| 719 | return; |
| 720 | |
| 721 | for (level = decomposition_count - 1; level >= 0; level--) |
| 722 | while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) { |
| 723 | switch (type) { |
| 724 | case DWT_97: |
| 725 | spatial_compose97i_dy(cs + level, buffer, temp, width >> level, |
| 726 | height >> level, stride << level); |
| 727 | break; |
| 728 | case DWT_53: |
| 729 | spatial_compose53i_dy(cs + level, buffer, temp, width >> level, |
| 730 | height >> level, stride << level); |
| 731 | break; |
| 732 | } |
| 733 | } |
| 734 | } |
| 735 | |
| 736 | void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height, |
| 737 | int stride, int type, int decomposition_count) |
| 738 | { |
| 739 | DWTCompose cs[MAX_DECOMPOSITIONS]; |
| 740 | int y; |
| 741 | ff_spatial_idwt_init(cs, buffer, width, height, stride, type, |
| 742 | decomposition_count); |
| 743 | for (y = 0; y < height; y += 4) |
| 744 | ff_spatial_idwt_slice(cs, buffer, temp, width, height, stride, type, |
| 745 | decomposition_count, y); |
| 746 | } |
| 747 | |
/**
 * Wavelet-domain comparison of two pixel blocks.
 *
 * The per-pixel difference pix1 - pix2, scaled by 16, is written into a
 * 32-element-wide scratch plane, transformed with ff_spatial_dwt() and the
 * absolute coefficients are summed, each weighted by a per-subband factor
 * from the scale table.
 *
 * @param v         not used by this function
 * @param pix1,pix2 top-left pixels of the two blocks
 * @param line_size distance between consecutive pixel rows, in bytes
 * @param w         block width; 8 selects 3 decomposition levels, anything
 *                  else 4. Processed four pixels at a time.
 * @param h         block height (expected to equal w, see av_assert1)
 * @param type      first index into the scale table and transform type
 *                  passed to ff_spatial_dwt()
 * @return the weighted sum shifted right by 9
 */
static inline int w_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size,
                      int w, int h, int type)
{
    int s, i, j;
    const int dec_count = w == 8 ? 3 : 4;
    int tmp[32 * 32], tmp2[32];
    int level, ori;
    static const int scale[2][2][4][4] = {
        {
            { // 9/7 8x8 dec=3
                { 268, 239, 239, 213 },
                {   0, 224, 224, 152 },
                {   0, 135, 135, 110 },
            },
            { // 9/7 16x16 or 32x32 dec=4
                { 344, 310, 310, 280 },
                {   0, 320, 320, 228 },
                {   0, 175, 175, 136 },
                {   0, 129, 129, 102 },
            }
        },
        {
            { // 5/3 8x8 dec=3
                { 275, 245, 245, 218 },
                {   0, 230, 230, 156 },
                {   0, 138, 138, 113 },
            },
            { // 5/3 16x16 or 32x32 dec=4
                { 352, 317, 317, 286 },
                {   0, 328, 328, 233 },
                {   0, 180, 180, 140 },
                {   0, 132, 132, 105 },
            }
        }
    };

    /* The difference can be negative; multiply instead of shifting because
     * left-shifting a negative int is undefined behaviour. */
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j += 4) {
            tmp[32 * i + j + 0] = (pix1[j + 0] - pix2[j + 0]) * (1 << 4);
            tmp[32 * i + j + 1] = (pix1[j + 1] - pix2[j + 1]) * (1 << 4);
            tmp[32 * i + j + 2] = (pix1[j + 2] - pix2[j + 2]) * (1 << 4);
            tmp[32 * i + j + 3] = (pix1[j + 3] - pix2[j + 3]) * (1 << 4);
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, tmp2, w, h, 32, type, dec_count);

    s = 0;
    av_assert1(w == h);
    for (level = 0; level < dec_count; level++)
        /* Orientation 0 is only visited at level 0. */
        for (ori = level ? 1 : 0; ori < 4; ori++) {
            int size   = w >> (dec_count - level);
            int sx     = (ori & 1) ? size : 0;
            int stride = 32 << (dec_count - level);
            int sy     = (ori & 2) ? stride >> 1 : 0;

            for (i = 0; i < size; i++)
                for (j = 0; j < size; j++) {
                    /* Named so it does not shadow the parameter v. */
                    int weighted = tmp[sx + sy + i * stride + j] *
                                   scale[type][dec_count - 3][level][ori];
                    s += FFABS(weighted);
                }
        }
    av_assert1(s >= 0);
    return s >> 9;
}
| 816 | |
/**
 * w_c() for 8-pixel-wide blocks with type 1 (the scale table entry
 * labelled 5/3 in w_c()).
 */
static int w53_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_w = 8;
    const int type    = 1;

    return w_c(v, pix1, pix2, line_size, block_w, h, type);
}
| 821 | |
/**
 * w_c() for 8-pixel-wide blocks with type 0 (the scale table entry
 * labelled 9/7 in w_c()).
 */
static int w97_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_w = 8;
    const int type    = 0;

    return w_c(v, pix1, pix2, line_size, block_w, h, type);
}
| 826 | |
/**
 * w_c() for 16-pixel-wide blocks with type 1 (the scale table entry
 * labelled 5/3 in w_c()).
 */
static int w53_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_w = 16;
    const int type    = 1;

    return w_c(v, pix1, pix2, line_size, block_w, h, type);
}
| 831 | |
/**
 * w_c() for 16-pixel-wide blocks with type 0 (the scale table entry
 * labelled 9/7 in w_c()).
 */
static int w97_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_w = 16;
    const int type    = 0;

    return w_c(v, pix1, pix2, line_size, block_w, h, type);
}
| 836 | |
/**
 * w_c() for 32-pixel-wide blocks with type 1 (the scale table entry
 * labelled 5/3 in w_c()).
 */
int ff_w53_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_w = 32;
    const int type    = 1;

    return w_c(v, pix1, pix2, line_size, block_w, h, type);
}
| 841 | |
/**
 * w_c() for 32-pixel-wide blocks with type 0 (the scale table entry
 * labelled 9/7 in w_c()).
 */
int ff_w97_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_w = 32;
    const int type    = 0;

    return w_c(v, pix1, pix2, line_size, block_w, h, type);
}
| 846 | |
| 847 | void ff_dsputil_init_dwt(MECmpContext *c) |
| 848 | { |
| 849 | c->w53[0] = w53_16_c; |
| 850 | c->w53[1] = w53_8_c; |
| 851 | c->w97[0] = w97_16_c; |
| 852 | c->w97[1] = w97_8_c; |
| 853 | } |
| 854 | |
| 855 | void ff_dwt_init(SnowDWTContext *c) |
| 856 | { |
| 857 | c->vertical_compose97i = ff_snow_vertical_compose97i; |
| 858 | c->horizontal_compose97i = ff_snow_horizontal_compose97i; |
| 859 | c->inner_add_yblock = ff_snow_inner_add_yblock; |
| 860 | |
| 861 | if (HAVE_MMX) |
| 862 | ff_dwt_init_x86(c); |
| 863 | } |
| 864 | |
| 865 | |