4 * Copyright (C) 2012 - 2013 Guillaume Martres
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "bit_depth_template.c"
30 static void FUNC(put_pcm
)(uint8_t *_dst
, ptrdiff_t stride
, int width
, int height
,
31 GetBitContext
*gb
, int pcm_bit_depth
)
34 pixel
*dst
= (pixel
*)_dst
;
36 stride
/= sizeof(pixel
);
38 for (y
= 0; y
< height
; y
++) {
39 for (x
= 0; x
< width
; x
++)
40 dst
[x
] = get_bits(gb
, pcm_bit_depth
) << (BIT_DEPTH
- pcm_bit_depth
);
45 static av_always_inline
void FUNC(transquant_bypass
)(uint8_t *_dst
, int16_t *coeffs
,
46 ptrdiff_t stride
, int size
)
49 pixel
*dst
= (pixel
*)_dst
;
51 stride
/= sizeof(pixel
);
53 for (y
= 0; y
< size
; y
++) {
54 for (x
= 0; x
< size
; x
++) {
55 dst
[x
] = av_clip_pixel(dst
[x
] + *coeffs
);
62 static void FUNC(transform_add4x4
)(uint8_t *_dst
, int16_t *coeffs
,
65 FUNC(transquant_bypass
)(_dst
, coeffs
, stride
, 4);
68 static void FUNC(transform_add8x8
)(uint8_t *_dst
, int16_t *coeffs
,
71 FUNC(transquant_bypass
)(_dst
, coeffs
, stride
, 8);
74 static void FUNC(transform_add16x16
)(uint8_t *_dst
, int16_t *coeffs
,
77 FUNC(transquant_bypass
)(_dst
, coeffs
, stride
, 16);
80 static void FUNC(transform_add32x32
)(uint8_t *_dst
, int16_t *coeffs
,
83 FUNC(transquant_bypass
)(_dst
, coeffs
, stride
, 32);
/*
 * In-place accumulation over a coefficient block whose side is
 * size = 1 << log2_size.
 *
 * Two accumulation loops are visible:
 *   - the first adds to every coefficient the value located `size`
 *     entries before it, for size - 1 rows;
 *   - the second adds to every coefficient (from x = 1 on) the value
 *     immediately before it, for size rows.
 *
 * NOTE(review): the statement choosing between the two loops (presumably
 * a test on `mode`), the declarations of x/y and the per-row advance of
 * `coeffs` are not present in this extract -- confirm against upstream.
 */
87 static void FUNC(transform_rdpcm
)(int16_t *_coeffs
, int16_t log2_size
, int mode
)
89 int16_t *coeffs
= (int16_t *) _coeffs
;
91 int size
= 1 << log2_size
;
95 for (y
= 0; y
< size
- 1; y
++) {
96 for (x
= 0; x
< size
; x
++)
97 coeffs
[x
] += coeffs
[x
- size
];
101 for (y
= 0; y
< size
; y
++) {
102 for (x
= 1; x
< size
; x
++)
103 coeffs
[x
] += coeffs
[x
- 1];
109 static void FUNC(transform_skip
)(int16_t *_coeffs
, int16_t log2_size
)
111 int shift
= 15 - BIT_DEPTH
- log2_size
;
113 int size
= 1 << log2_size
;
114 int16_t *coeffs
= _coeffs
;
118 int offset
= 1 << (shift
- 1);
119 for (y
= 0; y
< size
; y
++) {
120 for (x
= 0; x
< size
; x
++) {
121 *coeffs
= (*coeffs
+ offset
) >> shift
;
126 for (y
= 0; y
< size
; y
++) {
127 for (x
= 0; x
< size
; x
++) {
128 *coeffs
= *coeffs
<< -shift
;
/*
 * Store helpers handed to the TR_* macros as their `assign` argument.
 *   SET           - plain assignment.
 *   SCALE         - stores ((x) + add) >> shift clipped to int16 range.
 *   ADD_AND_SCALE - adds that clipped value to dst and clips to pixel range.
 * SCALE and ADD_AND_SCALE read `add` and `shift` from the scope in which
 * they are expanded.
 */
135 #define SET(dst, x) (dst) = (x)
136 #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
137 #define ADD_AND_SCALE(dst, x) \
138 (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
/*
 * 4-point transform of the values src[0], src[step], src[2*step],
 * src[3*step] using the constants 29, 55 and 74; results are written
 * through `assign` to dst[k * step].
 * NOTE(review): part of this macro (including the tail of the
 * dst[2 * step] expression) is missing from this extract.
 */
140 #define TR_4x4_LUMA(dst, src, step, assign) \
142 int c0 = src[0 * step] + src[2 * step]; \
143 int c1 = src[2 * step] + src[3 * step]; \
144 int c2 = src[0 * step] - src[3 * step]; \
145 int c3 = 74 * src[1 * step]; \
147 assign(dst[2 * step], 74 * (src[0 * step] - \
150 assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \
151 assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \
152 assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \
/*
 * Two-pass in-place 4x4 transform of coeffs built on TR_4x4_LUMA.
 * The first pass runs with element step 4, the second with step 1 and
 * shift = 20 - BIT_DEPTH; both store through SCALE with
 * add = 1 << (shift - 1).
 * NOTE(review): the first-pass value of `shift`, the declaration of `i`
 * and the pointer advances inside both loops are not visible in this
 * extract -- confirm against upstream.
 */
155 static void FUNC(transform_4x4_luma
)(int16_t *coeffs
)
159 int add
= 1 << (shift
- 1);
160 int16_t *src
= coeffs
;
162 for (i
= 0; i
< 4; i
++) {
163 TR_4x4_LUMA(src
, src
, 4, SCALE
);
167 shift
= 20 - BIT_DEPTH
;
168 add
= 1 << (shift
- 1);
169 for (i
= 0; i
< 4; i
++) {
170 TR_4x4_LUMA(coeffs
, coeffs
, 1, SCALE
);
/*
 * 4-point transform butterfly. e0/e1 combine inputs 0 and 2 (factor 64),
 * o0/o1 combine inputs 1 and 3 (factors 83 and 36). The outputs
 * e0 + o0, e1 + o1, e1 - o1, e0 - o0 are written through `assign` to
 * dst[0..3 * dstep]. `end` is not used in the visible lines.
 * NOTE(review): some lines of this macro are missing from this extract.
 */
177 #define TR_4(dst, src, dstep, sstep, assign, end) \
179 const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
180 const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
181 const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
182 const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
184 assign(dst[0 * dstep], e0 + o0); \
185 assign(dst[1 * dstep], e1 + o1); \
186 assign(dst[2 * dstep], e1 - o1); \
187 assign(dst[3 * dstep], e0 - o0); \
/*
 * 8-point transform. The odd part o_8[i] accumulates
 * transform[4 * j][i] * src[j * sstep] over odd j below `end`; the even
 * part e_8 comes from TR_4 on every second input. Outputs i and 7 - i
 * receive e_8[i] + o_8[i] and e_8[i] - o_8[i] through `assign`.
 * NOTE(review): some lines (e.g. the declaration of e_8) are missing
 * from this extract.
 */
190 #define TR_8(dst, src, dstep, sstep, assign, end) \
194 int o_8[4] = { 0 }; \
195 for (i = 0; i < 4; i++) \
196 for (j = 1; j < end; j += 2) \
197 o_8[i] += transform[4 * j][i] * src[j * sstep]; \
198 TR_4(e_8, src, 1, 2 * sstep, SET, 4); \
200 for (i = 0; i < 4; i++) { \
201 assign(dst[i * dstep], e_8[i] + o_8[i]); \
202 assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \
/*
 * 16-point transform. The odd part o_16[i] accumulates
 * transform[2 * j][i] * src[j * sstep] over odd j below `end`; the even
 * part e_16 comes from TR_8 on every second input. Outputs i and 15 - i
 * receive e_16[i] + o_16[i] and e_16[i] - o_16[i] through `assign`.
 * NOTE(review): some lines (e.g. the declaration of e_16) are missing
 * from this extract.
 */
206 #define TR_16(dst, src, dstep, sstep, assign, end) \
210 int o_16[8] = { 0 }; \
211 for (i = 0; i < 8; i++) \
212 for (j = 1; j < end; j += 2) \
213 o_16[i] += transform[2 * j][i] * src[j * sstep]; \
214 TR_8(e_16, src, 1, 2 * sstep, SET, 8); \
216 for (i = 0; i < 8; i++) { \
217 assign(dst[i * dstep], e_16[i] + o_16[i]); \
218 assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \
/*
 * 32-point transform. The odd part o_32[i] accumulates
 * transform[j][i] * src[j * sstep] over odd j below `end`; the even part
 * e_32 comes from TR_16 on every second input with end / 2. Outputs i and
 * 31 - i receive e_32[i] + o_32[i] and e_32[i] - o_32[i] through `assign`.
 * NOTE(review): some lines (e.g. the declaration of e_32) are missing
 * from this extract.
 */
222 #define TR_32(dst, src, dstep, sstep, assign, end) \
226 int o_32[16] = { 0 }; \
227 for (i = 0; i < 16; i++) \
228 for (j = 1; j < end; j += 2) \
229 o_32[i] += transform[j][i] * src[j * sstep]; \
230 TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \
232 for (i = 0; i < 16; i++) { \
233 assign(dst[i * dstep], e_32[i] + o_32[i]); \
234 assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \
/*
 * Declarations of the limits derived from `col_limit` for an H-point
 * transform: limit = FFMIN(col_limit, H) and
 * limit2 = FFMIN(col_limit + 4, H). The 4-point variant declares only
 * limit2; the 16- and 32-point variants reuse the 8-point one.
 */
238 #define IDCT_VAR4(H) \
239 int limit2 = FFMIN(col_limit + 4, H)
240 #define IDCT_VAR8(H) \
241 int limit = FFMIN(col_limit, H); \
242 int limit2 = FFMIN(col_limit + 4, H)
243 #define IDCT_VAR16(H) IDCT_VAR8(H)
244 #define IDCT_VAR32(H) IDCT_VAR8(H)
/*
 * Body of the macro that generates idct_HxH(coeffs, col_limit).
 * Visible structure: a first loop of H iterations applying TR_H with
 * destination/source step H and limit `limit2`, then a second loop with
 * shift = 20 - BIT_DEPTH applying TR_H with step 1 and limit `limit`;
 * both store through SCALE.
 * NOTE(review): the #define line, the first-pass `shift`, the statement
 * controlled by the `limit2 < H` test and the pointer advances are not
 * visible in this extract -- confirm against upstream.
 */
247 static void FUNC(idct_##H ##x ##H )( \
248 int16_t *coeffs, int col_limit) { \
251 int add = 1 << (shift - 1); \
252 int16_t *src = coeffs; \
255 for (i = 0; i < H; i++) { \
256 TR_ ## H(src, src, H, H, SCALE, limit2); \
257 if (limit2 < H && i%4 == 0 && !!i) \
262 shift = 20 - BIT_DEPTH; \
263 add = 1 << (shift - 1); \
264 for (i = 0; i < H; i++) { \
265 TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \
/*
 * Body of the macro that generates idct_HxH_dc.
 * One value is derived from coeffs[0]:
 *   coeff = (((coeffs[0] + 1) >> 1) + add) >> shift
 * with shift = 14 - BIT_DEPTH and add = 1 << (shift - 1); it is then
 * written to all H * H entries of coeffs.
 * NOTE(review): the #define line and the parameter list are not visible
 * in this extract.
 */
271 static void FUNC(idct_##H ##x ##H ##_dc)( \
274 int shift = 14 - BIT_DEPTH; \
275 int add = 1 << (shift - 1); \
276 int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \
278 for (j = 0; j < H; j++) { \
279 for (i = 0; i < H; i++) { \
280 coeffs[i+j*H] = coeff; \
304 static void FUNC(sao_band_filter_0
)(uint8_t *_dst
, uint8_t *_src
,
305 ptrdiff_t stride_dst
, ptrdiff_t stride_src
, SAOParams
*sao
,
306 int *borders
, int width
, int height
,
309 pixel
*dst
= (pixel
*)_dst
;
310 pixel
*src
= (pixel
*)_src
;
311 int offset_table
[32] = { 0 };
313 int shift
= BIT_DEPTH
- 5;
314 int16_t *sao_offset_val
= sao
->offset_val
[c_idx
];
315 int sao_left_class
= sao
->band_position
[c_idx
];
317 stride_dst
/= sizeof(pixel
);
318 stride_src
/= sizeof(pixel
);
320 for (k
= 0; k
< 4; k
++)
321 offset_table
[(k
+ sao_left_class
) & 31] = sao_offset_val
[k
+ 1];
322 for (y
= 0; y
< height
; y
++) {
323 for (x
= 0; x
< width
; x
++)
324 dst
[x
] = av_clip_pixel(src
[x
] + offset_table
[src
[x
] >> shift
]);
/* Three-way comparison: 1 if a > b, 0 if a == b, -1 if a < b. */
#define CMP(a, b) (((a) > (b)) - ((a) < (b)))
332 static void FUNC(sao_edge_filter
)(uint8_t *_dst
, uint8_t *_src
,
333 ptrdiff_t stride_dst
, ptrdiff_t stride_src
, SAOParams
*sao
,
334 int width
, int height
,
335 int c_idx
, int init_x
, int init_y
) {
337 static const uint8_t edge_idx
[] = { 1, 2, 0, 3, 4 };
338 static const int8_t pos
[4][2][2] = {
339 { { -1, 0 }, { 1, 0 } }, // horizontal
340 { { 0, -1 }, { 0, 1 } }, // vertical
341 { { -1, -1 }, { 1, 1 } }, // 45 degree
342 { { 1, -1 }, { -1, 1 } }, // 135 degree
344 int16_t *sao_offset_val
= sao
->offset_val
[c_idx
];
345 int sao_eo_class
= sao
->eo_class
[c_idx
];
346 pixel
*dst
= (pixel
*)_dst
;
347 pixel
*src
= (pixel
*)_src
;
349 int y_stride_src
= init_y
* stride_src
;
350 int y_stride_dst
= init_y
* stride_dst
;
351 int pos_0_0
= pos
[sao_eo_class
][0][0];
352 int pos_0_1
= pos
[sao_eo_class
][0][1];
353 int pos_1_0
= pos
[sao_eo_class
][1][0];
354 int pos_1_1
= pos
[sao_eo_class
][1][1];
357 int y_stride_0_1
= (init_y
+ pos_0_1
) * stride_src
;
358 int y_stride_1_1
= (init_y
+ pos_1_1
) * stride_src
;
359 for (y
= init_y
; y
< height
; y
++) {
360 for (x
= init_x
; x
< width
; x
++) {
361 int diff0
= CMP(src
[x
+ y_stride_src
], src
[x
+ pos_0_0
+ y_stride_0_1
]);
362 int diff1
= CMP(src
[x
+ y_stride_src
], src
[x
+ pos_1_0
+ y_stride_1_1
]);
363 int offset_val
= edge_idx
[2 + diff0
+ diff1
];
364 dst
[x
+ y_stride_dst
] = av_clip_pixel(src
[x
+ y_stride_src
] + sao_offset_val
[offset_val
]);
366 y_stride_src
+= stride_src
;
367 y_stride_dst
+= stride_dst
;
368 y_stride_0_1
+= stride_src
;
369 y_stride_1_1
+= stride_src
;
/*
 * Edge-offset entry point.
 *
 * Converts both strides from bytes to pixels, handles the block borders
 * and then calls sao_edge_filter() on the range
 * [init_x, width) x [init_y, height):
 *   - when the class is not SAO_EO_VERT, the first column and the column
 *     at width - 1 are written as clip(src + sao_offset_val[0]);
 *   - when the class is not SAO_EO_HORIZ, the first row and the row at
 *     height - 1 are written the same way.
 *
 * NOTE(review): the conditions guarding each of the four border passes
 * (presumably tests on borders[]) and any adjustment of init_x, init_y,
 * width and height that follows them are not visible in this extract;
 * vert_edge, horiz_edge and diag_edge are not referenced in the visible
 * lines -- confirm against upstream.
 */
373 static void FUNC(sao_edge_filter_0
)(uint8_t *_dst
, uint8_t *_src
,
374 ptrdiff_t stride_dst
, ptrdiff_t stride_src
, SAOParams
*sao
,
375 int *borders
, int _width
, int _height
,
376 int c_idx
, uint8_t *vert_edge
,
377 uint8_t *horiz_edge
, uint8_t *diag_edge
)
380 pixel
*dst
= (pixel
*)_dst
;
381 pixel
*src
= (pixel
*)_src
;
382 int16_t *sao_offset_val
= sao
->offset_val
[c_idx
];
383 int sao_eo_class
= sao
->eo_class
[c_idx
];
384 int init_x
= 0, init_y
= 0, width
= _width
, height
= _height
;
386 stride_dst
/= sizeof(pixel
);
387 stride_src
/= sizeof(pixel
);
389 if (sao_eo_class
!= SAO_EO_VERT
) {
391 int offset_val
= sao_offset_val
[0];
392 for (y
= 0; y
< height
; y
++) {
393 dst
[y
* stride_dst
] = av_clip_pixel(src
[y
* stride_src
] + offset_val
);
398 int offset_val
= sao_offset_val
[0];
399 int offset
= width
- 1;
400 for (x
= 0; x
< height
; x
++) {
401 dst
[x
* stride_dst
+ offset
] = av_clip_pixel(src
[x
* stride_src
+ offset
] + offset_val
);
406 if (sao_eo_class
!= SAO_EO_HORIZ
) {
408 int offset_val
= sao_offset_val
[0];
409 for (x
= init_x
; x
< width
; x
++)
410 dst
[x
] = av_clip_pixel(src
[x
] + offset_val
);
414 int offset_val
= sao_offset_val
[0];
415 int y_stride_dst
= stride_dst
* (height
- 1);
416 int y_stride_src
= stride_src
* (height
- 1);
417 for (x
= init_x
; x
< width
; x
++)
418 dst
[x
+ y_stride_dst
] = av_clip_pixel(src
[x
+ y_stride_src
] + offset_val
);
423 FUNC(sao_edge_filter
)((uint8_t *)dst
, (uint8_t *)src
, stride_dst
, stride_src
, sao
, width
, height
, c_idx
, init_x
, init_y
);
/*
 * Edge-offset entry point that additionally restores pixels that must
 * not be modified.
 *
 * The first part mirrors sao_edge_filter_0: strides are converted from
 * bytes to pixels, border columns (class != SAO_EO_VERT) and border rows
 * (class != SAO_EO_HORIZ) are written as clip(src + sao_offset_val[0]),
 * and sao_edge_filter() is called on [init_x, width) x [init_y, height).
 *
 * Afterwards source pixels are copied back over the result:
 *   - vert_edge[0] / vert_edge[1]: first / last column (class != VERT);
 *   - horiz_edge[0] / horiz_edge[1]: first / last row (class != HORIZ);
 *   - diag_edge[0..3]: the four corner pixels, for the matching diagonal
 *     class.
 * The save_* flags shorten the restored column/row range by one pixel at
 * a corner when that corner's diag_edge flag is clear, the class is the
 * matching diagonal one and the two adjacent borders[] entries are zero.
 *
 * NOTE(review): the conditions guarding the four border passes, the
 * adjustments of init_x/init_y/width/height, and the statements under
 * the horiz_edge[0] loop and the diag_edge[0] test are not visible in
 * this extract -- confirm against upstream.
 */
426 static void FUNC(sao_edge_filter_1
)(uint8_t *_dst
, uint8_t *_src
,
427 ptrdiff_t stride_dst
, ptrdiff_t stride_src
, SAOParams
*sao
,
428 int *borders
, int _width
, int _height
,
429 int c_idx
, uint8_t *vert_edge
,
430 uint8_t *horiz_edge
, uint8_t *diag_edge
)
433 pixel
*dst
= (pixel
*)_dst
;
434 pixel
*src
= (pixel
*)_src
;
435 int16_t *sao_offset_val
= sao
->offset_val
[c_idx
];
436 int sao_eo_class
= sao
->eo_class
[c_idx
];
437 int init_x
= 0, init_y
= 0, width
= _width
, height
= _height
;
439 stride_dst
/= sizeof(pixel
);
440 stride_src
/= sizeof(pixel
);
442 if (sao_eo_class
!= SAO_EO_VERT
) {
444 int offset_val
= sao_offset_val
[0];
445 for (y
= 0; y
< height
; y
++) {
446 dst
[y
* stride_dst
] = av_clip_pixel(src
[y
* stride_src
] + offset_val
);
451 int offset_val
= sao_offset_val
[0];
452 int offset
= width
- 1;
453 for (x
= 0; x
< height
; x
++) {
454 dst
[x
* stride_dst
+ offset
] = av_clip_pixel(src
[x
* stride_src
+ offset
] + offset_val
);
459 if (sao_eo_class
!= SAO_EO_HORIZ
) {
461 int offset_val
= sao_offset_val
[0];
462 for (x
= init_x
; x
< width
; x
++)
463 dst
[x
] = av_clip_pixel(src
[x
] + offset_val
);
467 int offset_val
= sao_offset_val
[0];
468 int y_stride_dst
= stride_dst
* (height
- 1);
469 int y_stride_src
= stride_src
* (height
- 1);
470 for (x
= init_x
; x
< width
; x
++)
471 dst
[x
+ y_stride_dst
] = av_clip_pixel(src
[x
+ y_stride_src
] + offset_val
);
476 FUNC(sao_edge_filter
)((uint8_t *)dst
, (uint8_t *)src
, stride_dst
, stride_src
, sao
, width
, height
, c_idx
, init_x
, init_y
);
479 int save_upper_left
= !diag_edge
[0] && sao_eo_class
== SAO_EO_135D
&& !borders
[0] && !borders
[1];
480 int save_upper_right
= !diag_edge
[1] && sao_eo_class
== SAO_EO_45D
&& !borders
[1] && !borders
[2];
481 int save_lower_right
= !diag_edge
[2] && sao_eo_class
== SAO_EO_135D
&& !borders
[2] && !borders
[3];
482 int save_lower_left
= !diag_edge
[3] && sao_eo_class
== SAO_EO_45D
&& !borders
[0] && !borders
[3];
484 // Restore pixels that can't be modified
485 if(vert_edge
[0] && sao_eo_class
!= SAO_EO_VERT
) {
486 for(y
= init_y
+save_upper_left
; y
< height
-save_lower_left
; y
++)
487 dst
[y
*stride_dst
] = src
[y
*stride_src
];
489 if(vert_edge
[1] && sao_eo_class
!= SAO_EO_VERT
) {
490 for(y
= init_y
+save_upper_right
; y
< height
-save_lower_right
; y
++)
491 dst
[y
*stride_dst
+width
-1] = src
[y
*stride_src
+width
-1];
494 if(horiz_edge
[0] && sao_eo_class
!= SAO_EO_HORIZ
) {
495 for(x
= init_x
+save_upper_left
; x
< width
-save_upper_right
; x
++)
498 if(horiz_edge
[1] && sao_eo_class
!= SAO_EO_HORIZ
) {
499 for(x
= init_x
+save_lower_left
; x
< width
-save_lower_right
; x
++)
500 dst
[(height
-1)*stride_dst
+x
] = src
[(height
-1)*stride_src
+x
];
502 if(diag_edge
[0] && sao_eo_class
== SAO_EO_135D
)
504 if(diag_edge
[1] && sao_eo_class
== SAO_EO_45D
)
505 dst
[width
-1] = src
[width
-1];
506 if(diag_edge
[2] && sao_eo_class
== SAO_EO_135D
)
507 dst
[stride_dst
*(height
-1)+width
-1] = src
[stride_src
*(height
-1)+width
-1];
508 if(diag_edge
[3] && sao_eo_class
== SAO_EO_45D
)
509 dst
[stride_dst
*(height
-1)] = src
[stride_src
*(height
-1)];
516 ////////////////////////////////////////////////////////////////////////////////
518 ////////////////////////////////////////////////////////////////////////////////
519 static void FUNC(put_hevc_pel_pixels
)(int16_t *dst
,
520 uint8_t *_src
, ptrdiff_t _srcstride
,
521 int height
, intptr_t mx
, intptr_t my
, int width
)
524 pixel
*src
= (pixel
*)_src
;
525 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
527 for (y
= 0; y
< height
; y
++) {
528 for (x
= 0; x
< width
; x
++)
529 dst
[x
] = src
[x
] << (14 - BIT_DEPTH
);
535 static void FUNC(put_hevc_pel_uni_pixels
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
536 int height
, intptr_t mx
, intptr_t my
, int width
)
539 pixel
*src
= (pixel
*)_src
;
540 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
541 pixel
*dst
= (pixel
*)_dst
;
542 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
544 for (y
= 0; y
< height
; y
++) {
545 memcpy(dst
, src
, width
* sizeof(pixel
));
551 static void FUNC(put_hevc_pel_bi_pixels
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
553 int height
, intptr_t mx
, intptr_t my
, int width
)
556 pixel
*src
= (pixel
*)_src
;
557 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
558 pixel
*dst
= (pixel
*)_dst
;
559 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
561 int shift
= 14 + 1 - BIT_DEPTH
;
563 int offset
= 1 << (shift
- 1);
568 for (y
= 0; y
< height
; y
++) {
569 for (x
= 0; x
< width
; x
++)
570 dst
[x
] = av_clip_pixel(((src
[x
] << (14 - BIT_DEPTH
)) + src2
[x
] + offset
) >> shift
);
577 static void FUNC(put_hevc_pel_uni_w_pixels
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
578 int height
, int denom
, int wx
, int ox
, intptr_t mx
, intptr_t my
, int width
)
581 pixel
*src
= (pixel
*)_src
;
582 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
583 pixel
*dst
= (pixel
*)_dst
;
584 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
585 int shift
= denom
+ 14 - BIT_DEPTH
;
587 int offset
= 1 << (shift
- 1);
592 ox
= ox
* (1 << (BIT_DEPTH
- 8));
593 for (y
= 0; y
< height
; y
++) {
594 for (x
= 0; x
< width
; x
++)
595 dst
[x
] = av_clip_pixel((((src
[x
] << (14 - BIT_DEPTH
)) * wx
+ offset
) >> shift
) + ox
);
601 static void FUNC(put_hevc_pel_bi_w_pixels
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
603 int height
, int denom
, int wx0
, int wx1
,
604 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
607 pixel
*src
= (pixel
*)_src
;
608 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
609 pixel
*dst
= (pixel
*)_dst
;
610 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
612 int shift
= 14 + 1 - BIT_DEPTH
;
613 int log2Wd
= denom
+ shift
- 1;
615 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
616 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
617 for (y
= 0; y
< height
; y
++) {
618 for (x
= 0; x
< width
; x
++) {
619 dst
[x
] = av_clip_pixel(( (src
[x
] << (14 - BIT_DEPTH
)) * wx1
+ src2
[x
] * wx0
+ ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
627 ////////////////////////////////////////////////////////////////////////////////
629 ////////////////////////////////////////////////////////////////////////////////
/*
 * 8-tap filter sum around element x of src, with taps `stride` elements
 * apart (from x - 3 * stride to x + 4 * stride).
 * Reads `filter` (8 coefficients) and `x` from the scope where it is
 * expanded. Arguments are parenthesized so that expressions can be passed
 * for src and stride.
 */
#define QPEL_FILTER(src, stride)                                               \
    (filter[0] * (src)[x - 3 * (stride)] +                                     \
     filter[1] * (src)[x - 2 * (stride)] +                                     \
     filter[2] * (src)[x -     (stride)] +                                     \
     filter[3] * (src)[x               ] +                                     \
     filter[4] * (src)[x +     (stride)] +                                     \
     filter[5] * (src)[x + 2 * (stride)] +                                     \
     filter[6] * (src)[x + 3 * (stride)] +                                     \
     filter[7] * (src)[x + 4 * (stride)])
640 static void FUNC(put_hevc_qpel_h
)(int16_t *dst
,
641 uint8_t *_src
, ptrdiff_t _srcstride
,
642 int height
, intptr_t mx
, intptr_t my
, int width
)
645 pixel
*src
= (pixel
*)_src
;
646 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
647 const int8_t *filter
= ff_hevc_qpel_filters
[mx
- 1];
648 for (y
= 0; y
< height
; y
++) {
649 for (x
= 0; x
< width
; x
++)
650 dst
[x
] = QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
656 static void FUNC(put_hevc_qpel_v
)(int16_t *dst
,
657 uint8_t *_src
, ptrdiff_t _srcstride
,
658 int height
, intptr_t mx
, intptr_t my
, int width
)
661 pixel
*src
= (pixel
*)_src
;
662 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
663 const int8_t *filter
= ff_hevc_qpel_filters
[my
- 1];
664 for (y
= 0; y
< height
; y
++) {
665 for (x
= 0; x
< width
; x
++)
666 dst
[x
] = QPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8);
672 static void FUNC(put_hevc_qpel_hv
)(int16_t *dst
,
674 ptrdiff_t _srcstride
,
675 int height
, intptr_t mx
,
676 intptr_t my
, int width
)
679 const int8_t *filter
;
680 pixel
*src
= (pixel
*)_src
;
681 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
682 int16_t tmp_array
[(MAX_PB_SIZE
+ QPEL_EXTRA
) * MAX_PB_SIZE
];
683 int16_t *tmp
= tmp_array
;
685 src
-= QPEL_EXTRA_BEFORE
* srcstride
;
686 filter
= ff_hevc_qpel_filters
[mx
- 1];
687 for (y
= 0; y
< height
+ QPEL_EXTRA
; y
++) {
688 for (x
= 0; x
< width
; x
++)
689 tmp
[x
] = QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
694 tmp
= tmp_array
+ QPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
695 filter
= ff_hevc_qpel_filters
[my
- 1];
696 for (y
= 0; y
< height
; y
++) {
697 for (x
= 0; x
< width
; x
++)
698 dst
[x
] = QPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6;
704 static void FUNC(put_hevc_qpel_uni_h
)(uint8_t *_dst
, ptrdiff_t _dststride
,
705 uint8_t *_src
, ptrdiff_t _srcstride
,
706 int height
, intptr_t mx
, intptr_t my
, int width
)
709 pixel
*src
= (pixel
*)_src
;
710 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
711 pixel
*dst
= (pixel
*)_dst
;
712 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
713 const int8_t *filter
= ff_hevc_qpel_filters
[mx
- 1];
714 int shift
= 14 - BIT_DEPTH
;
717 int offset
= 1 << (shift
- 1);
722 for (y
= 0; y
< height
; y
++) {
723 for (x
= 0; x
< width
; x
++)
724 dst
[x
] = av_clip_pixel(((QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) + offset
) >> shift
);
730 static void FUNC(put_hevc_qpel_bi_h
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
732 int height
, intptr_t mx
, intptr_t my
, int width
)
735 pixel
*src
= (pixel
*)_src
;
736 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
737 pixel
*dst
= (pixel
*)_dst
;
738 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
740 const int8_t *filter
= ff_hevc_qpel_filters
[mx
- 1];
742 int shift
= 14 + 1 - BIT_DEPTH
;
744 int offset
= 1 << (shift
- 1);
749 for (y
= 0; y
< height
; y
++) {
750 for (x
= 0; x
< width
; x
++)
751 dst
[x
] = av_clip_pixel(((QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) + src2
[x
] + offset
) >> shift
);
758 static void FUNC(put_hevc_qpel_uni_v
)(uint8_t *_dst
, ptrdiff_t _dststride
,
759 uint8_t *_src
, ptrdiff_t _srcstride
,
760 int height
, intptr_t mx
, intptr_t my
, int width
)
763 pixel
*src
= (pixel
*)_src
;
764 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
765 pixel
*dst
= (pixel
*)_dst
;
766 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
767 const int8_t *filter
= ff_hevc_qpel_filters
[my
- 1];
768 int shift
= 14 - BIT_DEPTH
;
771 int offset
= 1 << (shift
- 1);
776 for (y
= 0; y
< height
; y
++) {
777 for (x
= 0; x
< width
; x
++)
778 dst
[x
] = av_clip_pixel(((QPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) + offset
) >> shift
);
785 static void FUNC(put_hevc_qpel_bi_v
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
787 int height
, intptr_t mx
, intptr_t my
, int width
)
790 pixel
*src
= (pixel
*)_src
;
791 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
792 pixel
*dst
= (pixel
*)_dst
;
793 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
795 const int8_t *filter
= ff_hevc_qpel_filters
[my
- 1];
797 int shift
= 14 + 1 - BIT_DEPTH
;
799 int offset
= 1 << (shift
- 1);
804 for (y
= 0; y
< height
; y
++) {
805 for (x
= 0; x
< width
; x
++)
806 dst
[x
] = av_clip_pixel(((QPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) + src2
[x
] + offset
) >> shift
);
813 static void FUNC(put_hevc_qpel_uni_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
,
814 uint8_t *_src
, ptrdiff_t _srcstride
,
815 int height
, intptr_t mx
, intptr_t my
, int width
)
818 const int8_t *filter
;
819 pixel
*src
= (pixel
*)_src
;
820 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
821 pixel
*dst
= (pixel
*)_dst
;
822 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
823 int16_t tmp_array
[(MAX_PB_SIZE
+ QPEL_EXTRA
) * MAX_PB_SIZE
];
824 int16_t *tmp
= tmp_array
;
825 int shift
= 14 - BIT_DEPTH
;
828 int offset
= 1 << (shift
- 1);
833 src
-= QPEL_EXTRA_BEFORE
* srcstride
;
834 filter
= ff_hevc_qpel_filters
[mx
- 1];
835 for (y
= 0; y
< height
+ QPEL_EXTRA
; y
++) {
836 for (x
= 0; x
< width
; x
++)
837 tmp
[x
] = QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
842 tmp
= tmp_array
+ QPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
843 filter
= ff_hevc_qpel_filters
[my
- 1];
845 for (y
= 0; y
< height
; y
++) {
846 for (x
= 0; x
< width
; x
++)
847 dst
[x
] = av_clip_pixel(((QPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) + offset
) >> shift
);
853 static void FUNC(put_hevc_qpel_bi_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
855 int height
, intptr_t mx
, intptr_t my
, int width
)
858 const int8_t *filter
;
859 pixel
*src
= (pixel
*)_src
;
860 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
861 pixel
*dst
= (pixel
*)_dst
;
862 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
863 int16_t tmp_array
[(MAX_PB_SIZE
+ QPEL_EXTRA
) * MAX_PB_SIZE
];
864 int16_t *tmp
= tmp_array
;
865 int shift
= 14 + 1 - BIT_DEPTH
;
867 int offset
= 1 << (shift
- 1);
872 src
-= QPEL_EXTRA_BEFORE
* srcstride
;
873 filter
= ff_hevc_qpel_filters
[mx
- 1];
874 for (y
= 0; y
< height
+ QPEL_EXTRA
; y
++) {
875 for (x
= 0; x
< width
; x
++)
876 tmp
[x
] = QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
881 tmp
= tmp_array
+ QPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
882 filter
= ff_hevc_qpel_filters
[my
- 1];
884 for (y
= 0; y
< height
; y
++) {
885 for (x
= 0; x
< width
; x
++)
886 dst
[x
] = av_clip_pixel(((QPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) + src2
[x
] + offset
) >> shift
);
893 static void FUNC(put_hevc_qpel_uni_w_h
)(uint8_t *_dst
, ptrdiff_t _dststride
,
894 uint8_t *_src
, ptrdiff_t _srcstride
,
895 int height
, int denom
, int wx
, int ox
,
896 intptr_t mx
, intptr_t my
, int width
)
899 pixel
*src
= (pixel
*)_src
;
900 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
901 pixel
*dst
= (pixel
*)_dst
;
902 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
903 const int8_t *filter
= ff_hevc_qpel_filters
[mx
- 1];
904 int shift
= denom
+ 14 - BIT_DEPTH
;
906 int offset
= 1 << (shift
- 1);
911 ox
= ox
* (1 << (BIT_DEPTH
- 8));
912 for (y
= 0; y
< height
; y
++) {
913 for (x
= 0; x
< width
; x
++)
914 dst
[x
] = av_clip_pixel((((QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) * wx
+ offset
) >> shift
) + ox
);
920 static void FUNC(put_hevc_qpel_bi_w_h
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
922 int height
, int denom
, int wx0
, int wx1
,
923 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
926 pixel
*src
= (pixel
*)_src
;
927 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
928 pixel
*dst
= (pixel
*)_dst
;
929 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
931 const int8_t *filter
= ff_hevc_qpel_filters
[mx
- 1];
933 int shift
= 14 + 1 - BIT_DEPTH
;
934 int log2Wd
= denom
+ shift
- 1;
936 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
937 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
938 for (y
= 0; y
< height
; y
++) {
939 for (x
= 0; x
< width
; x
++)
940 dst
[x
] = av_clip_pixel(((QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) * wx1
+ src2
[x
] * wx0
+
941 ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
948 static void FUNC(put_hevc_qpel_uni_w_v
)(uint8_t *_dst
, ptrdiff_t _dststride
,
949 uint8_t *_src
, ptrdiff_t _srcstride
,
950 int height
, int denom
, int wx
, int ox
,
951 intptr_t mx
, intptr_t my
, int width
)
954 pixel
*src
= (pixel
*)_src
;
955 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
956 pixel
*dst
= (pixel
*)_dst
;
957 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
958 const int8_t *filter
= ff_hevc_qpel_filters
[my
- 1];
959 int shift
= denom
+ 14 - BIT_DEPTH
;
961 int offset
= 1 << (shift
- 1);
966 ox
= ox
* (1 << (BIT_DEPTH
- 8));
967 for (y
= 0; y
< height
; y
++) {
968 for (x
= 0; x
< width
; x
++)
969 dst
[x
] = av_clip_pixel((((QPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) * wx
+ offset
) >> shift
) + ox
);
975 static void FUNC(put_hevc_qpel_bi_w_v
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
977 int height
, int denom
, int wx0
, int wx1
,
978 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
981 pixel
*src
= (pixel
*)_src
;
982 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
983 pixel
*dst
= (pixel
*)_dst
;
984 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
986 const int8_t *filter
= ff_hevc_qpel_filters
[my
- 1];
988 int shift
= 14 + 1 - BIT_DEPTH
;
989 int log2Wd
= denom
+ shift
- 1;
991 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
992 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
993 for (y
= 0; y
< height
; y
++) {
994 for (x
= 0; x
< width
; x
++)
995 dst
[x
] = av_clip_pixel(((QPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) * wx1
+ src2
[x
] * wx0
+
996 ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
1003 static void FUNC(put_hevc_qpel_uni_w_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
,
1004 uint8_t *_src
, ptrdiff_t _srcstride
,
1005 int height
, int denom
, int wx
, int ox
,
1006 intptr_t mx
, intptr_t my
, int width
)
1009 const int8_t *filter
;
1010 pixel
*src
= (pixel
*)_src
;
1011 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1012 pixel
*dst
= (pixel
*)_dst
;
1013 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1014 int16_t tmp_array
[(MAX_PB_SIZE
+ QPEL_EXTRA
) * MAX_PB_SIZE
];
1015 int16_t *tmp
= tmp_array
;
1016 int shift
= denom
+ 14 - BIT_DEPTH
;
1018 int offset
= 1 << (shift
- 1);
1023 src
-= QPEL_EXTRA_BEFORE
* srcstride
;
1024 filter
= ff_hevc_qpel_filters
[mx
- 1];
1025 for (y
= 0; y
< height
+ QPEL_EXTRA
; y
++) {
1026 for (x
= 0; x
< width
; x
++)
1027 tmp
[x
] = QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1032 tmp
= tmp_array
+ QPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1033 filter
= ff_hevc_qpel_filters
[my
- 1];
1035 ox
= ox
* (1 << (BIT_DEPTH
- 8));
1036 for (y
= 0; y
< height
; y
++) {
1037 for (x
= 0; x
< width
; x
++)
1038 dst
[x
] = av_clip_pixel((((QPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) * wx
+ offset
) >> shift
) + ox
);
1044 static void FUNC(put_hevc_qpel_bi_w_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1046 int height
, int denom
, int wx0
, int wx1
,
1047 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
1050 const int8_t *filter
;
1051 pixel
*src
= (pixel
*)_src
;
1052 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1053 pixel
*dst
= (pixel
*)_dst
;
1054 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1055 int16_t tmp_array
[(MAX_PB_SIZE
+ QPEL_EXTRA
) * MAX_PB_SIZE
];
1056 int16_t *tmp
= tmp_array
;
1057 int shift
= 14 + 1 - BIT_DEPTH
;
1058 int log2Wd
= denom
+ shift
- 1;
1060 src
-= QPEL_EXTRA_BEFORE
* srcstride
;
1061 filter
= ff_hevc_qpel_filters
[mx
- 1];
1062 for (y
= 0; y
< height
+ QPEL_EXTRA
; y
++) {
1063 for (x
= 0; x
< width
; x
++)
1064 tmp
[x
] = QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1069 tmp
= tmp_array
+ QPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1070 filter
= ff_hevc_qpel_filters
[my
- 1];
1072 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
1073 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
1074 for (y
= 0; y
< height
; y
++) {
1075 for (x
= 0; x
< width
; x
++)
1076 dst
[x
] = av_clip_pixel(((QPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) * wx1
+ src2
[x
] * wx0
+
1077 ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
1080 src2
+= MAX_PB_SIZE
;
1084 ////////////////////////////////////////////////////////////////////////////////
1086 ////////////////////////////////////////////////////////////////////////////////
/*
 * 4-tap filter sum around element x of src, with taps `stride` elements
 * apart (from x - stride to x + 2 * stride).
 * Reads `filter` (4 coefficients) and `x` from the scope where it is
 * expanded. Arguments are parenthesized so that expressions can be passed
 * for src and stride.
 */
#define EPEL_FILTER(src, stride)                                               \
    (filter[0] * (src)[x -     (stride)] +                                     \
     filter[1] * (src)[x               ] +                                     \
     filter[2] * (src)[x +     (stride)] +                                     \
     filter[3] * (src)[x + 2 * (stride)])
1093 static void FUNC(put_hevc_epel_h
)(int16_t *dst
,
1094 uint8_t *_src
, ptrdiff_t _srcstride
,
1095 int height
, intptr_t mx
, intptr_t my
, int width
)
1098 pixel
*src
= (pixel
*)_src
;
1099 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1100 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1101 for (y
= 0; y
< height
; y
++) {
1102 for (x
= 0; x
< width
; x
++)
1103 dst
[x
] = EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1109 static void FUNC(put_hevc_epel_v
)(int16_t *dst
,
1110 uint8_t *_src
, ptrdiff_t _srcstride
,
1111 int height
, intptr_t mx
, intptr_t my
, int width
)
1114 pixel
*src
= (pixel
*)_src
;
1115 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1116 const int8_t *filter
= ff_hevc_epel_filters
[my
- 1];
1118 for (y
= 0; y
< height
; y
++) {
1119 for (x
= 0; x
< width
; x
++)
1120 dst
[x
] = EPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8);
1126 static void FUNC(put_hevc_epel_hv
)(int16_t *dst
,
1127 uint8_t *_src
, ptrdiff_t _srcstride
,
1128 int height
, intptr_t mx
, intptr_t my
, int width
)
1131 pixel
*src
= (pixel
*)_src
;
1132 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1133 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1134 int16_t tmp_array
[(MAX_PB_SIZE
+ EPEL_EXTRA
) * MAX_PB_SIZE
];
1135 int16_t *tmp
= tmp_array
;
1137 src
-= EPEL_EXTRA_BEFORE
* srcstride
;
1139 for (y
= 0; y
< height
+ EPEL_EXTRA
; y
++) {
1140 for (x
= 0; x
< width
; x
++)
1141 tmp
[x
] = EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1146 tmp
= tmp_array
+ EPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1147 filter
= ff_hevc_epel_filters
[my
- 1];
1149 for (y
= 0; y
< height
; y
++) {
1150 for (x
= 0; x
< width
; x
++)
1151 dst
[x
] = EPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6;
1157 static void FUNC(put_hevc_epel_uni_h
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1158 int height
, intptr_t mx
, intptr_t my
, int width
)
1161 pixel
*src
= (pixel
*)_src
;
1162 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1163 pixel
*dst
= (pixel
*)_dst
;
1164 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1165 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1166 int shift
= 14 - BIT_DEPTH
;
1168 int offset
= 1 << (shift
- 1);
1173 for (y
= 0; y
< height
; y
++) {
1174 for (x
= 0; x
< width
; x
++)
1175 dst
[x
] = av_clip_pixel(((EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) + offset
) >> shift
);
1181 static void FUNC(put_hevc_epel_bi_h
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1183 int height
, intptr_t mx
, intptr_t my
, int width
)
1186 pixel
*src
= (pixel
*)_src
;
1187 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1188 pixel
*dst
= (pixel
*)_dst
;
1189 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1190 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1191 int shift
= 14 + 1 - BIT_DEPTH
;
1193 int offset
= 1 << (shift
- 1);
1198 for (y
= 0; y
< height
; y
++) {
1199 for (x
= 0; x
< width
; x
++) {
1200 dst
[x
] = av_clip_pixel(((EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) + src2
[x
] + offset
) >> shift
);
1204 src2
+= MAX_PB_SIZE
;
1208 static void FUNC(put_hevc_epel_uni_v
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1209 int height
, intptr_t mx
, intptr_t my
, int width
)
1212 pixel
*src
= (pixel
*)_src
;
1213 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1214 pixel
*dst
= (pixel
*)_dst
;
1215 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1216 const int8_t *filter
= ff_hevc_epel_filters
[my
- 1];
1217 int shift
= 14 - BIT_DEPTH
;
1219 int offset
= 1 << (shift
- 1);
1224 for (y
= 0; y
< height
; y
++) {
1225 for (x
= 0; x
< width
; x
++)
1226 dst
[x
] = av_clip_pixel(((EPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) + offset
) >> shift
);
1232 static void FUNC(put_hevc_epel_bi_v
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1234 int height
, intptr_t mx
, intptr_t my
, int width
)
1237 pixel
*src
= (pixel
*)_src
;
1238 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1239 const int8_t *filter
= ff_hevc_epel_filters
[my
- 1];
1240 pixel
*dst
= (pixel
*)_dst
;
1241 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1242 int shift
= 14 + 1 - BIT_DEPTH
;
1244 int offset
= 1 << (shift
- 1);
1249 for (y
= 0; y
< height
; y
++) {
1250 for (x
= 0; x
< width
; x
++)
1251 dst
[x
] = av_clip_pixel(((EPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) + src2
[x
] + offset
) >> shift
);
1254 src2
+= MAX_PB_SIZE
;
1258 static void FUNC(put_hevc_epel_uni_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1259 int height
, intptr_t mx
, intptr_t my
, int width
)
1262 pixel
*src
= (pixel
*)_src
;
1263 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1264 pixel
*dst
= (pixel
*)_dst
;
1265 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1266 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1267 int16_t tmp_array
[(MAX_PB_SIZE
+ EPEL_EXTRA
) * MAX_PB_SIZE
];
1268 int16_t *tmp
= tmp_array
;
1269 int shift
= 14 - BIT_DEPTH
;
1271 int offset
= 1 << (shift
- 1);
1276 src
-= EPEL_EXTRA_BEFORE
* srcstride
;
1278 for (y
= 0; y
< height
+ EPEL_EXTRA
; y
++) {
1279 for (x
= 0; x
< width
; x
++)
1280 tmp
[x
] = EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1285 tmp
= tmp_array
+ EPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1286 filter
= ff_hevc_epel_filters
[my
- 1];
1288 for (y
= 0; y
< height
; y
++) {
1289 for (x
= 0; x
< width
; x
++)
1290 dst
[x
] = av_clip_pixel(((EPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) + offset
) >> shift
);
1296 static void FUNC(put_hevc_epel_bi_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1298 int height
, intptr_t mx
, intptr_t my
, int width
)
1301 pixel
*src
= (pixel
*)_src
;
1302 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1303 pixel
*dst
= (pixel
*)_dst
;
1304 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1305 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1306 int16_t tmp_array
[(MAX_PB_SIZE
+ EPEL_EXTRA
) * MAX_PB_SIZE
];
1307 int16_t *tmp
= tmp_array
;
1308 int shift
= 14 + 1 - BIT_DEPTH
;
1310 int offset
= 1 << (shift
- 1);
1315 src
-= EPEL_EXTRA_BEFORE
* srcstride
;
1317 for (y
= 0; y
< height
+ EPEL_EXTRA
; y
++) {
1318 for (x
= 0; x
< width
; x
++)
1319 tmp
[x
] = EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1324 tmp
= tmp_array
+ EPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1325 filter
= ff_hevc_epel_filters
[my
- 1];
1327 for (y
= 0; y
< height
; y
++) {
1328 for (x
= 0; x
< width
; x
++)
1329 dst
[x
] = av_clip_pixel(((EPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) + src2
[x
] + offset
) >> shift
);
1332 src2
+= MAX_PB_SIZE
;
1336 static void FUNC(put_hevc_epel_uni_w_h
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1337 int height
, int denom
, int wx
, int ox
, intptr_t mx
, intptr_t my
, int width
)
1340 pixel
*src
= (pixel
*)_src
;
1341 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1342 pixel
*dst
= (pixel
*)_dst
;
1343 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1344 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1345 int shift
= denom
+ 14 - BIT_DEPTH
;
1347 int offset
= 1 << (shift
- 1);
1352 ox
= ox
* (1 << (BIT_DEPTH
- 8));
1353 for (y
= 0; y
< height
; y
++) {
1354 for (x
= 0; x
< width
; x
++) {
1355 dst
[x
] = av_clip_pixel((((EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) * wx
+ offset
) >> shift
) + ox
);
1362 static void FUNC(put_hevc_epel_bi_w_h
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1364 int height
, int denom
, int wx0
, int wx1
,
1365 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
1368 pixel
*src
= (pixel
*)_src
;
1369 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1370 pixel
*dst
= (pixel
*)_dst
;
1371 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1372 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1373 int shift
= 14 + 1 - BIT_DEPTH
;
1374 int log2Wd
= denom
+ shift
- 1;
1376 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
1377 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
1378 for (y
= 0; y
< height
; y
++) {
1379 for (x
= 0; x
< width
; x
++)
1380 dst
[x
] = av_clip_pixel(((EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) * wx1
+ src2
[x
] * wx0
+
1381 ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
1384 src2
+= MAX_PB_SIZE
;
1388 static void FUNC(put_hevc_epel_uni_w_v
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1389 int height
, int denom
, int wx
, int ox
, intptr_t mx
, intptr_t my
, int width
)
1392 pixel
*src
= (pixel
*)_src
;
1393 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1394 pixel
*dst
= (pixel
*)_dst
;
1395 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1396 const int8_t *filter
= ff_hevc_epel_filters
[my
- 1];
1397 int shift
= denom
+ 14 - BIT_DEPTH
;
1399 int offset
= 1 << (shift
- 1);
1404 ox
= ox
* (1 << (BIT_DEPTH
- 8));
1405 for (y
= 0; y
< height
; y
++) {
1406 for (x
= 0; x
< width
; x
++) {
1407 dst
[x
] = av_clip_pixel((((EPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) * wx
+ offset
) >> shift
) + ox
);
1414 static void FUNC(put_hevc_epel_bi_w_v
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1416 int height
, int denom
, int wx0
, int wx1
,
1417 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
1420 pixel
*src
= (pixel
*)_src
;
1421 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1422 const int8_t *filter
= ff_hevc_epel_filters
[my
- 1];
1423 pixel
*dst
= (pixel
*)_dst
;
1424 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1425 int shift
= 14 + 1 - BIT_DEPTH
;
1426 int log2Wd
= denom
+ shift
- 1;
1428 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
1429 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
1430 for (y
= 0; y
< height
; y
++) {
1431 for (x
= 0; x
< width
; x
++)
1432 dst
[x
] = av_clip_pixel(((EPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) * wx1
+ src2
[x
] * wx0
+
1433 ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
1436 src2
+= MAX_PB_SIZE
;
1440 static void FUNC(put_hevc_epel_uni_w_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1441 int height
, int denom
, int wx
, int ox
, intptr_t mx
, intptr_t my
, int width
)
1444 pixel
*src
= (pixel
*)_src
;
1445 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1446 pixel
*dst
= (pixel
*)_dst
;
1447 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1448 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1449 int16_t tmp_array
[(MAX_PB_SIZE
+ EPEL_EXTRA
) * MAX_PB_SIZE
];
1450 int16_t *tmp
= tmp_array
;
1451 int shift
= denom
+ 14 - BIT_DEPTH
;
1453 int offset
= 1 << (shift
- 1);
1458 src
-= EPEL_EXTRA_BEFORE
* srcstride
;
1460 for (y
= 0; y
< height
+ EPEL_EXTRA
; y
++) {
1461 for (x
= 0; x
< width
; x
++)
1462 tmp
[x
] = EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1467 tmp
= tmp_array
+ EPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1468 filter
= ff_hevc_epel_filters
[my
- 1];
1470 ox
= ox
* (1 << (BIT_DEPTH
- 8));
1471 for (y
= 0; y
< height
; y
++) {
1472 for (x
= 0; x
< width
; x
++)
1473 dst
[x
] = av_clip_pixel((((EPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) * wx
+ offset
) >> shift
) + ox
);
1479 static void FUNC(put_hevc_epel_bi_w_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1481 int height
, int denom
, int wx0
, int wx1
,
1482 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
1485 pixel
*src
= (pixel
*)_src
;
1486 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1487 pixel
*dst
= (pixel
*)_dst
;
1488 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1489 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1490 int16_t tmp_array
[(MAX_PB_SIZE
+ EPEL_EXTRA
) * MAX_PB_SIZE
];
1491 int16_t *tmp
= tmp_array
;
1492 int shift
= 14 + 1 - BIT_DEPTH
;
1493 int log2Wd
= denom
+ shift
- 1;
1495 src
-= EPEL_EXTRA_BEFORE
* srcstride
;
1497 for (y
= 0; y
< height
+ EPEL_EXTRA
; y
++) {
1498 for (x
= 0; x
< width
; x
++)
1499 tmp
[x
] = EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1504 tmp
= tmp_array
+ EPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1505 filter
= ff_hevc_epel_filters
[my
- 1];
1507 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
1508 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
1509 for (y
= 0; y
< height
; y
++) {
1510 for (x
= 0; x
< width
; x
++)
1511 dst
[x
] = av_clip_pixel(((EPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) * wx1
+ src2
[x
] * wx0
+
1512 ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
1515 src2
+= MAX_PB_SIZE
;
/*
 * Sample accessors used by the deblocking filters. They expand against the
 * locals `pix`, `xstride` and `ystride` of the function that uses them.
 * P3..P0 are the four samples before pix along xstride (P0 is the nearest),
 * Q0..Q3 are pix itself and the three samples after it.
 */
1518 #define P3 pix[-4 * xstride]
1519 #define P2 pix[-3 * xstride]
1520 #define P1 pix[-2 * xstride]
1521 #define P0 pix[-1 * xstride]
1522 #define Q0 pix[0 * xstride]
1523 #define Q1 pix[1 * xstride]
1524 #define Q2 pix[2 * xstride]
1525 #define Q3 pix[3 * xstride]
1527 // Same eight positions on the line 3 * ystride further on; used only for the deblocking decision
1528 #define TP3 pix[-4 * xstride + 3 * ystride]
1529 #define TP2 pix[-3 * xstride + 3 * ystride]
1530 #define TP1 pix[-2 * xstride + 3 * ystride]
1531 #define TP0 pix[-1 * xstride + 3 * ystride]
1532 #define TQ0 pix[0 * xstride + 3 * ystride]
1533 #define TQ1 pix[1 * xstride + 3 * ystride]
1534 #define TQ2 pix[2 * xstride + 3 * ystride]
1535 #define TQ3 pix[3 * xstride + 3 * ystride]
1537 static void FUNC(hevc_loop_filter_luma
)(uint8_t *_pix
,
1538 ptrdiff_t _xstride
, ptrdiff_t _ystride
,
1540 uint8_t *_no_p
, uint8_t *_no_q
)
1543 pixel
*pix
= (pixel
*)_pix
;
1544 ptrdiff_t xstride
= _xstride
/ sizeof(pixel
);
1545 ptrdiff_t ystride
= _ystride
/ sizeof(pixel
);
1547 beta
<<= BIT_DEPTH
- 8;
1549 for (j
= 0; j
< 2; j
++) {
1550 const int dp0
= abs(P2
- 2 * P1
+ P0
);
1551 const int dq0
= abs(Q2
- 2 * Q1
+ Q0
);
1552 const int dp3
= abs(TP2
- 2 * TP1
+ TP0
);
1553 const int dq3
= abs(TQ2
- 2 * TQ1
+ TQ0
);
1554 const int d0
= dp0
+ dq0
;
1555 const int d3
= dp3
+ dq3
;
1556 const int tc
= _tc
[j
] << (BIT_DEPTH
- 8);
1557 const int no_p
= _no_p
[j
];
1558 const int no_q
= _no_q
[j
];
1560 if (d0
+ d3
>= beta
) {
1564 const int beta_3
= beta
>> 3;
1565 const int beta_2
= beta
>> 2;
1566 const int tc25
= ((tc
* 5 + 1) >> 1);
1568 if (abs(P3
- P0
) + abs(Q3
- Q0
) < beta_3
&& abs(P0
- Q0
) < tc25
&&
1569 abs(TP3
- TP0
) + abs(TQ3
- TQ0
) < beta_3
&& abs(TP0
- TQ0
) < tc25
&&
1570 (d0
<< 1) < beta_2
&& (d3
<< 1) < beta_2
) {
1572 const int tc2
= tc
<< 1;
1573 for (d
= 0; d
< 4; d
++) {
1583 P0
= p0
+ av_clip(((p2
+ 2 * p1
+ 2 * p0
+ 2 * q0
+ q1
+ 4) >> 3) - p0
, -tc2
, tc2
);
1584 P1
= p1
+ av_clip(((p2
+ p1
+ p0
+ q0
+ 2) >> 2) - p1
, -tc2
, tc2
);
1585 P2
= p2
+ av_clip(((2 * p3
+ 3 * p2
+ p1
+ p0
+ q0
+ 4) >> 3) - p2
, -tc2
, tc2
);
1588 Q0
= q0
+ av_clip(((p1
+ 2 * p0
+ 2 * q0
+ 2 * q1
+ q2
+ 4) >> 3) - q0
, -tc2
, tc2
);
1589 Q1
= q1
+ av_clip(((p0
+ q0
+ q1
+ q2
+ 2) >> 2) - q1
, -tc2
, tc2
);
1590 Q2
= q2
+ av_clip(((2 * q3
+ 3 * q2
+ q1
+ q0
+ p0
+ 4) >> 3) - q2
, -tc2
, tc2
);
1594 } else { // normal filtering
1597 const int tc_2
= tc
>> 1;
1598 if (dp0
+ dp3
< ((beta
+ (beta
>> 1)) >> 3))
1600 if (dq0
+ dq3
< ((beta
+ (beta
>> 1)) >> 3))
1603 for (d
= 0; d
< 4; d
++) {
1610 int delta0
= (9 * (q0
- p0
) - 3 * (q1
- p1
) + 8) >> 4;
1611 if (abs(delta0
) < 10 * tc
) {
1612 delta0
= av_clip(delta0
, -tc
, tc
);
1614 P0
= av_clip_pixel(p0
+ delta0
);
1616 Q0
= av_clip_pixel(q0
- delta0
);
1617 if (!no_p
&& nd_p
> 1) {
1618 const int deltap1
= av_clip((((p2
+ p0
+ 1) >> 1) - p1
+ delta0
) >> 1, -tc_2
, tc_2
);
1619 P1
= av_clip_pixel(p1
+ deltap1
);
1621 if (!no_q
&& nd_q
> 1) {
1622 const int deltaq1
= av_clip((((q2
+ q0
+ 1) >> 1) - q1
- delta0
) >> 1, -tc_2
, tc_2
);
1623 Q1
= av_clip_pixel(q1
+ deltaq1
);
1633 static void FUNC(hevc_loop_filter_chroma
)(uint8_t *_pix
, ptrdiff_t _xstride
,
1634 ptrdiff_t _ystride
, int *_tc
,
1635 uint8_t *_no_p
, uint8_t *_no_q
)
1637 int d
, j
, no_p
, no_q
;
1638 pixel
*pix
= (pixel
*)_pix
;
1639 ptrdiff_t xstride
= _xstride
/ sizeof(pixel
);
1640 ptrdiff_t ystride
= _ystride
/ sizeof(pixel
);
1642 for (j
= 0; j
< 2; j
++) {
1643 const int tc
= _tc
[j
] << (BIT_DEPTH
- 8);
1651 for (d
= 0; d
< 4; d
++) {
1657 delta0
= av_clip((((q0
- p0
) * 4) + p1
- q1
+ 4) >> 3, -tc
, tc
);
1659 P0
= av_clip_pixel(p0
+ delta0
);
1661 Q0
= av_clip_pixel(q0
- delta0
);
1667 static void FUNC(hevc_h_loop_filter_chroma
)(uint8_t *pix
, ptrdiff_t stride
,
1668 int32_t *tc
, uint8_t *no_p
,
1671 FUNC(hevc_loop_filter_chroma
)(pix
, stride
, sizeof(pixel
), tc
, no_p
, no_q
);
1674 static void FUNC(hevc_v_loop_filter_chroma
)(uint8_t *pix
, ptrdiff_t stride
,
1675 int32_t *tc
, uint8_t *no_p
,
1678 FUNC(hevc_loop_filter_chroma
)(pix
, sizeof(pixel
), stride
, tc
, no_p
, no_q
);
1681 static void FUNC(hevc_h_loop_filter_luma
)(uint8_t *pix
, ptrdiff_t stride
,
1682 int beta
, int32_t *tc
, uint8_t *no_p
,
1685 FUNC(hevc_loop_filter_luma
)(pix
, stride
, sizeof(pixel
),
1686 beta
, tc
, no_p
, no_q
);
1689 static void FUNC(hevc_v_loop_filter_luma
)(uint8_t *pix
, ptrdiff_t stride
,
1690 int beta
, int32_t *tc
, uint8_t *no_p
,
1693 FUNC(hevc_loop_filter_luma
)(pix
, sizeof(pixel
), stride
,
1694 beta
, tc
, no_p
, no_q
);