/*
 * Copyright (C) 2012 - 2013 Guillaume Martres
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
26 #include "bit_depth_template.c"
30 static void FUNC(put_pcm
)(uint8_t *_dst
, ptrdiff_t stride
, int width
, int height
,
31 GetBitContext
*gb
, int pcm_bit_depth
)
34 pixel
*dst
= (pixel
*)_dst
;
36 stride
/= sizeof(pixel
);
38 for (y
= 0; y
< height
; y
++) {
39 for (x
= 0; x
< width
; x
++)
40 dst
[x
] = get_bits(gb
, pcm_bit_depth
) << (BIT_DEPTH
- pcm_bit_depth
);
45 static void FUNC(transform_add4x4
)(uint8_t *_dst
, int16_t *coeffs
,
49 pixel
*dst
= (pixel
*)_dst
;
51 stride
/= sizeof(pixel
);
53 for (y
= 0; y
< 4; y
++) {
54 for (x
= 0; x
< 4; x
++) {
55 dst
[x
] = av_clip_pixel(dst
[x
] + *coeffs
);
62 static void FUNC(transform_add8x8
)(uint8_t *_dst
, int16_t *coeffs
,
66 pixel
*dst
= (pixel
*)_dst
;
68 stride
/= sizeof(pixel
);
70 for (y
= 0; y
< 8; y
++) {
71 for (x
= 0; x
< 8; x
++) {
72 dst
[x
] = av_clip_pixel(dst
[x
] + *coeffs
);
79 static void FUNC(transform_add16x16
)(uint8_t *_dst
, int16_t *coeffs
,
83 pixel
*dst
= (pixel
*)_dst
;
85 stride
/= sizeof(pixel
);
87 for (y
= 0; y
< 16; y
++) {
88 for (x
= 0; x
< 16; x
++) {
89 dst
[x
] = av_clip_pixel(dst
[x
] + *coeffs
);
96 static void FUNC(transform_add32x32
)(uint8_t *_dst
, int16_t *coeffs
,
100 pixel
*dst
= (pixel
*)_dst
;
102 stride
/= sizeof(pixel
);
104 for (y
= 0; y
< 32; y
++) {
105 for (x
= 0; x
< 32; x
++) {
106 dst
[x
] = av_clip_pixel(dst
[x
] + *coeffs
);
114 static void FUNC(transform_rdpcm
)(int16_t *_coeffs
, int16_t log2_size
, int mode
)
116 int16_t *coeffs
= (int16_t *) _coeffs
;
118 int size
= 1 << log2_size
;
122 for (y
= 0; y
< size
- 1; y
++) {
123 for (x
= 0; x
< size
; x
++)
124 coeffs
[x
] += coeffs
[x
- size
];
128 for (y
= 0; y
< size
; y
++) {
129 for (x
= 1; x
< size
; x
++)
130 coeffs
[x
] += coeffs
[x
- 1];
136 static void FUNC(transform_skip
)(int16_t *_coeffs
, int16_t log2_size
)
138 int shift
= 15 - BIT_DEPTH
- log2_size
;
140 int size
= 1 << log2_size
;
141 int16_t *coeffs
= _coeffs
;
145 int offset
= 1 << (shift
- 1);
146 for (y
= 0; y
< size
; y
++) {
147 for (x
= 0; x
< size
; x
++) {
148 *coeffs
= (*coeffs
+ offset
) >> shift
;
153 for (y
= 0; y
< size
; y
++) {
154 for (x
= 0; x
< size
; x
++) {
155 *coeffs
= *coeffs
<< -shift
;
/*
 * Assignment policies handed to the TR_* macros as their "assign" argument.
 *   SET           - plain store
 *   SCALE         - add "add", shift right by "shift", clip with av_clip_int16()
 *   ADD_AND_SCALE - as SCALE, then add to dst and clip with av_clip_pixel()
 * SCALE and ADD_AND_SCALE read variables named "add" and "shift" from the
 * scope in which they are expanded.
 */
#define SET(dst, x)   (dst) = (x)
#define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
#define ADD_AND_SCALE(dst, x)                                           \
    (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
/*
 * One 4-point transform pass using the constants 29, 55 and 74.
 * Reads src[0..3 * step] and stores four results through "assign" at
 * dst[0..3 * step]. All reads of src used for c0..c3 happen before the
 * first store; the store to dst[2 * step] re-reads src[0], src[2], src[3],
 * which is why it is issued first so that in-place use is safe.
 */
#define TR_4x4_LUMA(dst, src, step, assign)                             \
    do {                                                                \
        int c0 = src[0 * step] + src[2 * step];                         \
        int c1 = src[2 * step] + src[3 * step];                         \
        int c2 = src[0 * step] - src[3 * step];                         \
        int c3 = 74 * src[1 * step];                                    \
                                                                        \
        assign(dst[2 * step], 74 * (src[0 * step] -                     \
                                    src[2 * step] +                     \
                                    src[3 * step]));                    \
        assign(dst[0 * step], 29 * c0 + 55 * c1 + c3);                  \
        assign(dst[1 * step], 55 * c2 - 29 * c1 + c3);                  \
        assign(dst[3 * step], 55 * c0 + 29 * c2 - c3);                  \
    } while (0)
182 static void FUNC(transform_4x4_luma
)(int16_t *coeffs
)
186 int add
= 1 << (shift
- 1);
187 int16_t *src
= coeffs
;
189 for (i
= 0; i
< 4; i
++) {
190 TR_4x4_LUMA(src
, src
, 4, SCALE
);
194 shift
= 20 - BIT_DEPTH
;
195 add
= 1 << (shift
- 1);
196 for (i
= 0; i
< 4; i
++) {
197 TR_4x4_LUMA(coeffs
, coeffs
, 1, SCALE
);
/*
 * 4-point butterfly: even part from src[0], src[2] (weight 64), odd part
 * from src[1], src[3] (weights 83 / 36). Inputs are read with step "sstep",
 * results stored through "assign" with step "dstep". The "end" argument is
 * accepted for signature parity with the larger TR_* macros and is unused.
 */
#define TR_4(dst, src, dstep, sstep, assign, end)                              \
    do {                                                                       \
        const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep];              \
        const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep];              \
        const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep];              \
        const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep];              \
                                                                               \
        assign(dst[0 * dstep], e0 + o0);                                       \
        assign(dst[1 * dstep], e1 + o1);                                       \
        assign(dst[2 * dstep], e1 - o1);                                       \
        assign(dst[3 * dstep], e0 - o0);                                       \
    } while (0)
/*
 * 8-point pass: the odd part accumulates transform[4 * j][i] * src[j * sstep]
 * over odd j below "end"; the even part is a TR_4 over every second input.
 * Results are stored mirrored (i and 7 - i) through "assign".
 */
#define TR_8(dst, src, dstep, sstep, assign, end)                              \
    do {                                                                       \
        int i, j;                                                              \
        int e_8[4];                                                            \
        int o_8[4] = { 0 };                                                    \
        for (i = 0; i < 4; i++)                                                \
            for (j = 1; j < end; j += 2)                                       \
                o_8[i] += transform[4 * j][i] * src[j * sstep];                \
        TR_4(e_8, src, 1, 2 * sstep, SET, 4);                                  \
                                                                               \
        for (i = 0; i < 4; i++) {                                              \
            assign(dst[i * dstep], e_8[i] + o_8[i]);                           \
            assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]);                     \
        }                                                                      \
    } while (0)
/*
 * 16-point pass: the odd part accumulates transform[2 * j][i] * src[j * sstep]
 * over odd j below "end"; the even part is a TR_8 over every second input.
 * Results are stored mirrored (i and 15 - i) through "assign".
 */
#define TR_16(dst, src, dstep, sstep, assign, end)                             \
    do {                                                                       \
        int i, j;                                                              \
        int e_16[8];                                                           \
        int o_16[8] = { 0 };                                                   \
        for (i = 0; i < 8; i++)                                                \
            for (j = 1; j < end; j += 2)                                       \
                o_16[i] += transform[2 * j][i] * src[j * sstep];               \
        TR_8(e_16, src, 1, 2 * sstep, SET, 8);                                 \
                                                                               \
        for (i = 0; i < 8; i++) {                                              \
            assign(dst[i * dstep], e_16[i] + o_16[i]);                         \
            assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]);                  \
        }                                                                      \
    } while (0)
/*
 * 32-point pass: the odd part accumulates transform[j][i] * src[j * sstep]
 * over odd j below "end"; the even part is a TR_16 over every second input,
 * bounded by end/2. Results are stored mirrored (i and 31 - i) through
 * "assign".
 */
#define TR_32(dst, src, dstep, sstep, assign, end)                             \
    do {                                                                       \
        int i, j;                                                              \
        int e_32[16];                                                          \
        int o_32[16] = { 0 };                                                  \
        for (i = 0; i < 16; i++)                                               \
            for (j = 1; j < end; j += 2)                                       \
                o_32[i] += transform[j][i] * src[j * sstep];                   \
        TR_16(e_32, src, 1, 2 * sstep, SET, end/2);                            \
                                                                               \
        for (i = 0; i < 16; i++) {                                             \
            assign(dst[i * dstep], e_32[i] + o_32[i]);                         \
            assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]);                  \
        }                                                                      \
    } while (0)
/*
 * Per-size local declarations for the idct_HxH generator: "limit2" bounds
 * the first pass, "limit" the second. The 4-point variant declares only
 * limit2 because TR_4 ignores its "end" argument.
 */
#define IDCT_VAR4(H)                                                          \
    int      limit2   = FFMIN(col_limit + 4, H)
#define IDCT_VAR8(H)                                                          \
    int      limit    = FFMIN(col_limit, H);                                  \
    int      limit2   = FFMIN(col_limit + 4, H)
#define IDCT_VAR16(H)   IDCT_VAR8(H)
#define IDCT_VAR32(H)   IDCT_VAR8(H)
/*
 * Generator for idct_HxH(coeffs, col_limit): an in-place two-pass H x H
 * transform built on TR_H. The first pass walks columns (step H) bounded
 * by limit2, which is lowered by 4 every fourth column once it is below H;
 * the second pass walks rows (step 1) bounded by limit.
 *
 * NOTE(review): the "#define IDCT(H)" line, "int i", "shift = 7", the
 * IDCT_VAR expansion and the pointer advances were missing from the damaged
 * listing and are restored from context - confirm against upstream.
 */
#define IDCT(H)                                                              \
static void FUNC(idct_##H ##x ##H )(                                         \
                   int16_t *coeffs, int col_limit) {                         \
    int i;                                                                   \
    int      shift   = 7;                                                    \
    int      add     = 1 << (shift - 1);                                     \
    int16_t *src     = coeffs;                                               \
    IDCT_VAR ##H(H);                                                         \
                                                                             \
    for (i = 0; i < H; i++) {                                                \
        TR_ ## H(src, src, H, H, SCALE, limit2);                             \
        if (limit2 < H && i%4 == 0 && !!i)                                   \
            limit2 -= 4;                                                     \
        src++;                                                               \
    }                                                                        \
                                                                             \
    shift   = 20 - BIT_DEPTH;                                                \
    add     = 1 << (shift - 1);                                              \
    for (i = 0; i < H; i++) {                                                \
        TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit);                        \
        coeffs += H;                                                         \
    }                                                                        \
}
/*
 * Generator for idct_HxH_dc(coeffs): derives a single value from coeffs[0]
 * ((c + 1) >> 1, then a rounding shift by 14 - BIT_DEPTH) and writes it to
 * every one of the H * H coefficient slots.
 *
 * NOTE(review): the "#define IDCT_DC(H)" line and the parameter list were
 * missing from the damaged listing and are restored from context.
 */
#define IDCT_DC(H)                                                           \
static void FUNC(idct_##H ##x ##H ##_dc)(                                    \
                   int16_t *coeffs) {                                        \
    int i, j;                                                                \
    int      shift   = 14 - BIT_DEPTH;                                       \
    int      add     = 1 << (shift - 1);                                     \
    int      coeff   = (((coeffs[0] + 1) >> 1) + add) >> shift;              \
                                                                             \
    for (j = 0; j < H; j++) {                                                \
        for (i = 0; i < H; i++) {                                            \
            coeffs[i+j*H] = coeff;                                           \
        }                                                                    \
    }                                                                        \
}
331 static void FUNC(sao_band_filter_0
)(uint8_t *_dst
, uint8_t *_src
,
332 ptrdiff_t stride_dst
, ptrdiff_t stride_src
, SAOParams
*sao
,
333 int *borders
, int width
, int height
,
336 pixel
*dst
= (pixel
*)_dst
;
337 pixel
*src
= (pixel
*)_src
;
338 int offset_table
[32] = { 0 };
340 int shift
= BIT_DEPTH
- 5;
341 int16_t *sao_offset_val
= sao
->offset_val
[c_idx
];
342 int sao_left_class
= sao
->band_position
[c_idx
];
344 stride_dst
/= sizeof(pixel
);
345 stride_src
/= sizeof(pixel
);
347 for (k
= 0; k
< 4; k
++)
348 offset_table
[(k
+ sao_left_class
) & 31] = sao_offset_val
[k
+ 1];
349 for (y
= 0; y
< height
; y
++) {
350 for (x
= 0; x
< width
; x
++)
351 dst
[x
] = av_clip_pixel(src
[x
] + offset_table
[src
[x
] >> shift
]);
/* Three-way comparison: 1 if a > b, 0 if a == b, -1 otherwise.
 * Both arguments may be evaluated twice. */
#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
359 static void FUNC(sao_edge_filter
)(uint8_t *_dst
, uint8_t *_src
,
360 ptrdiff_t stride_dst
, ptrdiff_t stride_src
, SAOParams
*sao
,
361 int width
, int height
,
362 int c_idx
, int init_x
, int init_y
) {
364 static const uint8_t edge_idx
[] = { 1, 2, 0, 3, 4 };
365 static const int8_t pos
[4][2][2] = {
366 { { -1, 0 }, { 1, 0 } }, // horizontal
367 { { 0, -1 }, { 0, 1 } }, // vertical
368 { { -1, -1 }, { 1, 1 } }, // 45 degree
369 { { 1, -1 }, { -1, 1 } }, // 135 degree
371 int16_t *sao_offset_val
= sao
->offset_val
[c_idx
];
372 int sao_eo_class
= sao
->eo_class
[c_idx
];
373 pixel
*dst
= (pixel
*)_dst
;
374 pixel
*src
= (pixel
*)_src
;
376 int y_stride_src
= init_y
* stride_src
;
377 int y_stride_dst
= init_y
* stride_dst
;
378 int pos_0_0
= pos
[sao_eo_class
][0][0];
379 int pos_0_1
= pos
[sao_eo_class
][0][1];
380 int pos_1_0
= pos
[sao_eo_class
][1][0];
381 int pos_1_1
= pos
[sao_eo_class
][1][1];
384 int y_stride_0_1
= (init_y
+ pos_0_1
) * stride_src
;
385 int y_stride_1_1
= (init_y
+ pos_1_1
) * stride_src
;
386 for (y
= init_y
; y
< height
; y
++) {
387 for (x
= init_x
; x
< width
; x
++) {
388 int diff0
= CMP(src
[x
+ y_stride_src
], src
[x
+ pos_0_0
+ y_stride_0_1
]);
389 int diff1
= CMP(src
[x
+ y_stride_src
], src
[x
+ pos_1_0
+ y_stride_1_1
]);
390 int offset_val
= edge_idx
[2 + diff0
+ diff1
];
391 dst
[x
+ y_stride_dst
] = av_clip_pixel(src
[x
+ y_stride_src
] + sao_offset_val
[offset_val
]);
393 y_stride_src
+= stride_src
;
394 y_stride_dst
+= stride_dst
;
395 y_stride_0_1
+= stride_src
;
396 y_stride_1_1
+= stride_src
;
400 static void FUNC(sao_edge_filter_0
)(uint8_t *_dst
, uint8_t *_src
,
401 ptrdiff_t stride_dst
, ptrdiff_t stride_src
, SAOParams
*sao
,
402 int *borders
, int _width
, int _height
,
403 int c_idx
, uint8_t *vert_edge
,
404 uint8_t *horiz_edge
, uint8_t *diag_edge
)
407 pixel
*dst
= (pixel
*)_dst
;
408 pixel
*src
= (pixel
*)_src
;
409 int16_t *sao_offset_val
= sao
->offset_val
[c_idx
];
410 int sao_eo_class
= sao
->eo_class
[c_idx
];
411 int init_x
= 0, init_y
= 0, width
= _width
, height
= _height
;
413 stride_dst
/= sizeof(pixel
);
414 stride_src
/= sizeof(pixel
);
416 if (sao_eo_class
!= SAO_EO_VERT
) {
418 int offset_val
= sao_offset_val
[0];
419 for (y
= 0; y
< height
; y
++) {
420 dst
[y
* stride_dst
] = av_clip_pixel(src
[y
* stride_src
] + offset_val
);
425 int offset_val
= sao_offset_val
[0];
426 int offset
= width
- 1;
427 for (x
= 0; x
< height
; x
++) {
428 dst
[x
* stride_dst
+ offset
] = av_clip_pixel(src
[x
* stride_src
+ offset
] + offset_val
);
433 if (sao_eo_class
!= SAO_EO_HORIZ
) {
435 int offset_val
= sao_offset_val
[0];
436 for (x
= init_x
; x
< width
; x
++)
437 dst
[x
] = av_clip_pixel(src
[x
] + offset_val
);
441 int offset_val
= sao_offset_val
[0];
442 int y_stride_dst
= stride_dst
* (height
- 1);
443 int y_stride_src
= stride_src
* (height
- 1);
444 for (x
= init_x
; x
< width
; x
++)
445 dst
[x
+ y_stride_dst
] = av_clip_pixel(src
[x
+ y_stride_src
] + offset_val
);
450 FUNC(sao_edge_filter
)((uint8_t *)dst
, (uint8_t *)src
, stride_dst
, stride_src
, sao
, width
, height
, c_idx
, init_x
, init_y
);
453 static void FUNC(sao_edge_filter_1
)(uint8_t *_dst
, uint8_t *_src
,
454 ptrdiff_t stride_dst
, ptrdiff_t stride_src
, SAOParams
*sao
,
455 int *borders
, int _width
, int _height
,
456 int c_idx
, uint8_t *vert_edge
,
457 uint8_t *horiz_edge
, uint8_t *diag_edge
)
460 pixel
*dst
= (pixel
*)_dst
;
461 pixel
*src
= (pixel
*)_src
;
462 int16_t *sao_offset_val
= sao
->offset_val
[c_idx
];
463 int sao_eo_class
= sao
->eo_class
[c_idx
];
464 int init_x
= 0, init_y
= 0, width
= _width
, height
= _height
;
466 stride_dst
/= sizeof(pixel
);
467 stride_src
/= sizeof(pixel
);
469 if (sao_eo_class
!= SAO_EO_VERT
) {
471 int offset_val
= sao_offset_val
[0];
472 for (y
= 0; y
< height
; y
++) {
473 dst
[y
* stride_dst
] = av_clip_pixel(src
[y
* stride_src
] + offset_val
);
478 int offset_val
= sao_offset_val
[0];
479 int offset
= width
- 1;
480 for (x
= 0; x
< height
; x
++) {
481 dst
[x
* stride_dst
+ offset
] = av_clip_pixel(src
[x
* stride_src
+ offset
] + offset_val
);
486 if (sao_eo_class
!= SAO_EO_HORIZ
) {
488 int offset_val
= sao_offset_val
[0];
489 for (x
= init_x
; x
< width
; x
++)
490 dst
[x
] = av_clip_pixel(src
[x
] + offset_val
);
494 int offset_val
= sao_offset_val
[0];
495 int y_stride_dst
= stride_dst
* (height
- 1);
496 int y_stride_src
= stride_src
* (height
- 1);
497 for (x
= init_x
; x
< width
; x
++)
498 dst
[x
+ y_stride_dst
] = av_clip_pixel(src
[x
+ y_stride_src
] + offset_val
);
503 FUNC(sao_edge_filter
)((uint8_t *)dst
, (uint8_t *)src
, stride_dst
, stride_src
, sao
, width
, height
, c_idx
, init_x
, init_y
);
506 int save_upper_left
= !diag_edge
[0] && sao_eo_class
== SAO_EO_135D
&& !borders
[0] && !borders
[1];
507 int save_upper_right
= !diag_edge
[1] && sao_eo_class
== SAO_EO_45D
&& !borders
[1] && !borders
[2];
508 int save_lower_right
= !diag_edge
[2] && sao_eo_class
== SAO_EO_135D
&& !borders
[2] && !borders
[3];
509 int save_lower_left
= !diag_edge
[3] && sao_eo_class
== SAO_EO_45D
&& !borders
[0] && !borders
[3];
511 // Restore pixels that can't be modified
512 if(vert_edge
[0] && sao_eo_class
!= SAO_EO_VERT
) {
513 for(y
= init_y
+save_upper_left
; y
< height
-save_lower_left
; y
++)
514 dst
[y
*stride_dst
] = src
[y
*stride_src
];
516 if(vert_edge
[1] && sao_eo_class
!= SAO_EO_VERT
) {
517 for(y
= init_y
+save_upper_right
; y
< height
-save_lower_right
; y
++)
518 dst
[y
*stride_dst
+width
-1] = src
[y
*stride_src
+width
-1];
521 if(horiz_edge
[0] && sao_eo_class
!= SAO_EO_HORIZ
) {
522 for(x
= init_x
+save_upper_left
; x
< width
-save_upper_right
; x
++)
525 if(horiz_edge
[1] && sao_eo_class
!= SAO_EO_HORIZ
) {
526 for(x
= init_x
+save_lower_left
; x
< width
-save_lower_right
; x
++)
527 dst
[(height
-1)*stride_dst
+x
] = src
[(height
-1)*stride_src
+x
];
529 if(diag_edge
[0] && sao_eo_class
== SAO_EO_135D
)
531 if(diag_edge
[1] && sao_eo_class
== SAO_EO_45D
)
532 dst
[width
-1] = src
[width
-1];
533 if(diag_edge
[2] && sao_eo_class
== SAO_EO_135D
)
534 dst
[stride_dst
*(height
-1)+width
-1] = src
[stride_src
*(height
-1)+width
-1];
535 if(diag_edge
[3] && sao_eo_class
== SAO_EO_45D
)
536 dst
[stride_dst
*(height
-1)] = src
[stride_src
*(height
-1)];
543 ////////////////////////////////////////////////////////////////////////////////
545 ////////////////////////////////////////////////////////////////////////////////
546 static void FUNC(put_hevc_pel_pixels
)(int16_t *dst
,
547 uint8_t *_src
, ptrdiff_t _srcstride
,
548 int height
, intptr_t mx
, intptr_t my
, int width
)
551 pixel
*src
= (pixel
*)_src
;
552 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
554 for (y
= 0; y
< height
; y
++) {
555 for (x
= 0; x
< width
; x
++)
556 dst
[x
] = src
[x
] << (14 - BIT_DEPTH
);
562 static void FUNC(put_hevc_pel_uni_pixels
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
563 int height
, intptr_t mx
, intptr_t my
, int width
)
566 pixel
*src
= (pixel
*)_src
;
567 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
568 pixel
*dst
= (pixel
*)_dst
;
569 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
571 for (y
= 0; y
< height
; y
++) {
572 memcpy(dst
, src
, width
* sizeof(pixel
));
578 static void FUNC(put_hevc_pel_bi_pixels
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
580 int height
, intptr_t mx
, intptr_t my
, int width
)
583 pixel
*src
= (pixel
*)_src
;
584 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
585 pixel
*dst
= (pixel
*)_dst
;
586 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
588 int shift
= 14 + 1 - BIT_DEPTH
;
590 int offset
= 1 << (shift
- 1);
595 for (y
= 0; y
< height
; y
++) {
596 for (x
= 0; x
< width
; x
++)
597 dst
[x
] = av_clip_pixel(((src
[x
] << (14 - BIT_DEPTH
)) + src2
[x
] + offset
) >> shift
);
604 static void FUNC(put_hevc_pel_uni_w_pixels
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
605 int height
, int denom
, int wx
, int ox
, intptr_t mx
, intptr_t my
, int width
)
608 pixel
*src
= (pixel
*)_src
;
609 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
610 pixel
*dst
= (pixel
*)_dst
;
611 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
612 int shift
= denom
+ 14 - BIT_DEPTH
;
614 int offset
= 1 << (shift
- 1);
619 ox
= ox
* (1 << (BIT_DEPTH
- 8));
620 for (y
= 0; y
< height
; y
++) {
621 for (x
= 0; x
< width
; x
++)
622 dst
[x
] = av_clip_pixel((((src
[x
] << (14 - BIT_DEPTH
)) * wx
+ offset
) >> shift
) + ox
);
628 static void FUNC(put_hevc_pel_bi_w_pixels
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
630 int height
, int denom
, int wx0
, int wx1
,
631 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
634 pixel
*src
= (pixel
*)_src
;
635 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
636 pixel
*dst
= (pixel
*)_dst
;
637 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
639 int shift
= 14 + 1 - BIT_DEPTH
;
640 int log2Wd
= denom
+ shift
- 1;
642 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
643 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
644 for (y
= 0; y
< height
; y
++) {
645 for (x
= 0; x
< width
; x
++) {
646 dst
[x
] = av_clip_pixel(( (src
[x
] << (14 - BIT_DEPTH
)) * wx1
+ src2
[x
] * wx0
+ ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
654 ////////////////////////////////////////////////////////////////////////////////
656 ////////////////////////////////////////////////////////////////////////////////
/*
 * 8-tap filter centred on src[x]: taps cover src[x - 3 * stride] through
 * src[x + 4 * stride]. Uses the variables "filter" and "x" from the scope
 * of expansion. Arguments are parenthesised so that expressions may be
 * passed safely.
 */
#define QPEL_FILTER(src, stride)                                               \
    (filter[0] * (src)[x - 3 * (stride)] +                                     \
     filter[1] * (src)[x - 2 * (stride)] +                                     \
     filter[2] * (src)[x -     (stride)] +                                     \
     filter[3] * (src)[x               ] +                                     \
     filter[4] * (src)[x +     (stride)] +                                     \
     filter[5] * (src)[x + 2 * (stride)] +                                     \
     filter[6] * (src)[x + 3 * (stride)] +                                     \
     filter[7] * (src)[x + 4 * (stride)])
667 static void FUNC(put_hevc_qpel_h
)(int16_t *dst
,
668 uint8_t *_src
, ptrdiff_t _srcstride
,
669 int height
, intptr_t mx
, intptr_t my
, int width
)
672 pixel
*src
= (pixel
*)_src
;
673 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
674 const int8_t *filter
= ff_hevc_qpel_filters
[mx
- 1];
675 for (y
= 0; y
< height
; y
++) {
676 for (x
= 0; x
< width
; x
++)
677 dst
[x
] = QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
683 static void FUNC(put_hevc_qpel_v
)(int16_t *dst
,
684 uint8_t *_src
, ptrdiff_t _srcstride
,
685 int height
, intptr_t mx
, intptr_t my
, int width
)
688 pixel
*src
= (pixel
*)_src
;
689 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
690 const int8_t *filter
= ff_hevc_qpel_filters
[my
- 1];
691 for (y
= 0; y
< height
; y
++) {
692 for (x
= 0; x
< width
; x
++)
693 dst
[x
] = QPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8);
699 static void FUNC(put_hevc_qpel_hv
)(int16_t *dst
,
701 ptrdiff_t _srcstride
,
702 int height
, intptr_t mx
,
703 intptr_t my
, int width
)
706 const int8_t *filter
;
707 pixel
*src
= (pixel
*)_src
;
708 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
709 int16_t tmp_array
[(MAX_PB_SIZE
+ QPEL_EXTRA
) * MAX_PB_SIZE
];
710 int16_t *tmp
= tmp_array
;
712 src
-= QPEL_EXTRA_BEFORE
* srcstride
;
713 filter
= ff_hevc_qpel_filters
[mx
- 1];
714 for (y
= 0; y
< height
+ QPEL_EXTRA
; y
++) {
715 for (x
= 0; x
< width
; x
++)
716 tmp
[x
] = QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
721 tmp
= tmp_array
+ QPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
722 filter
= ff_hevc_qpel_filters
[my
- 1];
723 for (y
= 0; y
< height
; y
++) {
724 for (x
= 0; x
< width
; x
++)
725 dst
[x
] = QPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6;
731 static void FUNC(put_hevc_qpel_uni_h
)(uint8_t *_dst
, ptrdiff_t _dststride
,
732 uint8_t *_src
, ptrdiff_t _srcstride
,
733 int height
, intptr_t mx
, intptr_t my
, int width
)
736 pixel
*src
= (pixel
*)_src
;
737 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
738 pixel
*dst
= (pixel
*)_dst
;
739 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
740 const int8_t *filter
= ff_hevc_qpel_filters
[mx
- 1];
741 int shift
= 14 - BIT_DEPTH
;
744 int offset
= 1 << (shift
- 1);
749 for (y
= 0; y
< height
; y
++) {
750 for (x
= 0; x
< width
; x
++)
751 dst
[x
] = av_clip_pixel(((QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) + offset
) >> shift
);
757 static void FUNC(put_hevc_qpel_bi_h
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
759 int height
, intptr_t mx
, intptr_t my
, int width
)
762 pixel
*src
= (pixel
*)_src
;
763 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
764 pixel
*dst
= (pixel
*)_dst
;
765 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
767 const int8_t *filter
= ff_hevc_qpel_filters
[mx
- 1];
769 int shift
= 14 + 1 - BIT_DEPTH
;
771 int offset
= 1 << (shift
- 1);
776 for (y
= 0; y
< height
; y
++) {
777 for (x
= 0; x
< width
; x
++)
778 dst
[x
] = av_clip_pixel(((QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) + src2
[x
] + offset
) >> shift
);
785 static void FUNC(put_hevc_qpel_uni_v
)(uint8_t *_dst
, ptrdiff_t _dststride
,
786 uint8_t *_src
, ptrdiff_t _srcstride
,
787 int height
, intptr_t mx
, intptr_t my
, int width
)
790 pixel
*src
= (pixel
*)_src
;
791 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
792 pixel
*dst
= (pixel
*)_dst
;
793 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
794 const int8_t *filter
= ff_hevc_qpel_filters
[my
- 1];
795 int shift
= 14 - BIT_DEPTH
;
798 int offset
= 1 << (shift
- 1);
803 for (y
= 0; y
< height
; y
++) {
804 for (x
= 0; x
< width
; x
++)
805 dst
[x
] = av_clip_pixel(((QPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) + offset
) >> shift
);
812 static void FUNC(put_hevc_qpel_bi_v
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
814 int height
, intptr_t mx
, intptr_t my
, int width
)
817 pixel
*src
= (pixel
*)_src
;
818 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
819 pixel
*dst
= (pixel
*)_dst
;
820 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
822 const int8_t *filter
= ff_hevc_qpel_filters
[my
- 1];
824 int shift
= 14 + 1 - BIT_DEPTH
;
826 int offset
= 1 << (shift
- 1);
831 for (y
= 0; y
< height
; y
++) {
832 for (x
= 0; x
< width
; x
++)
833 dst
[x
] = av_clip_pixel(((QPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) + src2
[x
] + offset
) >> shift
);
840 static void FUNC(put_hevc_qpel_uni_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
,
841 uint8_t *_src
, ptrdiff_t _srcstride
,
842 int height
, intptr_t mx
, intptr_t my
, int width
)
845 const int8_t *filter
;
846 pixel
*src
= (pixel
*)_src
;
847 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
848 pixel
*dst
= (pixel
*)_dst
;
849 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
850 int16_t tmp_array
[(MAX_PB_SIZE
+ QPEL_EXTRA
) * MAX_PB_SIZE
];
851 int16_t *tmp
= tmp_array
;
852 int shift
= 14 - BIT_DEPTH
;
855 int offset
= 1 << (shift
- 1);
860 src
-= QPEL_EXTRA_BEFORE
* srcstride
;
861 filter
= ff_hevc_qpel_filters
[mx
- 1];
862 for (y
= 0; y
< height
+ QPEL_EXTRA
; y
++) {
863 for (x
= 0; x
< width
; x
++)
864 tmp
[x
] = QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
869 tmp
= tmp_array
+ QPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
870 filter
= ff_hevc_qpel_filters
[my
- 1];
872 for (y
= 0; y
< height
; y
++) {
873 for (x
= 0; x
< width
; x
++)
874 dst
[x
] = av_clip_pixel(((QPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) + offset
) >> shift
);
880 static void FUNC(put_hevc_qpel_bi_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
882 int height
, intptr_t mx
, intptr_t my
, int width
)
885 const int8_t *filter
;
886 pixel
*src
= (pixel
*)_src
;
887 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
888 pixel
*dst
= (pixel
*)_dst
;
889 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
890 int16_t tmp_array
[(MAX_PB_SIZE
+ QPEL_EXTRA
) * MAX_PB_SIZE
];
891 int16_t *tmp
= tmp_array
;
892 int shift
= 14 + 1 - BIT_DEPTH
;
894 int offset
= 1 << (shift
- 1);
899 src
-= QPEL_EXTRA_BEFORE
* srcstride
;
900 filter
= ff_hevc_qpel_filters
[mx
- 1];
901 for (y
= 0; y
< height
+ QPEL_EXTRA
; y
++) {
902 for (x
= 0; x
< width
; x
++)
903 tmp
[x
] = QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
908 tmp
= tmp_array
+ QPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
909 filter
= ff_hevc_qpel_filters
[my
- 1];
911 for (y
= 0; y
< height
; y
++) {
912 for (x
= 0; x
< width
; x
++)
913 dst
[x
] = av_clip_pixel(((QPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) + src2
[x
] + offset
) >> shift
);
920 static void FUNC(put_hevc_qpel_uni_w_h
)(uint8_t *_dst
, ptrdiff_t _dststride
,
921 uint8_t *_src
, ptrdiff_t _srcstride
,
922 int height
, int denom
, int wx
, int ox
,
923 intptr_t mx
, intptr_t my
, int width
)
926 pixel
*src
= (pixel
*)_src
;
927 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
928 pixel
*dst
= (pixel
*)_dst
;
929 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
930 const int8_t *filter
= ff_hevc_qpel_filters
[mx
- 1];
931 int shift
= denom
+ 14 - BIT_DEPTH
;
933 int offset
= 1 << (shift
- 1);
938 ox
= ox
* (1 << (BIT_DEPTH
- 8));
939 for (y
= 0; y
< height
; y
++) {
940 for (x
= 0; x
< width
; x
++)
941 dst
[x
] = av_clip_pixel((((QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) * wx
+ offset
) >> shift
) + ox
);
947 static void FUNC(put_hevc_qpel_bi_w_h
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
949 int height
, int denom
, int wx0
, int wx1
,
950 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
953 pixel
*src
= (pixel
*)_src
;
954 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
955 pixel
*dst
= (pixel
*)_dst
;
956 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
958 const int8_t *filter
= ff_hevc_qpel_filters
[mx
- 1];
960 int shift
= 14 + 1 - BIT_DEPTH
;
961 int log2Wd
= denom
+ shift
- 1;
963 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
964 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
965 for (y
= 0; y
< height
; y
++) {
966 for (x
= 0; x
< width
; x
++)
967 dst
[x
] = av_clip_pixel(((QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) * wx1
+ src2
[x
] * wx0
+
968 ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
975 static void FUNC(put_hevc_qpel_uni_w_v
)(uint8_t *_dst
, ptrdiff_t _dststride
,
976 uint8_t *_src
, ptrdiff_t _srcstride
,
977 int height
, int denom
, int wx
, int ox
,
978 intptr_t mx
, intptr_t my
, int width
)
981 pixel
*src
= (pixel
*)_src
;
982 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
983 pixel
*dst
= (pixel
*)_dst
;
984 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
985 const int8_t *filter
= ff_hevc_qpel_filters
[my
- 1];
986 int shift
= denom
+ 14 - BIT_DEPTH
;
988 int offset
= 1 << (shift
- 1);
993 ox
= ox
* (1 << (BIT_DEPTH
- 8));
994 for (y
= 0; y
< height
; y
++) {
995 for (x
= 0; x
< width
; x
++)
996 dst
[x
] = av_clip_pixel((((QPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) * wx
+ offset
) >> shift
) + ox
);
1002 static void FUNC(put_hevc_qpel_bi_w_v
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1004 int height
, int denom
, int wx0
, int wx1
,
1005 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
1008 pixel
*src
= (pixel
*)_src
;
1009 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1010 pixel
*dst
= (pixel
*)_dst
;
1011 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1013 const int8_t *filter
= ff_hevc_qpel_filters
[my
- 1];
1015 int shift
= 14 + 1 - BIT_DEPTH
;
1016 int log2Wd
= denom
+ shift
- 1;
1018 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
1019 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
1020 for (y
= 0; y
< height
; y
++) {
1021 for (x
= 0; x
< width
; x
++)
1022 dst
[x
] = av_clip_pixel(((QPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) * wx1
+ src2
[x
] * wx0
+
1023 ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
1026 src2
+= MAX_PB_SIZE
;
1030 static void FUNC(put_hevc_qpel_uni_w_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
,
1031 uint8_t *_src
, ptrdiff_t _srcstride
,
1032 int height
, int denom
, int wx
, int ox
,
1033 intptr_t mx
, intptr_t my
, int width
)
1036 const int8_t *filter
;
1037 pixel
*src
= (pixel
*)_src
;
1038 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1039 pixel
*dst
= (pixel
*)_dst
;
1040 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1041 int16_t tmp_array
[(MAX_PB_SIZE
+ QPEL_EXTRA
) * MAX_PB_SIZE
];
1042 int16_t *tmp
= tmp_array
;
1043 int shift
= denom
+ 14 - BIT_DEPTH
;
1045 int offset
= 1 << (shift
- 1);
1050 src
-= QPEL_EXTRA_BEFORE
* srcstride
;
1051 filter
= ff_hevc_qpel_filters
[mx
- 1];
1052 for (y
= 0; y
< height
+ QPEL_EXTRA
; y
++) {
1053 for (x
= 0; x
< width
; x
++)
1054 tmp
[x
] = QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1059 tmp
= tmp_array
+ QPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1060 filter
= ff_hevc_qpel_filters
[my
- 1];
1062 ox
= ox
* (1 << (BIT_DEPTH
- 8));
1063 for (y
= 0; y
< height
; y
++) {
1064 for (x
= 0; x
< width
; x
++)
1065 dst
[x
] = av_clip_pixel((((QPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) * wx
+ offset
) >> shift
) + ox
);
1071 static void FUNC(put_hevc_qpel_bi_w_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1073 int height
, int denom
, int wx0
, int wx1
,
1074 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
1077 const int8_t *filter
;
1078 pixel
*src
= (pixel
*)_src
;
1079 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1080 pixel
*dst
= (pixel
*)_dst
;
1081 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1082 int16_t tmp_array
[(MAX_PB_SIZE
+ QPEL_EXTRA
) * MAX_PB_SIZE
];
1083 int16_t *tmp
= tmp_array
;
1084 int shift
= 14 + 1 - BIT_DEPTH
;
1085 int log2Wd
= denom
+ shift
- 1;
1087 src
-= QPEL_EXTRA_BEFORE
* srcstride
;
1088 filter
= ff_hevc_qpel_filters
[mx
- 1];
1089 for (y
= 0; y
< height
+ QPEL_EXTRA
; y
++) {
1090 for (x
= 0; x
< width
; x
++)
1091 tmp
[x
] = QPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1096 tmp
= tmp_array
+ QPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1097 filter
= ff_hevc_qpel_filters
[my
- 1];
1099 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
1100 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
1101 for (y
= 0; y
< height
; y
++) {
1102 for (x
= 0; x
< width
; x
++)
1103 dst
[x
] = av_clip_pixel(((QPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) * wx1
+ src2
[x
] * wx0
+
1104 ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
1107 src2
+= MAX_PB_SIZE
;
1111 ////////////////////////////////////////////////////////////////////////////////
1113 ////////////////////////////////////////////////////////////////////////////////
/*
 * 4-tap filter: taps cover src[x - stride] through src[x + 2 * stride].
 * Uses the variables "filter" and "x" from the scope of expansion.
 * Arguments are parenthesised so that expressions may be passed safely.
 */
#define EPEL_FILTER(src, stride)                                               \
    (filter[0] * (src)[x -     (stride)] +                                     \
     filter[1] * (src)[x               ] +                                     \
     filter[2] * (src)[x +     (stride)] +                                     \
     filter[3] * (src)[x + 2 * (stride)])
1120 static void FUNC(put_hevc_epel_h
)(int16_t *dst
,
1121 uint8_t *_src
, ptrdiff_t _srcstride
,
1122 int height
, intptr_t mx
, intptr_t my
, int width
)
1125 pixel
*src
= (pixel
*)_src
;
1126 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1127 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1128 for (y
= 0; y
< height
; y
++) {
1129 for (x
= 0; x
< width
; x
++)
1130 dst
[x
] = EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1136 static void FUNC(put_hevc_epel_v
)(int16_t *dst
,
1137 uint8_t *_src
, ptrdiff_t _srcstride
,
1138 int height
, intptr_t mx
, intptr_t my
, int width
)
1141 pixel
*src
= (pixel
*)_src
;
1142 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1143 const int8_t *filter
= ff_hevc_epel_filters
[my
- 1];
1145 for (y
= 0; y
< height
; y
++) {
1146 for (x
= 0; x
< width
; x
++)
1147 dst
[x
] = EPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8);
1153 static void FUNC(put_hevc_epel_hv
)(int16_t *dst
,
1154 uint8_t *_src
, ptrdiff_t _srcstride
,
1155 int height
, intptr_t mx
, intptr_t my
, int width
)
1158 pixel
*src
= (pixel
*)_src
;
1159 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1160 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1161 int16_t tmp_array
[(MAX_PB_SIZE
+ EPEL_EXTRA
) * MAX_PB_SIZE
];
1162 int16_t *tmp
= tmp_array
;
1164 src
-= EPEL_EXTRA_BEFORE
* srcstride
;
1166 for (y
= 0; y
< height
+ EPEL_EXTRA
; y
++) {
1167 for (x
= 0; x
< width
; x
++)
1168 tmp
[x
] = EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1173 tmp
= tmp_array
+ EPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1174 filter
= ff_hevc_epel_filters
[my
- 1];
1176 for (y
= 0; y
< height
; y
++) {
1177 for (x
= 0; x
< width
; x
++)
1178 dst
[x
] = EPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6;
1184 static void FUNC(put_hevc_epel_uni_h
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1185 int height
, intptr_t mx
, intptr_t my
, int width
)
1188 pixel
*src
= (pixel
*)_src
;
1189 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1190 pixel
*dst
= (pixel
*)_dst
;
1191 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1192 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1193 int shift
= 14 - BIT_DEPTH
;
1195 int offset
= 1 << (shift
- 1);
1200 for (y
= 0; y
< height
; y
++) {
1201 for (x
= 0; x
< width
; x
++)
1202 dst
[x
] = av_clip_pixel(((EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) + offset
) >> shift
);
1208 static void FUNC(put_hevc_epel_bi_h
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1210 int height
, intptr_t mx
, intptr_t my
, int width
)
1213 pixel
*src
= (pixel
*)_src
;
1214 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1215 pixel
*dst
= (pixel
*)_dst
;
1216 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1217 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1218 int shift
= 14 + 1 - BIT_DEPTH
;
1220 int offset
= 1 << (shift
- 1);
1225 for (y
= 0; y
< height
; y
++) {
1226 for (x
= 0; x
< width
; x
++) {
1227 dst
[x
] = av_clip_pixel(((EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) + src2
[x
] + offset
) >> shift
);
1231 src2
+= MAX_PB_SIZE
;
1235 static void FUNC(put_hevc_epel_uni_v
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1236 int height
, intptr_t mx
, intptr_t my
, int width
)
1239 pixel
*src
= (pixel
*)_src
;
1240 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1241 pixel
*dst
= (pixel
*)_dst
;
1242 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1243 const int8_t *filter
= ff_hevc_epel_filters
[my
- 1];
1244 int shift
= 14 - BIT_DEPTH
;
1246 int offset
= 1 << (shift
- 1);
1251 for (y
= 0; y
< height
; y
++) {
1252 for (x
= 0; x
< width
; x
++)
1253 dst
[x
] = av_clip_pixel(((EPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) + offset
) >> shift
);
1259 static void FUNC(put_hevc_epel_bi_v
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1261 int height
, intptr_t mx
, intptr_t my
, int width
)
1264 pixel
*src
= (pixel
*)_src
;
1265 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1266 const int8_t *filter
= ff_hevc_epel_filters
[my
- 1];
1267 pixel
*dst
= (pixel
*)_dst
;
1268 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1269 int shift
= 14 + 1 - BIT_DEPTH
;
1271 int offset
= 1 << (shift
- 1);
1276 for (y
= 0; y
< height
; y
++) {
1277 for (x
= 0; x
< width
; x
++)
1278 dst
[x
] = av_clip_pixel(((EPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) + src2
[x
] + offset
) >> shift
);
1281 src2
+= MAX_PB_SIZE
;
1285 static void FUNC(put_hevc_epel_uni_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1286 int height
, intptr_t mx
, intptr_t my
, int width
)
1289 pixel
*src
= (pixel
*)_src
;
1290 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1291 pixel
*dst
= (pixel
*)_dst
;
1292 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1293 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1294 int16_t tmp_array
[(MAX_PB_SIZE
+ EPEL_EXTRA
) * MAX_PB_SIZE
];
1295 int16_t *tmp
= tmp_array
;
1296 int shift
= 14 - BIT_DEPTH
;
1298 int offset
= 1 << (shift
- 1);
1303 src
-= EPEL_EXTRA_BEFORE
* srcstride
;
1305 for (y
= 0; y
< height
+ EPEL_EXTRA
; y
++) {
1306 for (x
= 0; x
< width
; x
++)
1307 tmp
[x
] = EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1312 tmp
= tmp_array
+ EPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1313 filter
= ff_hevc_epel_filters
[my
- 1];
1315 for (y
= 0; y
< height
; y
++) {
1316 for (x
= 0; x
< width
; x
++)
1317 dst
[x
] = av_clip_pixel(((EPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) + offset
) >> shift
);
1323 static void FUNC(put_hevc_epel_bi_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1325 int height
, intptr_t mx
, intptr_t my
, int width
)
1328 pixel
*src
= (pixel
*)_src
;
1329 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1330 pixel
*dst
= (pixel
*)_dst
;
1331 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1332 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1333 int16_t tmp_array
[(MAX_PB_SIZE
+ EPEL_EXTRA
) * MAX_PB_SIZE
];
1334 int16_t *tmp
= tmp_array
;
1335 int shift
= 14 + 1 - BIT_DEPTH
;
1337 int offset
= 1 << (shift
- 1);
1342 src
-= EPEL_EXTRA_BEFORE
* srcstride
;
1344 for (y
= 0; y
< height
+ EPEL_EXTRA
; y
++) {
1345 for (x
= 0; x
< width
; x
++)
1346 tmp
[x
] = EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1351 tmp
= tmp_array
+ EPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1352 filter
= ff_hevc_epel_filters
[my
- 1];
1354 for (y
= 0; y
< height
; y
++) {
1355 for (x
= 0; x
< width
; x
++)
1356 dst
[x
] = av_clip_pixel(((EPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) + src2
[x
] + offset
) >> shift
);
1359 src2
+= MAX_PB_SIZE
;
1363 static void FUNC(put_hevc_epel_uni_w_h
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1364 int height
, int denom
, int wx
, int ox
, intptr_t mx
, intptr_t my
, int width
)
1367 pixel
*src
= (pixel
*)_src
;
1368 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1369 pixel
*dst
= (pixel
*)_dst
;
1370 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1371 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1372 int shift
= denom
+ 14 - BIT_DEPTH
;
1374 int offset
= 1 << (shift
- 1);
1379 ox
= ox
* (1 << (BIT_DEPTH
- 8));
1380 for (y
= 0; y
< height
; y
++) {
1381 for (x
= 0; x
< width
; x
++) {
1382 dst
[x
] = av_clip_pixel((((EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) * wx
+ offset
) >> shift
) + ox
);
1389 static void FUNC(put_hevc_epel_bi_w_h
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1391 int height
, int denom
, int wx0
, int wx1
,
1392 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
1395 pixel
*src
= (pixel
*)_src
;
1396 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1397 pixel
*dst
= (pixel
*)_dst
;
1398 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1399 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1400 int shift
= 14 + 1 - BIT_DEPTH
;
1401 int log2Wd
= denom
+ shift
- 1;
1403 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
1404 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
1405 for (y
= 0; y
< height
; y
++) {
1406 for (x
= 0; x
< width
; x
++)
1407 dst
[x
] = av_clip_pixel(((EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8)) * wx1
+ src2
[x
] * wx0
+
1408 ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
1411 src2
+= MAX_PB_SIZE
;
1415 static void FUNC(put_hevc_epel_uni_w_v
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1416 int height
, int denom
, int wx
, int ox
, intptr_t mx
, intptr_t my
, int width
)
1419 pixel
*src
= (pixel
*)_src
;
1420 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1421 pixel
*dst
= (pixel
*)_dst
;
1422 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1423 const int8_t *filter
= ff_hevc_epel_filters
[my
- 1];
1424 int shift
= denom
+ 14 - BIT_DEPTH
;
1426 int offset
= 1 << (shift
- 1);
1431 ox
= ox
* (1 << (BIT_DEPTH
- 8));
1432 for (y
= 0; y
< height
; y
++) {
1433 for (x
= 0; x
< width
; x
++) {
1434 dst
[x
] = av_clip_pixel((((EPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) * wx
+ offset
) >> shift
) + ox
);
1441 static void FUNC(put_hevc_epel_bi_w_v
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1443 int height
, int denom
, int wx0
, int wx1
,
1444 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
1447 pixel
*src
= (pixel
*)_src
;
1448 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1449 const int8_t *filter
= ff_hevc_epel_filters
[my
- 1];
1450 pixel
*dst
= (pixel
*)_dst
;
1451 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1452 int shift
= 14 + 1 - BIT_DEPTH
;
1453 int log2Wd
= denom
+ shift
- 1;
1455 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
1456 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
1457 for (y
= 0; y
< height
; y
++) {
1458 for (x
= 0; x
< width
; x
++)
1459 dst
[x
] = av_clip_pixel(((EPEL_FILTER(src
, srcstride
) >> (BIT_DEPTH
- 8)) * wx1
+ src2
[x
] * wx0
+
1460 ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
1463 src2
+= MAX_PB_SIZE
;
1467 static void FUNC(put_hevc_epel_uni_w_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1468 int height
, int denom
, int wx
, int ox
, intptr_t mx
, intptr_t my
, int width
)
1471 pixel
*src
= (pixel
*)_src
;
1472 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1473 pixel
*dst
= (pixel
*)_dst
;
1474 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1475 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1476 int16_t tmp_array
[(MAX_PB_SIZE
+ EPEL_EXTRA
) * MAX_PB_SIZE
];
1477 int16_t *tmp
= tmp_array
;
1478 int shift
= denom
+ 14 - BIT_DEPTH
;
1480 int offset
= 1 << (shift
- 1);
1485 src
-= EPEL_EXTRA_BEFORE
* srcstride
;
1487 for (y
= 0; y
< height
+ EPEL_EXTRA
; y
++) {
1488 for (x
= 0; x
< width
; x
++)
1489 tmp
[x
] = EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1494 tmp
= tmp_array
+ EPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1495 filter
= ff_hevc_epel_filters
[my
- 1];
1497 ox
= ox
* (1 << (BIT_DEPTH
- 8));
1498 for (y
= 0; y
< height
; y
++) {
1499 for (x
= 0; x
< width
; x
++)
1500 dst
[x
] = av_clip_pixel((((EPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) * wx
+ offset
) >> shift
) + ox
);
1506 static void FUNC(put_hevc_epel_bi_w_hv
)(uint8_t *_dst
, ptrdiff_t _dststride
, uint8_t *_src
, ptrdiff_t _srcstride
,
1508 int height
, int denom
, int wx0
, int wx1
,
1509 int ox0
, int ox1
, intptr_t mx
, intptr_t my
, int width
)
1512 pixel
*src
= (pixel
*)_src
;
1513 ptrdiff_t srcstride
= _srcstride
/ sizeof(pixel
);
1514 pixel
*dst
= (pixel
*)_dst
;
1515 ptrdiff_t dststride
= _dststride
/ sizeof(pixel
);
1516 const int8_t *filter
= ff_hevc_epel_filters
[mx
- 1];
1517 int16_t tmp_array
[(MAX_PB_SIZE
+ EPEL_EXTRA
) * MAX_PB_SIZE
];
1518 int16_t *tmp
= tmp_array
;
1519 int shift
= 14 + 1 - BIT_DEPTH
;
1520 int log2Wd
= denom
+ shift
- 1;
1522 src
-= EPEL_EXTRA_BEFORE
* srcstride
;
1524 for (y
= 0; y
< height
+ EPEL_EXTRA
; y
++) {
1525 for (x
= 0; x
< width
; x
++)
1526 tmp
[x
] = EPEL_FILTER(src
, 1) >> (BIT_DEPTH
- 8);
1531 tmp
= tmp_array
+ EPEL_EXTRA_BEFORE
* MAX_PB_SIZE
;
1532 filter
= ff_hevc_epel_filters
[my
- 1];
1534 ox0
= ox0
* (1 << (BIT_DEPTH
- 8));
1535 ox1
= ox1
* (1 << (BIT_DEPTH
- 8));
1536 for (y
= 0; y
< height
; y
++) {
1537 for (x
= 0; x
< width
; x
++)
1538 dst
[x
] = av_clip_pixel(((EPEL_FILTER(tmp
, MAX_PB_SIZE
) >> 6) * wx1
+ src2
[x
] * wx0
+
1539 ((ox0
+ ox1
+ 1) << log2Wd
)) >> (log2Wd
+ 1));
1542 src2
+= MAX_PB_SIZE
;
/*
 * Sample accessors for the deblocking filters. They expand in terms of the
 * caller's local `pix`, `xstride` and `ystride`: P3..P0 are the four samples
 * before pix along xstride (P0 nearest), Q0..Q3 the four samples starting at
 * pix. All of them are lvalues.
 */
#define P3 pix[-4 * xstride]
#define P2 pix[-3 * xstride]
#define P1 pix[-2 * xstride]
#define P0 pix[-1 * xstride]
#define Q0 pix[0 * xstride]
#define Q1 pix[1 * xstride]
#define Q2 pix[2 * xstride]
#define Q3 pix[3 * xstride]

// line three. used only for deblocking decision
/* Same positions as above, displaced by three steps of ystride. */
#define TP3 pix[-4 * xstride + 3 * ystride]
#define TP2 pix[-3 * xstride + 3 * ystride]
#define TP1 pix[-2 * xstride + 3 * ystride]
#define TP0 pix[-1 * xstride + 3 * ystride]
#define TQ0 pix[0 * xstride + 3 * ystride]
#define TQ1 pix[1 * xstride + 3 * ystride]
#define TQ2 pix[2 * xstride + 3 * ystride]
#define TQ3 pix[3 * xstride + 3 * ystride]
1564 static void FUNC(hevc_loop_filter_luma
)(uint8_t *_pix
,
1565 ptrdiff_t _xstride
, ptrdiff_t _ystride
,
1567 uint8_t *_no_p
, uint8_t *_no_q
)
1570 pixel
*pix
= (pixel
*)_pix
;
1571 ptrdiff_t xstride
= _xstride
/ sizeof(pixel
);
1572 ptrdiff_t ystride
= _ystride
/ sizeof(pixel
);
1574 beta
<<= BIT_DEPTH
- 8;
1576 for (j
= 0; j
< 2; j
++) {
1577 const int dp0
= abs(P2
- 2 * P1
+ P0
);
1578 const int dq0
= abs(Q2
- 2 * Q1
+ Q0
);
1579 const int dp3
= abs(TP2
- 2 * TP1
+ TP0
);
1580 const int dq3
= abs(TQ2
- 2 * TQ1
+ TQ0
);
1581 const int d0
= dp0
+ dq0
;
1582 const int d3
= dp3
+ dq3
;
1583 const int tc
= _tc
[j
] << (BIT_DEPTH
- 8);
1584 const int no_p
= _no_p
[j
];
1585 const int no_q
= _no_q
[j
];
1587 if (d0
+ d3
>= beta
) {
1591 const int beta_3
= beta
>> 3;
1592 const int beta_2
= beta
>> 2;
1593 const int tc25
= ((tc
* 5 + 1) >> 1);
1595 if (abs(P3
- P0
) + abs(Q3
- Q0
) < beta_3
&& abs(P0
- Q0
) < tc25
&&
1596 abs(TP3
- TP0
) + abs(TQ3
- TQ0
) < beta_3
&& abs(TP0
- TQ0
) < tc25
&&
1597 (d0
<< 1) < beta_2
&& (d3
<< 1) < beta_2
) {
1599 const int tc2
= tc
<< 1;
1600 for (d
= 0; d
< 4; d
++) {
1610 P0
= p0
+ av_clip(((p2
+ 2 * p1
+ 2 * p0
+ 2 * q0
+ q1
+ 4) >> 3) - p0
, -tc2
, tc2
);
1611 P1
= p1
+ av_clip(((p2
+ p1
+ p0
+ q0
+ 2) >> 2) - p1
, -tc2
, tc2
);
1612 P2
= p2
+ av_clip(((2 * p3
+ 3 * p2
+ p1
+ p0
+ q0
+ 4) >> 3) - p2
, -tc2
, tc2
);
1615 Q0
= q0
+ av_clip(((p1
+ 2 * p0
+ 2 * q0
+ 2 * q1
+ q2
+ 4) >> 3) - q0
, -tc2
, tc2
);
1616 Q1
= q1
+ av_clip(((p0
+ q0
+ q1
+ q2
+ 2) >> 2) - q1
, -tc2
, tc2
);
1617 Q2
= q2
+ av_clip(((2 * q3
+ 3 * q2
+ q1
+ q0
+ p0
+ 4) >> 3) - q2
, -tc2
, tc2
);
1621 } else { // normal filtering
1624 const int tc_2
= tc
>> 1;
1625 if (dp0
+ dp3
< ((beta
+ (beta
>> 1)) >> 3))
1627 if (dq0
+ dq3
< ((beta
+ (beta
>> 1)) >> 3))
1630 for (d
= 0; d
< 4; d
++) {
1637 int delta0
= (9 * (q0
- p0
) - 3 * (q1
- p1
) + 8) >> 4;
1638 if (abs(delta0
) < 10 * tc
) {
1639 delta0
= av_clip(delta0
, -tc
, tc
);
1641 P0
= av_clip_pixel(p0
+ delta0
);
1643 Q0
= av_clip_pixel(q0
- delta0
);
1644 if (!no_p
&& nd_p
> 1) {
1645 const int deltap1
= av_clip((((p2
+ p0
+ 1) >> 1) - p1
+ delta0
) >> 1, -tc_2
, tc_2
);
1646 P1
= av_clip_pixel(p1
+ deltap1
);
1648 if (!no_q
&& nd_q
> 1) {
1649 const int deltaq1
= av_clip((((q2
+ q0
+ 1) >> 1) - q1
- delta0
) >> 1, -tc_2
, tc_2
);
1650 Q1
= av_clip_pixel(q1
+ deltaq1
);
1660 static void FUNC(hevc_loop_filter_chroma
)(uint8_t *_pix
, ptrdiff_t _xstride
,
1661 ptrdiff_t _ystride
, int *_tc
,
1662 uint8_t *_no_p
, uint8_t *_no_q
)
1664 int d
, j
, no_p
, no_q
;
1665 pixel
*pix
= (pixel
*)_pix
;
1666 ptrdiff_t xstride
= _xstride
/ sizeof(pixel
);
1667 ptrdiff_t ystride
= _ystride
/ sizeof(pixel
);
1669 for (j
= 0; j
< 2; j
++) {
1670 const int tc
= _tc
[j
] << (BIT_DEPTH
- 8);
1678 for (d
= 0; d
< 4; d
++) {
1684 delta0
= av_clip((((q0
- p0
) * 4) + p1
- q1
+ 4) >> 3, -tc
, tc
);
1686 P0
= av_clip_pixel(p0
+ delta0
);
1688 Q0
= av_clip_pixel(q0
- delta0
);
1694 static void FUNC(hevc_h_loop_filter_chroma
)(uint8_t *pix
, ptrdiff_t stride
,
1695 int32_t *tc
, uint8_t *no_p
,
1698 FUNC(hevc_loop_filter_chroma
)(pix
, stride
, sizeof(pixel
), tc
, no_p
, no_q
);
1701 static void FUNC(hevc_v_loop_filter_chroma
)(uint8_t *pix
, ptrdiff_t stride
,
1702 int32_t *tc
, uint8_t *no_p
,
1705 FUNC(hevc_loop_filter_chroma
)(pix
, sizeof(pixel
), stride
, tc
, no_p
, no_q
);
1708 static void FUNC(hevc_h_loop_filter_luma
)(uint8_t *pix
, ptrdiff_t stride
,
1709 int beta
, int32_t *tc
, uint8_t *no_p
,
1712 FUNC(hevc_loop_filter_luma
)(pix
, stride
, sizeof(pixel
),
1713 beta
, tc
, no_p
, no_q
);
1716 static void FUNC(hevc_v_loop_filter_luma
)(uint8_t *pix
, ptrdiff_t stride
,
1717 int beta
, int32_t *tc
, uint8_t *no_p
,
1720 FUNC(hevc_loop_filter_luma
)(pix
, sizeof(pixel
), stride
,
1721 beta
, tc
, no_p
, no_q
);