/*
 * HEVC video decoder
 *
 * Copyright (C) 2012 - 2013 Guillaume Martres
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/pixdesc.h"

#include "bit_depth_template.c"
#include "hevcpred.h"

#define POS(x, y) src[(x) + stride * (y)]
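/*
 * Gather the neighbouring reference samples for one transform block, replace
 * the ones that are unavailable (or, with constrained intra prediction,
 * inter-coded), optionally smooth them, and call the predictor selected by
 * the intra mode.
 */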
static av_always_inline void FUNC(intra_pred)(HEVCContext *s, int x0, int y0,
                                              int log2_size, int c_idx)
{
#define PU(x) \
    ((x) >> s->sps->log2_min_pu_size)
#define MVF(x, y) \
    (s->ref->tab_mvf[(x) + (y) * min_pu_width])
#define MVF_PU(x, y) \
    MVF(PU(x0 + ((x) << hshift)), PU(y0 + ((y) << vshift)))
#define IS_INTRA(x, y) \
    (MVF_PU(x, y).pred_flag == PF_INTRA)
#define MIN_TB_ADDR_ZS(x, y) \
    s->pps->min_tb_addr_zs[(y) * (s->sps->tb_mask+2) + (x)]
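/*
 * EXTEND fills a run of reference samples with a constant value, four pixels
 * at a time.  The *_CIP variants walk along a border and, when constrained
 * intra prediction is on, overwrite samples taken from non-intra PUs with the
 * value propagated from the nearest intra-coded neighbour.
 */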
#define EXTEND(ptr, val, len)         \
do {                                  \
    pixel4 pix = PIXEL_SPLAT_X4(val); \
    for (i = 0; i < (len); i += 4)    \
        AV_WN4P(ptr + i, pix);        \
} while (0)
#define EXTEND_RIGHT_CIP(ptr, start, length)                \
        for (i = start; i < (start) + (length); i += 4)     \
            if (!IS_INTRA(i, -1))                           \
                AV_WN4P(&ptr[i], a);                        \
            else                                            \
                a = PIXEL_SPLAT_X4(ptr[i+3])
#define EXTEND_LEFT_CIP(ptr, start, length)          \
        for (i = start; i > (start) - (length); i--) \
            if (!IS_INTRA(i - 1, -1))                \
                ptr[i - 1] = ptr[i]
#define EXTEND_UP_CIP(ptr, start, length)                   \
        for (i = (start); i > (start) - (length); i -= 4)   \
            if (!IS_INTRA(-1, i - 3))                       \
                AV_WN4P(&ptr[i - 3], a);                    \
            else                                            \
                a = PIXEL_SPLAT_X4(ptr[i - 3])
#define EXTEND_DOWN_CIP(ptr, start, length)                 \
        for (i = start; i < (start) + (length); i += 4)     \
            if (!IS_INTRA(-1, i))                           \
                AV_WN4P(&ptr[i], a);                        \
            else                                            \
                a = PIXEL_SPLAT_X4(ptr[i + 3])
    HEVCLocalContext *lc = s->HEVClc;
    int i;
    int hshift = s->sps->hshift[c_idx];
    int vshift = s->sps->vshift[c_idx];
    int size = (1 << log2_size);
    int size_in_luma_h = size << hshift;
    int size_in_tbs_h  = size_in_luma_h >> s->sps->log2_min_tb_size;
    int size_in_luma_v = size << vshift;
    int size_in_tbs_v  = size_in_luma_v >> s->sps->log2_min_tb_size;
    int x = x0 >> hshift;
    int y = y0 >> vshift;

    int x_tb = (x0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
    int y_tb = (y0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;

    int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb);

    ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(pixel);
    pixel *src = (pixel *)s->frame->data[c_idx] + x + y * stride;
    int min_pu_width = s->sps->min_pu_width;

    enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c :
                              lc->tu.intra_pred_mode;
    pixel4 a;
    pixel  left_array[2 * MAX_TB_SIZE + 1];
    pixel  filtered_left_array[2 * MAX_TB_SIZE + 1];
    pixel  top_array[2 * MAX_TB_SIZE + 1];
    pixel  filtered_top_array[2 * MAX_TB_SIZE + 1];

    pixel  *left          = left_array + 1;
    pixel  *top           = top_array  + 1;
    pixel  *filtered_left = filtered_left_array + 1;
    pixel  *filtered_top  = filtered_top_array  + 1;
    int cand_bottom_left = lc->na.cand_bottom_left &&
                           cur_tb_addr > MIN_TB_ADDR_ZS(x_tb - 1, (y_tb + size_in_tbs_v) & s->sps->tb_mask);
    int cand_left        = lc->na.cand_left;
    int cand_up_left     = lc->na.cand_up_left;
    int cand_up          = lc->na.cand_up;
    int cand_up_right    = lc->na.cand_up_right &&
                           cur_tb_addr > MIN_TB_ADDR_ZS((x_tb + size_in_tbs_h) & s->sps->tb_mask, y_tb - 1);

    int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma_v, s->sps->height) -
                            (y0 + size_in_luma_v)) >> vshift;
    int top_right_size   = (FFMIN(x0 + 2 * size_in_luma_h, s->sps->width) -
                            (x0 + size_in_luma_h)) >> hshift;
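    /*
     * With constrained intra prediction, a neighbour may only be used if the
     * PUs it comes from were themselves intra coded, so re-derive the cand_*
     * flags from the motion vector field along each border.
     */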
    if (s->pps->constrained_intra_pred_flag == 1) {
        int size_in_luma_pu_v = PU(size_in_luma_v);
        int size_in_luma_pu_h = PU(size_in_luma_h);
        int on_pu_edge_x      = !(x0 & ((1 << s->sps->log2_min_pu_size) - 1));
        int on_pu_edge_y      = !(y0 & ((1 << s->sps->log2_min_pu_size) - 1));
        if (!size_in_luma_pu_h)
            size_in_luma_pu_h++;
        if (cand_bottom_left == 1 && on_pu_edge_x) {
            int x_left_pu   = PU(x0 - 1);
            int y_bottom_pu = PU(y0 + size_in_luma_v);
            int max         = FFMIN(size_in_luma_pu_v, s->sps->min_pu_height - y_bottom_pu);
            cand_bottom_left = 0;
            for (i = 0; i < max; i += 2)
                cand_bottom_left |= (MVF(x_left_pu, y_bottom_pu + i).pred_flag == PF_INTRA);
        }
        if (cand_left == 1 && on_pu_edge_x) {
            int x_left_pu = PU(x0 - 1);
            int y_left_pu = PU(y0);
            int max       = FFMIN(size_in_luma_pu_v, s->sps->min_pu_height - y_left_pu);
            cand_left = 0;
            for (i = 0; i < max; i += 2)
                cand_left |= (MVF(x_left_pu, y_left_pu + i).pred_flag == PF_INTRA);
        }
        if (cand_up_left == 1) {
            int x_left_pu = PU(x0 - 1);
            int y_top_pu  = PU(y0 - 1);
            cand_up_left  = MVF(x_left_pu, y_top_pu).pred_flag == PF_INTRA;
        }
        if (cand_up == 1 && on_pu_edge_y) {
            int x_top_pu = PU(x0);
            int y_top_pu = PU(y0 - 1);
            int max      = FFMIN(size_in_luma_pu_h, s->sps->min_pu_width - x_top_pu);
            cand_up = 0;
            for (i = 0; i < max; i += 2)
                cand_up |= (MVF(x_top_pu + i, y_top_pu).pred_flag == PF_INTRA);
        }
        if (cand_up_right == 1 && on_pu_edge_y) {
            int y_top_pu   = PU(y0 - 1);
            int x_right_pu = PU(x0 + size_in_luma_h);
            int max        = FFMIN(size_in_luma_pu_h, s->sps->min_pu_width - x_right_pu);
            cand_up_right = 0;
            for (i = 0; i < max; i += 2)
                cand_up_right |= (MVF(x_right_pu + i, y_top_pu).pred_flag == PF_INTRA);
        }
        memset(left, 128, 2 * MAX_TB_SIZE * sizeof(pixel));
        memset(top , 128, 2 * MAX_TB_SIZE * sizeof(pixel));
        top[-1] = 128;
    }
    if (cand_up_left) {
        left[-1] = POS(-1, -1);
        top[-1]  = left[-1];
    }
    if (cand_up)
        memcpy(top, src - stride, size * sizeof(pixel));
    if (cand_up_right) {
        memcpy(top + size, src - stride + size, size * sizeof(pixel));
        EXTEND(top + size + top_right_size, POS(size + top_right_size - 1, -1),
               size - top_right_size);
    }
    if (cand_left)
        for (i = 0; i < size; i++)
            left[i] = POS(-1, i);
    if (cand_bottom_left) {
        for (i = size; i < size + bottom_left_size; i++)
            left[i] = POS(-1, i);
        EXTEND(left + size + bottom_left_size, POS(-1, size + bottom_left_size - 1),
               size - bottom_left_size);
    }
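    /*
     * Constrained intra prediction: reference samples that come from
     * inter-coded PUs may not be used; replace them with the nearest sample
     * that does come from an intra-coded PU.
     */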
    if (s->pps->constrained_intra_pred_flag == 1) {
        if (cand_bottom_left || cand_left || cand_up_left || cand_up || cand_up_right) {
            int size_max_x = x0 + ((2 * size) << hshift) < s->sps->width ?
                             2 * size : (s->sps->width - x0) >> hshift;
            int size_max_y = y0 + ((2 * size) << vshift) < s->sps->height ?
                             2 * size : (s->sps->height - y0) >> vshift;
            int j = size + (cand_bottom_left ? bottom_left_size : 0) - 1;
            if (!cand_up_right) {
                size_max_x = x0 + ((size) << hshift) < s->sps->width ?
                             size : (s->sps->width - x0) >> hshift;
            }
            if (!cand_bottom_left) {
                size_max_y = y0 + ((size) << vshift) < s->sps->height ?
                             size : (s->sps->height - y0) >> vshift;
            }
            if (cand_bottom_left || cand_left || cand_up_left) {
                while (j > -1 && !IS_INTRA(-1, j))
                    j--;
                if (!IS_INTRA(-1, j)) {
                    j = 0;
                    while (j < size_max_x && !IS_INTRA(j, -1))
                        j++;
                    EXTEND_LEFT_CIP(top, j, j + 1);
                    left[-1] = top[-1];
                }
            } else {
                j = 0;
                while (j < size_max_x && !IS_INTRA(j, -1))
                    j++;
                if (j > 0)
                    if (x0 > 0) {
                        EXTEND_LEFT_CIP(top, j, j + 1);
                    } else {
                        EXTEND_LEFT_CIP(top, j, j);
                        top[-1] = top[0];
                    }
                left[-1] = top[-1];
            }
            left[-1] = top[-1];
            if (cand_bottom_left || cand_left) {
                a = PIXEL_SPLAT_X4(left[-1]);
                EXTEND_DOWN_CIP(left, 0, size_max_y);
            }
            if (!cand_left)
                EXTEND(left, left[-1], size);
            if (!cand_bottom_left)
                EXTEND(left + size, left[size - 1], size);
            if (x0 != 0 && y0 != 0) {
                a = PIXEL_SPLAT_X4(left[size_max_y - 1]);
                EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
                if (!IS_INTRA(-1, -1))
                    left[-1] = left[0];
            } else if (x0 == 0) {
                EXTEND(left, 0, size_max_y);
            } else {
                a = PIXEL_SPLAT_X4(left[size_max_y - 1]);
                EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
            }
            top[-1] = left[-1];
            if (y0 != 0) {
                a = PIXEL_SPLAT_X4(left[-1]);
                EXTEND_RIGHT_CIP(top, 0, size_max_x);
            }
        }
    }
    // Infer the unavailable samples
    if (!cand_bottom_left) {
        if (cand_left) {
            EXTEND(left + size, left[size - 1], size);
        } else if (cand_up_left) {
            EXTEND(left, left[-1], 2 * size);
            cand_left = 1;
        } else if (cand_up) {
            left[-1] = top[0];
            EXTEND(left, left[-1], 2 * size);
            cand_up_left = 1;
            cand_left    = 1;
        } else if (cand_up_right) {
            EXTEND(top, top[size], size);
            left[-1] = top[size];
            EXTEND(left, left[-1], 2 * size);
            cand_up      = 1;
            cand_up_left = 1;
            cand_left    = 1;
        } else { // No samples available
            left[-1] = (1 << (BIT_DEPTH - 1));
            EXTEND(top,  left[-1], 2 * size);
            EXTEND(left, left[-1], 2 * size);
        }
    }
    if (!cand_left)
        EXTEND(left, left[size], size);
    if (!cand_up_left) {
        left[-1] = left[0];
    }
    if (!cand_up)
        EXTEND(top, left[-1], size);
    if (!cand_up_right)
        EXTEND(top + size, top[size - 1], size);

    top[-1] = left[-1];
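    /*
     * Reference sample smoothing: a [1 2 1] filter over the reference array,
     * with an optional strong (bilinear) smoothing mode for 32x32 luma blocks
     * when the references are already close to linear.
     */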
    if (!s->sps->intra_smoothing_disabled_flag && (c_idx == 0 || s->sps->chroma_format_idc == 3)) {
        if (mode != INTRA_DC && size != 4) {
            int intra_hor_ver_dist_thresh[] = { 7, 1, 0 };
            int min_dist_vert_hor = FFMIN(FFABS((int)(mode - 26U)),
                                          FFABS((int)(mode - 10U)));
            if (min_dist_vert_hor > intra_hor_ver_dist_thresh[log2_size - 3]) {
                int threshold = 1 << (BIT_DEPTH - 5);
                if (s->sps->sps_strong_intra_smoothing_enable_flag && c_idx == 0 &&
                    log2_size == 5 &&
                    FFABS(top[-1]  + top[63]  - 2 * top[31])  < threshold &&
                    FFABS(left[-1] + left[63] - 2 * left[31]) < threshold) {
                    // We can't just overwrite values in top because it could be
                    // a pointer into src
                    filtered_top[-1] = top[-1];
                    filtered_top[63] = top[63];
                    for (i = 0; i < 63; i++)
                        filtered_top[i] = ((64 - (i + 1)) * top[-1] +
                                           (i + 1) * top[63] + 32) >> 6;
                    for (i = 0; i < 63; i++)
                        left[i] = ((64 - (i + 1)) * left[-1] +
                                   (i + 1) * left[63] + 32) >> 6;
                    top = filtered_top;
                } else {
                    filtered_left[2 * size - 1] = left[2 * size - 1];
                    filtered_top[2 * size - 1]  = top[2 * size - 1];
                    for (i = 2 * size - 2; i >= 0; i--)
                        filtered_left[i] = (left[i + 1] + 2 * left[i] +
                                            left[i - 1] + 2) >> 2;
                    filtered_top[-1]  =
                    filtered_left[-1] = (left[0] + 2 * left[-1] + top[0] + 2) >> 2;
                    for (i = 2 * size - 2; i >= 0; i--)
                        filtered_top[i] = (top[i + 1] + 2 * top[i] +
                                           top[i - 1] + 2) >> 2;
                    left = filtered_left;
                    top  = filtered_top;
                }
            }
        }
    }
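    /* Dispatch to the predictor selected by the intra mode. */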
    switch (mode) {
    case INTRA_PLANAR:
        s->hpc.pred_planar[log2_size - 2]((uint8_t *)src, (uint8_t *)top,
                                          (uint8_t *)left, stride);
        break;
    case INTRA_DC:
        s->hpc.pred_dc((uint8_t *)src, (uint8_t *)top,
                       (uint8_t *)left, stride, log2_size, c_idx);
        break;
    default:
        s->hpc.pred_angular[log2_size - 2]((uint8_t *)src, (uint8_t *)top,
                                           (uint8_t *)left, stride, c_idx,
                                           mode);
        break;
    }
}
#define INTRA_PRED(size)                                                          \
static void FUNC(intra_pred_ ## size)(HEVCContext *s, int x0, int y0, int c_idx) \
{                                                                                 \
    FUNC(intra_pred)(s, x0, y0, size, c_idx);                                     \
}
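/* Instantiate the size-specific entry points for log2 sizes 2..5 (4x4 to 32x32). */
INTRA_PRED(2)
INTRA_PRED(3)
INTRA_PRED(4)
INTRA_PRED(5)

#undef INTRA_PRED

/*
 * Planar prediction: each sample is a bilinear blend of the left and top
 * references with the top-right and bottom-left corner samples.
 */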
static av_always_inline void FUNC(pred_planar)(uint8_t *_src, const uint8_t *_top,
                                               const uint8_t *_left, ptrdiff_t stride,
                                               int trafo_size)
{
    int x, y;
    pixel *src        = (pixel *)_src;
    const pixel *top  = (const pixel *)_top;
    const pixel *left = (const pixel *)_left;
    int size = 1 << trafo_size;

    for (y = 0; y < size; y++)
        for (x = 0; x < size; x++)
            POS(x, y) = ((size - 1 - x) * left[y] + (x + 1) * top[size]  +
                         (size - 1 - y) * top[x]  + (y + 1) * left[size] + size) >> (trafo_size + 1);
}
#define PRED_PLANAR(size)                                                      \
static void FUNC(pred_planar_ ## size)(uint8_t *src, const uint8_t *top,      \
                                       const uint8_t *left, ptrdiff_t stride) \
{                                                                              \
    FUNC(pred_planar)(src, top, left, stride, size + 2);                       \
}
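/* Instantiate the planar predictors for 4x4 to 32x32 (the argument is log2_size - 2). */
PRED_PLANAR(0)
PRED_PLANAR(1)
PRED_PLANAR(2)
PRED_PLANAR(3)

#undef PRED_PLANAR

/*
 * DC prediction: fill the block with the rounded average of the 2 * size
 * reference samples.
 */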
static void FUNC(pred_dc)(uint8_t *_src, const uint8_t *_top,
                          const uint8_t *_left,
                          ptrdiff_t stride, int log2_size, int c_idx)
{
    int i, j, x, y;
    int size          = (1 << log2_size);
    pixel *src        = (pixel *)_src;
    const pixel *top  = (const pixel *)_top;
    const pixel *left = (const pixel *)_left;
    int dc            = size;
    pixel4 a;
    for (i = 0; i < size; i++)
        dc += left[i] + top[i];

    dc >>= log2_size + 1;

    a = PIXEL_SPLAT_X4(dc);

    for (i = 0; i < size; i++)
        for (j = 0; j < size; j += 4)
            AV_WN4P(&POS(j, i), a);
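    /* For luma blocks smaller than 32x32, smooth the first row and column
     * towards the reference samples. */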
    if (c_idx == 0 && size < 32) {
        POS(0, 0) = (left[0] + 2 * dc + top[0] + 2) >> 2;
        for (x = 1; x < size; x++)
            POS(x, 0) = (top[x] + 3 * dc + 2) >> 2;
        for (y = 1; y < size; y++)
            POS(0, y) = (left[y] + 3 * dc + 2) >> 2;
    }
}
static av_always_inline void FUNC(pred_angular)(uint8_t *_src,
                                                const uint8_t *_top,
                                                const uint8_t *_left,
                                                ptrdiff_t stride, int c_idx,
                                                int mode, int size)
{
    int x, y;
    pixel *src        = (pixel *)_src;
    const pixel *top  = (const pixel *)_top;
    const pixel *left = (const pixel *)_left;
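    /*
     * Projection displacement per row/column for modes 2..34, in 1/32-sample
     * units, and the inverse angles (scaled by 8192) used to extend the main
     * reference from the side reference when the angle is negative.
     */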
    static const int intra_pred_angle[] = {
         32,  26,  21,  17, 13,  9,  5, 2, 0, -2, -5, -9, -13, -17, -21, -26, -32,
        -26, -21, -17, -13, -9, -5, -2, 0, 2,  5,  9, 13,  17,  21,  26,  32
    };
    static const int inv_angle[] = {
        -4096, -1638, -910, -630, -482, -390, -315, -256, -315, -390, -482,
        -630, -910, -1638, -4096
    };
    int angle = intra_pred_angle[mode - 2];
    pixel ref_array[3 * MAX_TB_SIZE + 4];
    pixel *ref_tmp = ref_array + size;
    const pixel *ref;
    int last = (size * angle) >> 5;
    if (mode >= 18) {
        ref = top - 1;
        if (angle < 0 && last < -1) {
            for (x = 0; x <= size; x += 4)
                AV_WN4P(&ref_tmp[x], AV_RN4P(&top[x - 1]));
            for (x = last; x <= -1; x++)
                ref_tmp[x] = left[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
            ref = ref_tmp;
        }
        for (y = 0; y < size; y++) {
            int idx  = ((y + 1) * angle) >> 5;
            int fact = ((y + 1) * angle) & 31;
            if (fact) {
                for (x = 0; x < size; x += 4) {
                    POS(x    , y) = ((32 - fact) * ref[x + idx + 1] +
                                           fact  * ref[x + idx + 2] + 16) >> 5;
                    POS(x + 1, y) = ((32 - fact) * ref[x + 1 + idx + 1] +
                                           fact  * ref[x + 1 + idx + 2] + 16) >> 5;
                    POS(x + 2, y) = ((32 - fact) * ref[x + 2 + idx + 1] +
                                           fact  * ref[x + 2 + idx + 2] + 16) >> 5;
                    POS(x + 3, y) = ((32 - fact) * ref[x + 3 + idx + 1] +
                                           fact  * ref[x + 3 + idx + 2] + 16) >> 5;
                }
            } else {
                for (x = 0; x < size; x += 4)
                    AV_WN4P(&POS(x, y), AV_RN4P(&ref[x + idx + 1]));
            }
        }
        if (mode == 26 && c_idx == 0 && size < 32) {
            for (y = 0; y < size; y++)
                POS(0, y) = av_clip_pixel(top[0] + ((left[y] - left[-1]) >> 1));
        }
    } else {
        ref = left - 1;
        if (angle < 0 && last < -1) {
            for (x = 0; x <= size; x += 4)
                AV_WN4P(&ref_tmp[x], AV_RN4P(&left[x - 1]));
            for (x = last; x <= -1; x++)
                ref_tmp[x] = top[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
            ref = ref_tmp;
        }
        for (x = 0; x < size; x++) {
            int idx  = ((x + 1) * angle) >> 5;
            int fact = ((x + 1) * angle) & 31;
            if (fact) {
                for (y = 0; y < size; y++) {
                    POS(x, y) = ((32 - fact) * ref[y + idx + 1] +
                                       fact  * ref[y + idx + 2] + 16) >> 5;
                }
            } else {
                for (y = 0; y < size; y++)
                    POS(x, y) = ref[y + idx + 1];
            }
        }
        if (mode == 10 && c_idx == 0 && size < 32) {
            for (x = 0; x < size; x += 4) {
                POS(x,     0) = av_clip_pixel(left[0] + ((top[x    ] - top[-1]) >> 1));
                POS(x + 1, 0) = av_clip_pixel(left[0] + ((top[x + 1] - top[-1]) >> 1));
                POS(x + 2, 0) = av_clip_pixel(left[0] + ((top[x + 2] - top[-1]) >> 1));
                POS(x + 3, 0) = av_clip_pixel(left[0] + ((top[x + 3] - top[-1]) >> 1));
            }
        }
    }
}
static void FUNC(pred_angular_0)(uint8_t *src, const uint8_t *top,
                                 const uint8_t *left,
                                 ptrdiff_t stride, int c_idx, int mode)
{
    FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 2);
}

static void FUNC(pred_angular_1)(uint8_t *src, const uint8_t *top,
                                 const uint8_t *left,
                                 ptrdiff_t stride, int c_idx, int mode)
{
    FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 3);
}

static void FUNC(pred_angular_2)(uint8_t *src, const uint8_t *top,
                                 const uint8_t *left,
                                 ptrdiff_t stride, int c_idx, int mode)
{
    FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 4);
}

static void FUNC(pred_angular_3)(uint8_t *src, const uint8_t *top,
                                 const uint8_t *left,
                                 ptrdiff_t stride, int c_idx, int mode)
{
    FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 5);
}
#undef EXTEND_LEFT_CIP
#undef EXTEND_RIGHT_CIP
#undef EXTEND_UP_CIP
#undef EXTEND_DOWN_CIP
#undef IS_INTRA
#undef MVF_PU
#undef MVF
#undef PU
#undef EXTEND
#undef MIN_TB_ADDR_ZS