Imported Upstream version 1.4
[deb_x265.git] / source / common / predict.cpp
CommitLineData
72b9787e
JB
1/*****************************************************************************
2* Copyright (C) 2013 x265 project
3*
4* Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
5*
6* This program is free software; you can redistribute it and/or modify
7* it under the terms of the GNU General Public License as published by
8* the Free Software Foundation; either version 2 of the License, or
9* (at your option) any later version.
10*
11* This program is distributed in the hope that it will be useful,
12* but WITHOUT ANY WARRANTY; without even the implied warranty of
13* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14* GNU General Public License for more details.
15*
16* You should have received a copy of the GNU General Public License
17* along with this program; if not, write to the Free Software
18* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19*
20* This program is also available under a commercial proprietary license.
21* For more information, contact us at license @ x265.com.
22*****************************************************************************/
23
24#include "common.h"
25#include "slice.h"
26#include "framedata.h"
27#include "picyuv.h"
28#include "predict.h"
29#include "primitives.h"
30
31using namespace x265;
32
33namespace
34{
35inline pixel weightBidir(int w0, int16_t P0, int w1, int16_t P1, int round, int shift, int offset)
36{
37 return Clip((w0 * (P0 + IF_INTERNAL_OFFS) + w1 * (P1 + IF_INTERNAL_OFFS) + round + (offset << (shift - 1))) >> shift);
38}
39}
40
41Predict::Predict()
42{
43 m_predBuf = NULL;
44 m_refAbove = NULL;
45 m_refAboveFlt = NULL;
46 m_refLeft = NULL;
47 m_refLeftFlt = NULL;
48 m_immedVals = NULL;
49}
50
51Predict::~Predict()
52{
53 X265_FREE(m_predBuf);
54 X265_FREE(m_refAbove);
55 X265_FREE(m_immedVals);
56 m_predShortYuv[0].destroy();
57 m_predShortYuv[1].destroy();
58}
59
60bool Predict::allocBuffers(int csp)
61{
62 m_csp = csp;
63 m_hChromaShift = CHROMA_H_SHIFT(csp);
64 m_vChromaShift = CHROMA_V_SHIFT(csp);
65
66 int predBufHeight = ((MAX_CU_SIZE + 2) << 4);
67 int predBufStride = ((MAX_CU_SIZE + 8) << 4);
68 CHECKED_MALLOC(m_predBuf, pixel, predBufStride * predBufHeight);
69 CHECKED_MALLOC(m_immedVals, int16_t, 64 * (64 + NTAPS_LUMA - 1));
70 CHECKED_MALLOC(m_refAbove, pixel, 12 * MAX_CU_SIZE);
71
72 m_refAboveFlt = m_refAbove + 3 * MAX_CU_SIZE;
73 m_refLeft = m_refAboveFlt + 3 * MAX_CU_SIZE;
74 m_refLeftFlt = m_refLeft + 3 * MAX_CU_SIZE;
75
76 return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp);
77
78fail:
79 return false;
80}
81
82void Predict::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSize)
83{
84 int tuSize = 1 << log2TrSize;
85
86 pixel *refLft, *refAbv;
87
88 if (!(g_intraFilterFlags[dirMode] & tuSize))
89 {
90 refLft = m_refLeft + tuSize - 1;
91 refAbv = m_refAbove + tuSize - 1;
92 }
93 else
94 {
95 refLft = m_refLeftFlt + tuSize - 1;
96 refAbv = m_refAboveFlt + tuSize - 1;
97 }
98
99 bool bFilter = log2TrSize <= 4;
100 int sizeIdx = log2TrSize - 2;
101 X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
102 primitives.intra_pred[dirMode][sizeIdx](dst, stride, refLft, refAbv, dirMode, bFilter);
103}
104
105void Predict::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC, int chFmt)
106{
107 int tuSize = 1 << log2TrSizeC;
108 int tuSize2 = tuSize << 1;
109
110 // Create the prediction
111 const int bufOffset = tuSize - 1;
112 pixel buf0[3 * MAX_CU_SIZE];
113 pixel buf1[3 * MAX_CU_SIZE];
114 pixel* above;
115 pixel* left = buf0 + bufOffset;
116
117 int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1);
118 for (int k = 0; k < limit; k++)
119 left[k] = src[k * ADI_BUF_STRIDE];
120
121 if (chFmt == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize))
122 {
123 // generate filtered intra prediction samples
124 buf0[bufOffset - 1] = src[1];
125 left = buf1 + bufOffset;
126 for (int i = 0; i < tuSize2; i++)
127 left[i] = (buf0[bufOffset + i - 1] + 2 * buf0[bufOffset + i] + buf0[bufOffset + i + 1] + 2) >> 2;
128 left[tuSize2] = buf0[bufOffset + tuSize2];
129
130 above = buf0 + bufOffset;
131 above[0] = left[0];
132 for (int i = 1; i < tuSize2; i++)
133 above[i] = (src[i - 1] + 2 * src[i] + src[i + 1] + 2) >> 2;
134 above[tuSize2] = src[tuSize2];
135 }
136 else
137 {
138 above = buf1 + bufOffset;
139 memcpy(above, src, (tuSize2 + 1) * sizeof(pixel));
140 }
141
142 int sizeIdx = log2TrSizeC - 2;
143 X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
144 primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 0);
145}
146
147void Predict::initMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx)
148{
149 m_predSlice = cu.m_slice;
150 cu.getPartIndexAndSize(partIdx, m_puAbsPartIdx, m_puWidth, m_puHeight);
151 m_ctuAddr = cu.m_cuAddr;
152 m_cuAbsPartIdx = cuGeom.encodeIdx;
153}
154
155void Predict::prepMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx)
156{
157 initMotionCompensation(cu, cuGeom, partIdx);
158
159 m_refIdx0 = cu.m_refIdx[0][m_puAbsPartIdx];
160 m_clippedMv[0] = cu.m_mv[0][m_puAbsPartIdx];
161 m_refIdx1 = cu.m_refIdx[1][m_puAbsPartIdx];
162 m_clippedMv[1] = cu.m_mv[1][m_puAbsPartIdx];
163 cu.clipMv(m_clippedMv[0]);
164 cu.clipMv(m_clippedMv[1]);
165}
166
167void Predict::motionCompensation(Yuv& predYuv, bool bLuma, bool bChroma)
168{
169 if (m_predSlice->isInterP())
170 {
171 /* P Slice */
172 WeightValues wv0[3];
173 X265_CHECK(m_refIdx0 >= 0, "invalid P refidx\n");
174 X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "P refidx out of range\n");
175 const WeightParam *wp0 = m_predSlice->m_weightPredTable[0][m_refIdx0];
176
177 if (m_predSlice->m_pps->bUseWeightPred && wp0->bPresentFlag)
178 {
179 for (int plane = 0; plane < 3; plane++)
180 {
181 wv0[plane].w = wp0[plane].inputWeight;
182 wv0[plane].offset = wp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
183 wv0[plane].shift = wp0[plane].log2WeightDenom;
184 wv0[plane].round = wp0[plane].log2WeightDenom >= 1 ? 1 << (wp0[plane].log2WeightDenom - 1) : 0;
185 }
186
187 ShortYuv& shortYuv = m_predShortYuv[0];
188
189 if (bLuma)
190 predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
191 if (bChroma)
192 predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
193
194 addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma);
195 }
196 else
197 {
198 if (bLuma)
199 predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
200 if (bChroma)
201 predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
202 }
203 }
204 else
205 {
206 /* B Slice */
207
208 WeightValues wv0[3], wv1[3];
209 const WeightParam *pwp0, *pwp1;
210
211 if (m_predSlice->m_pps->bUseWeightedBiPred)
212 {
213 pwp0 = m_refIdx0 >= 0 ? m_predSlice->m_weightPredTable[0][m_refIdx0] : NULL;
214 pwp1 = m_refIdx1 >= 0 ? m_predSlice->m_weightPredTable[1][m_refIdx1] : NULL;
215
216 if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag))
217 {
218 /* biprediction weighting */
219 for (int plane = 0; plane < 3; plane++)
220 {
221 wv0[plane].w = pwp0[plane].inputWeight;
222 wv0[plane].o = pwp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
223 wv0[plane].shift = pwp0[plane].log2WeightDenom;
224 wv0[plane].round = 1 << pwp0[plane].log2WeightDenom;
225
226 wv1[plane].w = pwp1[plane].inputWeight;
227 wv1[plane].o = pwp1[plane].inputOffset * (1 << (X265_DEPTH - 8));
228 wv1[plane].shift = wv0[plane].shift;
229 wv1[plane].round = wv0[plane].round;
230 }
231 }
232 else
233 {
234 /* uniprediction weighting, always outputs to wv0 */
235 const WeightParam* pwp = (m_refIdx0 >= 0) ? pwp0 : pwp1;
236 for (int plane = 0; plane < 3; plane++)
237 {
238 wv0[plane].w = pwp[plane].inputWeight;
239 wv0[plane].offset = pwp[plane].inputOffset * (1 << (X265_DEPTH - 8));
240 wv0[plane].shift = pwp[plane].log2WeightDenom;
241 wv0[plane].round = pwp[plane].log2WeightDenom >= 1 ? 1 << (pwp[plane].log2WeightDenom - 1) : 0;
242 }
243 }
244 }
245 else
246 pwp0 = pwp1 = NULL;
247
248 if (m_refIdx0 >= 0 && m_refIdx1 >= 0)
249 {
250 /* Biprediction */
251 X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "bidir refidx0 out of range\n");
252 X265_CHECK(m_refIdx1 < m_predSlice->m_numRefIdx[1], "bidir refidx1 out of range\n");
253
254 if (bLuma)
255 {
256 predInterLumaShort(m_predShortYuv[0], *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
257 predInterLumaShort(m_predShortYuv[1], *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]);
258 }
259 if (bChroma)
260 {
261 predInterChromaShort(m_predShortYuv[0], *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
262 predInterChromaShort(m_predShortYuv[1], *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]);
263 }
264
265 if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag))
266 addWeightBi(predYuv, m_predShortYuv[0], m_predShortYuv[1], wv0, wv1, bLuma, bChroma);
267 else
268 predYuv.addAvg(m_predShortYuv[0], m_predShortYuv[1], m_puAbsPartIdx, m_puWidth, m_puHeight, bLuma, bChroma);
269 }
270 else if (m_refIdx0 >= 0)
271 {
272 /* uniprediction to L0 */
273 X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "unidir refidx0 out of range\n");
274
275 if (pwp0 && pwp0->bPresentFlag)
276 {
277 ShortYuv& shortYuv = m_predShortYuv[0];
278
279 if (bLuma)
280 predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
281 if (bChroma)
282 predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
283
284 addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma);
285 }
286 else
287 {
288 if (bLuma)
289 predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
290 if (bChroma)
291 predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
292 }
293 }
294 else
295 {
296 /* uniprediction to L1 */
297 X265_CHECK(m_refIdx1 >= 0, "refidx1 was not positive\n");
298 X265_CHECK(m_refIdx1 < m_predSlice->m_numRefIdx[1], "unidir refidx1 out of range\n");
299
300 if (pwp1 && pwp1->bPresentFlag)
301 {
302 ShortYuv& shortYuv = m_predShortYuv[0];
303
304 if (bLuma)
305 predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]);
306 if (bChroma)
307 predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]);
308
309 addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma);
310 }
311 else
312 {
313 if (bLuma)
314 predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]);
315 if (bChroma)
316 predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]);
317 }
318 }
319 }
320}
321
322void Predict::predInterLumaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const
323{
324 pixel *dst = dstYuv.getLumaAddr(m_puAbsPartIdx);
325 intptr_t dstStride = dstYuv.m_size;
326
327 intptr_t srcStride = refPic.m_stride;
328 intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
329 int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
330 pixel* src = const_cast<PicYuv&>(refPic).getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset;
331
332 int xFrac = mv.x & 0x3;
333 int yFrac = mv.y & 0x3;
334
335 if (!(yFrac | xFrac))
336 primitives.luma_copy_pp[partEnum](dst, dstStride, src, srcStride);
337 else if (!yFrac)
338 primitives.luma_hpp[partEnum](src, srcStride, dst, dstStride, xFrac);
339 else if (!xFrac)
340 primitives.luma_vpp[partEnum](src, srcStride, dst, dstStride, yFrac);
341 else
342 {
343 int tmpStride = m_puWidth;
344 int filterSize = NTAPS_LUMA;
345 int halfFilterSize = (filterSize >> 1);
346 primitives.luma_hps[partEnum](src, srcStride, m_immedVals, tmpStride, xFrac, 1);
347 primitives.luma_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac);
348 }
349}
350
351void Predict::predInterLumaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
352{
353 int16_t *dst = dstSYuv.getLumaAddr(m_puAbsPartIdx);
354 int dstStride = dstSYuv.m_size;
355
356 intptr_t srcStride = refPic.m_stride;
357 intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
358 pixel *src = const_cast<PicYuv&>(refPic).getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset;
359
360 int xFrac = mv.x & 0x3;
361 int yFrac = mv.y & 0x3;
362
363 int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
364
365 X265_CHECK((m_puWidth % 4) + (m_puHeight % 4) == 0, "width or height not divisible by 4\n");
366 X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n");
367
368 if (!(yFrac | xFrac))
369 primitives.luma_p2s(src, srcStride, dst, m_puWidth, m_puHeight);
370 else if (!yFrac)
371 primitives.luma_hps[partEnum](src, srcStride, dst, dstStride, xFrac, 0);
372 else if (!xFrac)
373 primitives.luma_vps[partEnum](src, srcStride, dst, dstStride, yFrac);
374 else
375 {
376 int tmpStride = m_puWidth;
377 int filterSize = NTAPS_LUMA;
378 int halfFilterSize = (filterSize >> 1);
379 primitives.luma_hps[partEnum](src, srcStride, m_immedVals, tmpStride, xFrac, 1);
380 primitives.luma_vss[partEnum](m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac);
381 }
382}
383
384void Predict::predInterChromaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const
385{
386 intptr_t dstStride = dstYuv.m_csize;
387 intptr_t refStride = refPic.m_strideC;
388
389 int shiftHor = (2 + m_hChromaShift);
390 int shiftVer = (2 + m_vChromaShift);
391
392 intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
393
394 pixel* refCb = const_cast<PicYuv&>(refPic).getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
395 pixel* refCr = const_cast<PicYuv&>(refPic).getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
396
397 pixel* dstCb = dstYuv.getCbAddr(m_puAbsPartIdx);
398 pixel* dstCr = dstYuv.getCrAddr(m_puAbsPartIdx);
399
400 int xFrac = mv.x & ((1 << shiftHor) - 1);
401 int yFrac = mv.y & ((1 << shiftVer) - 1);
402
403 int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
404
405 if (!(yFrac | xFrac))
406 {
407 primitives.chroma[m_csp].copy_pp[partEnum](dstCb, dstStride, refCb, refStride);
408 primitives.chroma[m_csp].copy_pp[partEnum](dstCr, dstStride, refCr, refStride);
409 }
410 else if (!yFrac)
411 {
412 primitives.chroma[m_csp].filter_hpp[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift));
413 primitives.chroma[m_csp].filter_hpp[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift));
414 }
415 else if (!xFrac)
416 {
417 primitives.chroma[m_csp].filter_vpp[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
418 primitives.chroma[m_csp].filter_vpp[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
419 }
420 else
421 {
422 int extStride = m_puWidth >> m_hChromaShift;
423 int filterSize = NTAPS_CHROMA;
424 int halfFilterSize = (filterSize >> 1);
425
426 primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
427 primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
428
429 primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
430 primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
431 }
432}
433
434void Predict::predInterChromaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
435{
436 intptr_t refStride = refPic.m_strideC;
437 intptr_t dstStride = dstSYuv.m_csize;
438
439 int shiftHor = (2 + m_hChromaShift);
440 int shiftVer = (2 + m_vChromaShift);
441
442 intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
443
444 pixel* refCb = const_cast<PicYuv&>(refPic).getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
445 pixel* refCr = const_cast<PicYuv&>(refPic).getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
446
447 int16_t* dstCb = dstSYuv.getCbAddr(m_puAbsPartIdx);
448 int16_t* dstCr = dstSYuv.getCrAddr(m_puAbsPartIdx);
449
450 int xFrac = mv.x & ((1 << shiftHor) - 1);
451 int yFrac = mv.y & ((1 << shiftVer) - 1);
452
453 int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
454
455 uint32_t cxWidth = m_puWidth >> m_hChromaShift;
456 uint32_t cxHeight = m_puHeight >> m_vChromaShift;
457
458 X265_CHECK(((cxWidth | cxHeight) % 2) == 0, "chroma block size expected to be multiple of 2\n");
459
460 if (!(yFrac | xFrac))
461 {
462 primitives.chroma_p2s[m_csp](refCb, refStride, dstCb, cxWidth, cxHeight);
463 primitives.chroma_p2s[m_csp](refCr, refStride, dstCr, cxWidth, cxHeight);
464 }
465 else if (!yFrac)
466 {
467 primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift), 0);
468 primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift), 0);
469 }
470 else if (!xFrac)
471 {
472 primitives.chroma[m_csp].filter_vps[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
473 primitives.chroma[m_csp].filter_vps[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
474 }
475 else
476 {
477 int extStride = cxWidth;
478 int filterSize = NTAPS_CHROMA;
479 int halfFilterSize = (filterSize >> 1);
480 primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
481 primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
482 primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
483 primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
484 }
485}
486
487/* weighted averaging for bi-pred */
488void Predict::addWeightBi(Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const
489{
490 int x, y;
491
492 int w0, w1, offset, shiftNum, shift, round;
493 uint32_t src0Stride, src1Stride, dststride;
494
495 pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx);
496 pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx);
497 pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx);
498
499 const int16_t* srcY0 = srcYuv0.getLumaAddr(m_puAbsPartIdx);
500 const int16_t* srcU0 = srcYuv0.getCbAddr(m_puAbsPartIdx);
501 const int16_t* srcV0 = srcYuv0.getCrAddr(m_puAbsPartIdx);
502
503 const int16_t* srcY1 = srcYuv1.getLumaAddr(m_puAbsPartIdx);
504 const int16_t* srcU1 = srcYuv1.getCbAddr(m_puAbsPartIdx);
505 const int16_t* srcV1 = srcYuv1.getCrAddr(m_puAbsPartIdx);
506
507 if (bLuma)
508 {
509 // Luma
510 w0 = wp0[0].w;
511 offset = wp0[0].o + wp1[0].o;
512 shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
513 shift = wp0[0].shift + shiftNum + 1;
514 round = shift ? (1 << (shift - 1)) : 0;
515 w1 = wp1[0].w;
516
517 src0Stride = srcYuv0.m_size;
518 src1Stride = srcYuv1.m_size;
519 dststride = predYuv.m_size;
520
521 // TODO: can we use weight_sp here?
522 for (y = m_puHeight - 1; y >= 0; y--)
523 {
524 for (x = m_puWidth - 1; x >= 0; )
525 {
526 // note: luma min width is 4
527 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
528 x--;
529 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
530 x--;
531 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
532 x--;
533 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
534 x--;
535 }
536
537 srcY0 += src0Stride;
538 srcY1 += src1Stride;
539 dstY += dststride;
540 }
541 }
542
543 if (bChroma)
544 {
545 // Chroma U
546 w0 = wp0[1].w;
547 offset = wp0[1].o + wp1[1].o;
548 shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
549 shift = wp0[1].shift + shiftNum + 1;
550 round = shift ? (1 << (shift - 1)) : 0;
551 w1 = wp1[1].w;
552
553 src0Stride = srcYuv0.m_csize;
554 src1Stride = srcYuv1.m_csize;
555 dststride = predYuv.m_csize;
556
557 uint32_t cwidth = m_puWidth >> srcYuv0.m_hChromaShift;
558 uint32_t cheight = m_puHeight >> srcYuv0.m_vChromaShift;
559
560 // TODO: can we use weight_sp here?
561 for (y = cheight - 1; y >= 0; y--)
562 {
563 for (x = cwidth - 1; x >= 0;)
564 {
565 // note: chroma min width is 2
566 dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset);
567 x--;
568 dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset);
569 x--;
570 }
571
572 srcU0 += src0Stride;
573 srcU1 += src1Stride;
574 dstU += dststride;
575 }
576
577 // Chroma V
578 w0 = wp0[2].w;
579 offset = wp0[2].o + wp1[2].o;
580 shift = wp0[2].shift + shiftNum + 1;
581 round = shift ? (1 << (shift - 1)) : 0;
582 w1 = wp1[2].w;
583
584 for (y = cheight - 1; y >= 0; y--)
585 {
586 for (x = cwidth - 1; x >= 0;)
587 {
588 // note: chroma min width is 2
589 dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset);
590 x--;
591 dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset);
592 x--;
593 }
594
595 srcV0 += src0Stride;
596 srcV1 += src1Stride;
597 dstV += dststride;
598 }
599 }
600}
601
602/* weighted averaging for uni-pred */
603void Predict::addWeightUni(Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const
604{
605 pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx);
606 pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx);
607 pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx);
608
609 const int16_t* srcY0 = srcYuv.getLumaAddr(m_puAbsPartIdx);
610 const int16_t* srcU0 = srcYuv.getCbAddr(m_puAbsPartIdx);
611 const int16_t* srcV0 = srcYuv.getCrAddr(m_puAbsPartIdx);
612
613 int w0, offset, shiftNum, shift, round;
614 uint32_t srcStride, dstStride;
615
616 if (bLuma)
617 {
618 // Luma
619 w0 = wp[0].w;
620 offset = wp[0].offset;
621 shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
622 shift = wp[0].shift + shiftNum;
623 round = shift ? (1 << (shift - 1)) : 0;
624 srcStride = srcYuv.m_size;
625 dstStride = predYuv.m_size;
626
627 primitives.weight_sp(const_cast<int16_t*>(srcY0), dstY, srcStride, dstStride, m_puWidth, m_puHeight, w0, round, shift, offset);
628 }
629
630 if (bChroma)
631 {
632 // Chroma U
633 w0 = wp[1].w;
634 offset = wp[1].offset;
635 shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
636 shift = wp[1].shift + shiftNum;
637 round = shift ? (1 << (shift - 1)) : 0;
638
639 srcStride = srcYuv.m_csize;
640 dstStride = predYuv.m_csize;
641
642 uint32_t cwidth = m_puWidth >> srcYuv.m_hChromaShift;
643 uint32_t cheight = m_puHeight >> srcYuv.m_vChromaShift;
644
645 primitives.weight_sp(const_cast<int16_t*>(srcU0), dstU, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
646
647 // Chroma V
648 w0 = wp[2].w;
649 offset = wp[2].offset;
650 shift = wp[2].shift + shiftNum;
651 round = shift ? (1 << (shift - 1)) : 0;
652
653 primitives.weight_sp(const_cast<int16_t*>(srcV0), dstV, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
654 }
655}
656
657void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, int dirMode)
658{
659 IntraNeighbors intraNeighbors;
660 initIntraNeighbors(cu, absPartIdx, partDepth, true, &intraNeighbors);
661
662 pixel* adiBuf = m_predBuf;
663 pixel* refAbove = m_refAbove;
664 pixel* refLeft = m_refLeft;
665 pixel* refAboveFlt = m_refAboveFlt;
666 pixel* refLeftFlt = m_refLeftFlt;
667
668 int tuSize = intraNeighbors.tuSize;
669 int tuSize2 = tuSize << 1;
670
671 pixel* adiOrigin = cu.m_encData->m_reconPicYuv->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
672 intptr_t picStride = cu.m_encData->m_reconPicYuv->m_stride;
673
674 fillReferenceSamples(adiOrigin, picStride, adiBuf, intraNeighbors);
675
676 // initialization of ADI buffers
677 const int bufOffset = tuSize - 1;
678 refAbove += bufOffset;
679 refLeft += bufOffset;
680
681 // ADI_BUF_STRIDE * (2 * tuSize + 1);
682 memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel));
683 for (int k = 0; k < tuSize2 + 1; k++)
684 refLeft[k] = adiBuf[k * ADI_BUF_STRIDE];
685
686 if (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize)
687 {
688 // generate filtered intra prediction samples
689 refAboveFlt += bufOffset;
690 refLeftFlt += bufOffset;
691
692 bool bStrongSmoothing = (tuSize == 32 && cu.m_slice->m_sps->bUseStrongIntraSmoothing);
693
694 if (bStrongSmoothing)
695 {
696 const int trSize = 32;
697 const int trSize2 = 32 * 2;
698 const int threshold = 1 << (X265_DEPTH - 5);
699 int refBL = refLeft[trSize2];
700 int refTL = refAbove[0];
701 int refTR = refAbove[trSize2];
702 bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize]) < threshold &&
703 abs(refTL + refTR - 2 * refAbove[trSize]) < threshold);
704
705 if (bStrongSmoothing)
706 {
707 // bilinear interpolation
708 const int shift = 5 + 1; // intraNeighbors.log2TrSize + 1;
709 int init = (refTL << shift) + tuSize;
710 int delta;
711
712 refLeftFlt[0] = refAboveFlt[0] = refAbove[0];
713
714 //TODO: Performance Primitive???
715 delta = refBL - refTL;
716 for (int i = 1; i < trSize2; i++)
717 refLeftFlt[i] = (pixel)((init + delta * i) >> shift);
718 refLeftFlt[trSize2] = refLeft[trSize2];
719
720 delta = refTR - refTL;
721 for (int i = 1; i < trSize2; i++)
722 refAboveFlt[i] = (pixel)((init + delta * i) >> shift);
723 refAboveFlt[trSize2] = refAbove[trSize2];
724
725 return;
726 }
727 }
728
729 refLeft[-1] = refAbove[1];
730 for (int i = 0; i < tuSize2; i++)
731 refLeftFlt[i] = (refLeft[i - 1] + 2 * refLeft[i] + refLeft[i + 1] + 2) >> 2;
732 refLeftFlt[tuSize2] = refLeft[tuSize2];
733
734 refAboveFlt[0] = refLeftFlt[0];
735 for (int i = 1; i < tuSize2; i++)
736 refAboveFlt[i] = (refAbove[i - 1] + 2 * refAbove[i] + refAbove[i + 1] + 2) >> 2;
737 refAboveFlt[tuSize2] = refAbove[tuSize2];
738 }
739}
740
741void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, uint32_t chromaId)
742{
743 IntraNeighbors intraNeighbors;
744 initIntraNeighbors(cu, absPartIdx, partDepth, false, &intraNeighbors);
745 uint32_t tuSize = intraNeighbors.tuSize;
746
747 const pixel* adiOrigin = cu.m_encData->m_reconPicYuv->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
748 intptr_t picStride = cu.m_encData->m_reconPicYuv->m_strideC;
749 pixel* adiRef = getAdiChromaBuf(chromaId, tuSize);
750
751 fillReferenceSamples(adiOrigin, picStride, adiRef, intraNeighbors);
752}
753
754void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t partDepth, bool isLuma, IntraNeighbors *intraNeighbors)
755{
756 uint32_t log2TrSize = cu.m_log2CUSize[0] - partDepth;
757 int log2UnitWidth = LOG2_UNIT_SIZE;
758 int log2UnitHeight = LOG2_UNIT_SIZE;
759
760 if (!isLuma)
761 {
762 log2TrSize -= cu.m_hChromaShift;
763 log2UnitWidth -= cu.m_hChromaShift;
764 log2UnitHeight -= cu.m_vChromaShift;
765 }
766
767 int numIntraNeighbor = 0;
768 bool *bNeighborFlags = intraNeighbors->bNeighborFlags;
769
770 uint32_t partIdxLT, partIdxRT, partIdxLB;
771
772 cu.deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, absPartIdx, partDepth);
773
774 uint32_t tuSize = 1 << log2TrSize;
775 int tuWidthInUnits = tuSize >> log2UnitWidth;
776 int tuHeightInUnits = tuSize >> log2UnitHeight;
777 int aboveUnits = tuWidthInUnits << 1;
778 int leftUnits = tuHeightInUnits << 1;
779 int partIdxStride = cu.m_slice->m_sps->numPartInCUSize;
780 partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)];
781
782 bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
783 numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
784 numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));
785 numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits));
786 numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));
787 numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - tuHeightInUnits));
788
789 intraNeighbors->numIntraNeighbor = numIntraNeighbor;
790 intraNeighbors->totalUnits = aboveUnits + leftUnits + 1;
791 intraNeighbors->aboveUnits = aboveUnits;
792 intraNeighbors->leftUnits = leftUnits;
793 intraNeighbors->unitWidth = 1 << log2UnitWidth;
794 intraNeighbors->unitHeight = 1 << log2UnitHeight;
795 intraNeighbors->tuSize = tuSize;
796 intraNeighbors->log2TrSize = log2TrSize;
797}
798
799void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors)
800{
801 const pixel dcValue = (pixel)(1 << (X265_DEPTH - 1));
802 int numIntraNeighbor = intraNeighbors.numIntraNeighbor;
803 int totalUnits = intraNeighbors.totalUnits;
804 uint32_t tuSize = intraNeighbors.tuSize;
805 uint32_t refSize = tuSize * 2 + 1;
806
807 if (numIntraNeighbor == 0)
808 {
809 // Fill border with DC value
810 for (uint32_t i = 0; i < refSize; i++)
811 adiRef[i] = dcValue;
812
813 for (uint32_t i = 1; i < refSize; i++)
814 adiRef[i * ADI_BUF_STRIDE] = dcValue;
815 }
816 else if (numIntraNeighbor == totalUnits)
817 {
818 // Fill top border with rec. samples
819 const pixel* adiTemp = adiOrigin - picStride - 1;
820 memcpy(adiRef, adiTemp, refSize * sizeof(*adiRef));
821
822 // Fill left border with rec. samples
823 adiTemp = adiOrigin - 1;
824 for (uint32_t i = 1; i < refSize; i++)
825 {
826 adiRef[i * ADI_BUF_STRIDE] = adiTemp[0];
827 adiTemp += picStride;
828 }
829 }
830 else // reference samples are partially available
831 {
832 const bool *bNeighborFlags = intraNeighbors.bNeighborFlags;
833 const bool *pNeighborFlags;
834 int aboveUnits = intraNeighbors.aboveUnits;
835 int leftUnits = intraNeighbors.leftUnits;
836 int unitWidth = intraNeighbors.unitWidth;
837 int unitHeight = intraNeighbors.unitHeight;
838 int totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth);
839 pixel adiLineBuffer[5 * MAX_CU_SIZE];
840 pixel *adi;
841
842 // Initialize
843 for (int i = 0; i < totalSamples; i++)
844 adiLineBuffer[i] = dcValue;
845
846 // Fill top-left sample
847 const pixel* adiTemp = adiOrigin - picStride - 1;
848 adi = adiLineBuffer + (leftUnits * unitHeight);
849 pNeighborFlags = bNeighborFlags + leftUnits;
850 if (*pNeighborFlags)
851 {
852 pixel topLeftVal = adiTemp[0];
853 for (int i = 0; i < unitWidth; i++)
854 adi[i] = topLeftVal;
855 }
856
857 // Fill left & below-left samples
858 adiTemp += picStride;
859 adi--;
860 pNeighborFlags--;
861 for (int j = 0; j < leftUnits; j++)
862 {
863 if (*pNeighborFlags)
864 for (int i = 0; i < unitHeight; i++)
865 adi[-i] = adiTemp[i * picStride];
866
867 adiTemp += unitHeight * picStride;
868 adi -= unitHeight;
869 pNeighborFlags--;
870 }
871
872 // Fill above & above-right samples
873 adiTemp = adiOrigin - picStride;
874 adi = adiLineBuffer + (leftUnits * unitHeight) + unitWidth;
875 pNeighborFlags = bNeighborFlags + leftUnits + 1;
876 for (int j = 0; j < aboveUnits; j++)
877 {
878 if (*pNeighborFlags)
879 memcpy(adi, adiTemp, unitWidth * sizeof(*adiTemp));
880 adiTemp += unitWidth;
881 adi += unitWidth;
882 pNeighborFlags++;
883 }
884
885 // Pad reference samples when necessary
886 int curr = 0;
887 int next = 1;
888 adi = adiLineBuffer;
889 int pAdiLineTopRowOffset = leftUnits * (unitHeight - unitWidth);
890 if (!bNeighborFlags[0])
891 {
892 // very bottom unit of bottom-left; at least one unit will be valid.
893 while (next < totalUnits && !bNeighborFlags[next])
894 next++;
895
896 pixel *pAdiLineNext = adiLineBuffer + ((next < leftUnits) ? (next * unitHeight) : (pAdiLineTopRowOffset + (next * unitWidth)));
897 const pixel refSample = *pAdiLineNext;
898 // Pad unavailable samples with new value
899 int nextOrTop = X265_MIN(next, leftUnits);
900 // fill left column
901 while (curr < nextOrTop)
902 {
903 for (int i = 0; i < unitHeight; i++)
904 adi[i] = refSample;
905
906 adi += unitHeight;
907 curr++;
908 }
909
910 // fill top row
911 while (curr < next)
912 {
913 for (int i = 0; i < unitWidth; i++)
914 adi[i] = refSample;
915
916 adi += unitWidth;
917 curr++;
918 }
919 }
920
921 // pad all other reference samples.
922 while (curr < totalUnits)
923 {
924 if (!bNeighborFlags[curr]) // samples not available
925 {
926 int numSamplesInCurrUnit = (curr >= leftUnits) ? unitWidth : unitHeight;
927 const pixel refSample = *(adi - 1);
928 for (int i = 0; i < numSamplesInCurrUnit; i++)
929 adi[i] = refSample;
930
931 adi += numSamplesInCurrUnit;
932 curr++;
933 }
934 else
935 {
936 adi += (curr >= leftUnits) ? unitWidth : unitHeight;
937 curr++;
938 }
939 }
940
941 // Copy processed samples
942 adi = adiLineBuffer + refSize + unitWidth - 2;
943 memcpy(adiRef, adi, refSize * sizeof(*adiRef));
944
945 adi = adiLineBuffer + refSize - 1;
946 for (int i = 1; i < (int)refSize; i++)
947 adiRef[i * ADI_BUF_STRIDE] = adi[-i];
948 }
949}
950
951bool Predict::isAboveLeftAvailable(const CUData& cu, uint32_t partIdxLT)
952{
953 uint32_t partAboveLeft;
954 const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT);
955
956 if (!cu.m_slice->m_pps->bConstrainedIntraPred)
957 return cuAboveLeft ? true : false;
958 else
959 return cuAboveLeft && cuAboveLeft->isIntra(partAboveLeft);
960}
961
962int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool *bValidFlags)
963{
964 const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
965 const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT] + 1;
966 const uint32_t idxStep = 1;
967 bool *validFlagPtr = bValidFlags;
968 int numIntra = 0;
969
970 for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep)
971 {
972 uint32_t partAbove;
973 const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]);
974 if (cuAbove && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAbove->isIntra(partAbove)))
975 {
976 numIntra++;
977 *validFlagPtr = true;
978 }
979 else
980 *validFlagPtr = false;
981
982 validFlagPtr++;
983 }
984
985 return numIntra;
986}
987
988int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool *bValidFlags)
989{
990 const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
991 const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB] + 1;
992 const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize;
993 bool *validFlagPtr = bValidFlags;
994 int numIntra = 0;
995
996 for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep)
997 {
998 uint32_t partLeft;
999 const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]);
1000 if (cuLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuLeft->isIntra(partLeft)))
1001 {
1002 numIntra++;
1003 *validFlagPtr = true;
1004 }
1005 else
1006 *validFlagPtr = false;
1007
1008 validFlagPtr--; // opposite direction
1009 }
1010
1011 return numIntra;
1012}
1013
1014int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool *bValidFlags)
1015{
1016 const uint32_t numUnitsInPU = g_zscanToRaster[partIdxRT] - g_zscanToRaster[partIdxLT] + 1;
1017 bool *validFlagPtr = bValidFlags;
1018 int numIntra = 0;
1019
1020 for (uint32_t offset = 1; offset <= numUnitsInPU; offset++)
1021 {
1022 uint32_t partAboveRight;
1023 const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset);
1024 if (cuAboveRight && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAboveRight->isIntra(partAboveRight)))
1025 {
1026 numIntra++;
1027 *validFlagPtr = true;
1028 }
1029 else
1030 *validFlagPtr = false;
1031
1032 validFlagPtr++;
1033 }
1034
1035 return numIntra;
1036}
1037
1038int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool *bValidFlags)
1039{
1040 const uint32_t numUnitsInPU = (g_zscanToRaster[partIdxLB] - g_zscanToRaster[partIdxLT]) / cu.m_slice->m_sps->numPartInCUSize + 1;
1041 bool *validFlagPtr = bValidFlags;
1042 int numIntra = 0;
1043
1044 for (uint32_t offset = 1; offset <= numUnitsInPU; offset++)
1045 {
1046 uint32_t partBelowLeft;
1047 const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset);
1048 if (cuBelowLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuBelowLeft->isIntra(partBelowLeft)))
1049 {
1050 numIntra++;
1051 *validFlagPtr = true;
1052 }
1053 else
1054 *validFlagPtr = false;
1055
1056 validFlagPtr--; // opposite direction
1057 }
1058
1059 return numIntra;
1060}