Fix cut&paste typo in changelog.
[deb_x265.git] / source / common / predict.cpp
... / ...
CommitLineData
1/*****************************************************************************
2* Copyright (C) 2013 x265 project
3*
4* Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
5*
6* This program is free software; you can redistribute it and/or modify
7* it under the terms of the GNU General Public License as published by
8* the Free Software Foundation; either version 2 of the License, or
9* (at your option) any later version.
10*
11* This program is distributed in the hope that it will be useful,
12* but WITHOUT ANY WARRANTY; without even the implied warranty of
13* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14* GNU General Public License for more details.
15*
16* You should have received a copy of the GNU General Public License
17* along with this program; if not, write to the Free Software
18* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19*
20* This program is also available under a commercial proprietary license.
21* For more information, contact us at license @ x265.com.
22*****************************************************************************/
23
24#include "common.h"
25#include "slice.h"
26#include "framedata.h"
27#include "picyuv.h"
28#include "predict.h"
29#include "primitives.h"
30
31using namespace x265;
32
33namespace
34{
35inline pixel weightBidir(int w0, int16_t P0, int w1, int16_t P1, int round, int shift, int offset)
36{
37 return Clip((w0 * (P0 + IF_INTERNAL_OFFS) + w1 * (P1 + IF_INTERNAL_OFFS) + round + (offset << (shift - 1))) >> shift);
38}
39}
40
41Predict::Predict()
42{
43 m_predBuf = NULL;
44 m_refAbove = NULL;
45 m_refAboveFlt = NULL;
46 m_refLeft = NULL;
47 m_refLeftFlt = NULL;
48 m_immedVals = NULL;
49}
50
51Predict::~Predict()
52{
53 X265_FREE(m_predBuf);
54 X265_FREE(m_refAbove);
55 X265_FREE(m_immedVals);
56 m_predShortYuv[0].destroy();
57 m_predShortYuv[1].destroy();
58}
59
60bool Predict::allocBuffers(int csp)
61{
62 m_csp = csp;
63 m_hChromaShift = CHROMA_H_SHIFT(csp);
64 m_vChromaShift = CHROMA_V_SHIFT(csp);
65
66 int predBufHeight = ((MAX_CU_SIZE + 2) << 4);
67 int predBufStride = ((MAX_CU_SIZE + 8) << 4);
68 CHECKED_MALLOC(m_predBuf, pixel, predBufStride * predBufHeight);
69 CHECKED_MALLOC(m_immedVals, int16_t, 64 * (64 + NTAPS_LUMA - 1));
70 CHECKED_MALLOC(m_refAbove, pixel, 12 * MAX_CU_SIZE);
71
72 m_refAboveFlt = m_refAbove + 3 * MAX_CU_SIZE;
73 m_refLeft = m_refAboveFlt + 3 * MAX_CU_SIZE;
74 m_refLeftFlt = m_refLeft + 3 * MAX_CU_SIZE;
75
76 return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp);
77
78fail:
79 return false;
80}
81
82void Predict::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSize)
83{
84 int tuSize = 1 << log2TrSize;
85
86 pixel* refLft;
87 pixel* refAbv;
88
89 if (!(g_intraFilterFlags[dirMode] & tuSize))
90 {
91 refLft = m_refLeft + tuSize - 1;
92 refAbv = m_refAbove + tuSize - 1;
93 }
94 else
95 {
96 refLft = m_refLeftFlt + tuSize - 1;
97 refAbv = m_refAboveFlt + tuSize - 1;
98 }
99
100 bool bFilter = log2TrSize <= 4;
101 int sizeIdx = log2TrSize - 2;
102 X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
103 primitives.intra_pred[dirMode][sizeIdx](dst, stride, refLft, refAbv, dirMode, bFilter);
104}
105
106void Predict::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC, int chFmt)
107{
108 int tuSize = 1 << log2TrSizeC;
109 int tuSize2 = tuSize << 1;
110
111 // Create the prediction
112 const int bufOffset = tuSize - 1;
113 pixel buf0[3 * MAX_CU_SIZE];
114 pixel buf1[3 * MAX_CU_SIZE];
115 pixel* above;
116 pixel* left = buf0 + bufOffset;
117
118 int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1);
119 for (int k = 0; k < limit; k++)
120 left[k] = src[k * ADI_BUF_STRIDE];
121
122 if (chFmt == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize))
123 {
124 // generate filtered intra prediction samples
125 buf0[bufOffset - 1] = src[1];
126 left = buf1 + bufOffset;
127 for (int i = 0; i < tuSize2; i++)
128 left[i] = (buf0[bufOffset + i - 1] + 2 * buf0[bufOffset + i] + buf0[bufOffset + i + 1] + 2) >> 2;
129 left[tuSize2] = buf0[bufOffset + tuSize2];
130
131 above = buf0 + bufOffset;
132 above[0] = left[0];
133 for (int i = 1; i < tuSize2; i++)
134 above[i] = (src[i - 1] + 2 * src[i] + src[i + 1] + 2) >> 2;
135 above[tuSize2] = src[tuSize2];
136 }
137 else
138 {
139 above = buf1 + bufOffset;
140 memcpy(above, src, (tuSize2 + 1) * sizeof(pixel));
141 }
142
143 int sizeIdx = log2TrSizeC - 2;
144 X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
145 primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 0);
146}
147
148void Predict::initMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx)
149{
150 m_predSlice = cu.m_slice;
151 cu.getPartIndexAndSize(partIdx, m_puAbsPartIdx, m_puWidth, m_puHeight);
152 m_ctuAddr = cu.m_cuAddr;
153 m_cuAbsPartIdx = cuGeom.encodeIdx;
154}
155
156void Predict::prepMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx)
157{
158 initMotionCompensation(cu, cuGeom, partIdx);
159
160 m_refIdx0 = cu.m_refIdx[0][m_puAbsPartIdx];
161 m_clippedMv[0] = cu.m_mv[0][m_puAbsPartIdx];
162 m_refIdx1 = cu.m_refIdx[1][m_puAbsPartIdx];
163 m_clippedMv[1] = cu.m_mv[1][m_puAbsPartIdx];
164 cu.clipMv(m_clippedMv[0]);
165 cu.clipMv(m_clippedMv[1]);
166}
167
168void Predict::motionCompensation(Yuv& predYuv, bool bLuma, bool bChroma)
169{
170 if (m_predSlice->isInterP())
171 {
172 /* P Slice */
173 WeightValues wv0[3];
174 X265_CHECK(m_refIdx0 >= 0, "invalid P refidx\n");
175 X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "P refidx out of range\n");
176 const WeightParam *wp0 = m_predSlice->m_weightPredTable[0][m_refIdx0];
177
178 if (m_predSlice->m_pps->bUseWeightPred && wp0->bPresentFlag)
179 {
180 for (int plane = 0; plane < 3; plane++)
181 {
182 wv0[plane].w = wp0[plane].inputWeight;
183 wv0[plane].offset = wp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
184 wv0[plane].shift = wp0[plane].log2WeightDenom;
185 wv0[plane].round = wp0[plane].log2WeightDenom >= 1 ? 1 << (wp0[plane].log2WeightDenom - 1) : 0;
186 }
187
188 ShortYuv& shortYuv = m_predShortYuv[0];
189
190 if (bLuma)
191 predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]);
192 if (bChroma)
193 predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]);
194
195 addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma);
196 }
197 else
198 {
199 if (bLuma)
200 predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]);
201 if (bChroma)
202 predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]);
203 }
204 }
205 else
206 {
207 /* B Slice */
208
209 WeightValues wv0[3], wv1[3];
210 const WeightParam *pwp0, *pwp1;
211
212 if (m_predSlice->m_pps->bUseWeightedBiPred)
213 {
214 pwp0 = m_refIdx0 >= 0 ? m_predSlice->m_weightPredTable[0][m_refIdx0] : NULL;
215 pwp1 = m_refIdx1 >= 0 ? m_predSlice->m_weightPredTable[1][m_refIdx1] : NULL;
216
217 if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag))
218 {
219 /* biprediction weighting */
220 for (int plane = 0; plane < 3; plane++)
221 {
222 wv0[plane].w = pwp0[plane].inputWeight;
223 wv0[plane].o = pwp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
224 wv0[plane].shift = pwp0[plane].log2WeightDenom;
225 wv0[plane].round = 1 << pwp0[plane].log2WeightDenom;
226
227 wv1[plane].w = pwp1[plane].inputWeight;
228 wv1[plane].o = pwp1[plane].inputOffset * (1 << (X265_DEPTH - 8));
229 wv1[plane].shift = wv0[plane].shift;
230 wv1[plane].round = wv0[plane].round;
231 }
232 }
233 else
234 {
235 /* uniprediction weighting, always outputs to wv0 */
236 const WeightParam* pwp = (m_refIdx0 >= 0) ? pwp0 : pwp1;
237 for (int plane = 0; plane < 3; plane++)
238 {
239 wv0[plane].w = pwp[plane].inputWeight;
240 wv0[plane].offset = pwp[plane].inputOffset * (1 << (X265_DEPTH - 8));
241 wv0[plane].shift = pwp[plane].log2WeightDenom;
242 wv0[plane].round = pwp[plane].log2WeightDenom >= 1 ? 1 << (pwp[plane].log2WeightDenom - 1) : 0;
243 }
244 }
245 }
246 else
247 pwp0 = pwp1 = NULL;
248
249 if (m_refIdx0 >= 0 && m_refIdx1 >= 0)
250 {
251 /* Biprediction */
252 X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "bidir refidx0 out of range\n");
253 X265_CHECK(m_refIdx1 < m_predSlice->m_numRefIdx[1], "bidir refidx1 out of range\n");
254
255 if (bLuma)
256 {
257 predInterLumaShort(m_predShortYuv[0], *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]);
258 predInterLumaShort(m_predShortYuv[1], *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]);
259 }
260 if (bChroma)
261 {
262 predInterChromaShort(m_predShortYuv[0], *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]);
263 predInterChromaShort(m_predShortYuv[1], *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]);
264 }
265
266 if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag))
267 addWeightBi(predYuv, m_predShortYuv[0], m_predShortYuv[1], wv0, wv1, bLuma, bChroma);
268 else
269 predYuv.addAvg(m_predShortYuv[0], m_predShortYuv[1], m_puAbsPartIdx, m_puWidth, m_puHeight, bLuma, bChroma);
270 }
271 else if (m_refIdx0 >= 0)
272 {
273 /* uniprediction to L0 */
274 X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "unidir refidx0 out of range\n");
275
276 if (pwp0 && pwp0->bPresentFlag)
277 {
278 ShortYuv& shortYuv = m_predShortYuv[0];
279
280 if (bLuma)
281 predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]);
282 if (bChroma)
283 predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]);
284
285 addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma);
286 }
287 else
288 {
289 if (bLuma)
290 predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]);
291 if (bChroma)
292 predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]);
293 }
294 }
295 else
296 {
297 /* uniprediction to L1 */
298 X265_CHECK(m_refIdx1 >= 0, "refidx1 was not positive\n");
299 X265_CHECK(m_refIdx1 < m_predSlice->m_numRefIdx[1], "unidir refidx1 out of range\n");
300
301 if (pwp1 && pwp1->bPresentFlag)
302 {
303 ShortYuv& shortYuv = m_predShortYuv[0];
304
305 if (bLuma)
306 predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]);
307 if (bChroma)
308 predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]);
309
310 addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma);
311 }
312 else
313 {
314 if (bLuma)
315 predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]);
316 if (bChroma)
317 predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]);
318 }
319 }
320 }
321}
322
323void Predict::predInterLumaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const
324{
325 pixel* dst = dstYuv.getLumaAddr(m_puAbsPartIdx);
326 intptr_t dstStride = dstYuv.m_size;
327
328 intptr_t srcStride = refPic.m_stride;
329 intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
330 int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
331 const pixel* src = refPic.getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset;
332
333 int xFrac = mv.x & 0x3;
334 int yFrac = mv.y & 0x3;
335
336 if (!(yFrac | xFrac))
337 primitives.luma_copy_pp[partEnum](dst, dstStride, src, srcStride);
338 else if (!yFrac)
339 primitives.luma_hpp[partEnum](src, srcStride, dst, dstStride, xFrac);
340 else if (!xFrac)
341 primitives.luma_vpp[partEnum](src, srcStride, dst, dstStride, yFrac);
342 else
343 {
344 int tmpStride = m_puWidth;
345 int filterSize = NTAPS_LUMA;
346 int halfFilterSize = (filterSize >> 1);
347 primitives.luma_hps[partEnum](src, srcStride, m_immedVals, tmpStride, xFrac, 1);
348 primitives.luma_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac);
349 }
350}
351
352void Predict::predInterLumaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
353{
354 int16_t* dst = dstSYuv.getLumaAddr(m_puAbsPartIdx);
355 int dstStride = dstSYuv.m_size;
356
357 intptr_t srcStride = refPic.m_stride;
358 intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
359 const pixel* src = refPic.getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset;
360
361 int xFrac = mv.x & 0x3;
362 int yFrac = mv.y & 0x3;
363
364 int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
365
366 X265_CHECK((m_puWidth % 4) + (m_puHeight % 4) == 0, "width or height not divisible by 4\n");
367 X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n");
368
369 if (!(yFrac | xFrac))
370 primitives.luma_p2s(src, srcStride, dst, m_puWidth, m_puHeight);
371 else if (!yFrac)
372 primitives.luma_hps[partEnum](src, srcStride, dst, dstStride, xFrac, 0);
373 else if (!xFrac)
374 primitives.luma_vps[partEnum](src, srcStride, dst, dstStride, yFrac);
375 else
376 {
377 int tmpStride = m_puWidth;
378 int filterSize = NTAPS_LUMA;
379 int halfFilterSize = (filterSize >> 1);
380 primitives.luma_hps[partEnum](src, srcStride, m_immedVals, tmpStride, xFrac, 1);
381 primitives.luma_vss[partEnum](m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac);
382 }
383}
384
385void Predict::predInterChromaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const
386{
387 intptr_t dstStride = dstYuv.m_csize;
388 intptr_t refStride = refPic.m_strideC;
389
390 int shiftHor = (2 + m_hChromaShift);
391 int shiftVer = (2 + m_vChromaShift);
392
393 intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
394
395 const pixel* refCb = refPic.getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
396 const pixel* refCr = refPic.getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
397
398 pixel* dstCb = dstYuv.getCbAddr(m_puAbsPartIdx);
399 pixel* dstCr = dstYuv.getCrAddr(m_puAbsPartIdx);
400
401 int xFrac = mv.x & ((1 << shiftHor) - 1);
402 int yFrac = mv.y & ((1 << shiftVer) - 1);
403
404 int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
405
406 if (!(yFrac | xFrac))
407 {
408 primitives.chroma[m_csp].copy_pp[partEnum](dstCb, dstStride, refCb, refStride);
409 primitives.chroma[m_csp].copy_pp[partEnum](dstCr, dstStride, refCr, refStride);
410 }
411 else if (!yFrac)
412 {
413 primitives.chroma[m_csp].filter_hpp[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift));
414 primitives.chroma[m_csp].filter_hpp[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift));
415 }
416 else if (!xFrac)
417 {
418 primitives.chroma[m_csp].filter_vpp[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
419 primitives.chroma[m_csp].filter_vpp[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
420 }
421 else
422 {
423 int extStride = m_puWidth >> m_hChromaShift;
424 int filterSize = NTAPS_CHROMA;
425 int halfFilterSize = (filterSize >> 1);
426
427 primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
428 primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
429
430 primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
431 primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
432 }
433}
434
435void Predict::predInterChromaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
436{
437 intptr_t refStride = refPic.m_strideC;
438 intptr_t dstStride = dstSYuv.m_csize;
439
440 int shiftHor = (2 + m_hChromaShift);
441 int shiftVer = (2 + m_vChromaShift);
442
443 intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
444
445 const pixel* refCb = refPic.getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
446 const pixel* refCr = refPic.getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
447
448 int16_t* dstCb = dstSYuv.getCbAddr(m_puAbsPartIdx);
449 int16_t* dstCr = dstSYuv.getCrAddr(m_puAbsPartIdx);
450
451 int xFrac = mv.x & ((1 << shiftHor) - 1);
452 int yFrac = mv.y & ((1 << shiftVer) - 1);
453
454 int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
455
456 uint32_t cxWidth = m_puWidth >> m_hChromaShift;
457 uint32_t cxHeight = m_puHeight >> m_vChromaShift;
458
459 X265_CHECK(((cxWidth | cxHeight) % 2) == 0, "chroma block size expected to be multiple of 2\n");
460
461 if (!(yFrac | xFrac))
462 {
463 primitives.chroma[m_csp].p2s(refCb, refStride, dstCb, cxWidth, cxHeight);
464 primitives.chroma[m_csp].p2s(refCr, refStride, dstCr, cxWidth, cxHeight);
465 }
466 else if (!yFrac)
467 {
468 primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift), 0);
469 primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift), 0);
470 }
471 else if (!xFrac)
472 {
473 primitives.chroma[m_csp].filter_vps[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
474 primitives.chroma[m_csp].filter_vps[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
475 }
476 else
477 {
478 int extStride = cxWidth;
479 int filterSize = NTAPS_CHROMA;
480 int halfFilterSize = (filterSize >> 1);
481 primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
482 primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
483 primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
484 primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
485 }
486}
487
488/* weighted averaging for bi-pred */
489void Predict::addWeightBi(Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const
490{
491 int x, y;
492
493 int w0, w1, offset, shiftNum, shift, round;
494 uint32_t src0Stride, src1Stride, dststride;
495
496 if (bLuma)
497 {
498 pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx);
499 const int16_t* srcY0 = srcYuv0.getLumaAddr(m_puAbsPartIdx);
500 const int16_t* srcY1 = srcYuv1.getLumaAddr(m_puAbsPartIdx);
501
502 // Luma
503 w0 = wp0[0].w;
504 offset = wp0[0].o + wp1[0].o;
505 shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
506 shift = wp0[0].shift + shiftNum + 1;
507 round = shift ? (1 << (shift - 1)) : 0;
508 w1 = wp1[0].w;
509
510 src0Stride = srcYuv0.m_size;
511 src1Stride = srcYuv1.m_size;
512 dststride = predYuv.m_size;
513
514 // TODO: can we use weight_sp here?
515 for (y = m_puHeight - 1; y >= 0; y--)
516 {
517 for (x = m_puWidth - 1; x >= 0; )
518 {
519 // note: luma min width is 4
520 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
521 x--;
522 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
523 x--;
524 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
525 x--;
526 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
527 x--;
528 }
529
530 srcY0 += src0Stride;
531 srcY1 += src1Stride;
532 dstY += dststride;
533 }
534 }
535
536 if (bChroma)
537 {
538 pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx);
539 pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx);
540 const int16_t* srcU0 = srcYuv0.getCbAddr(m_puAbsPartIdx);
541 const int16_t* srcV0 = srcYuv0.getCrAddr(m_puAbsPartIdx);
542 const int16_t* srcU1 = srcYuv1.getCbAddr(m_puAbsPartIdx);
543 const int16_t* srcV1 = srcYuv1.getCrAddr(m_puAbsPartIdx);
544
545 // Chroma U
546 w0 = wp0[1].w;
547 offset = wp0[1].o + wp1[1].o;
548 shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
549 shift = wp0[1].shift + shiftNum + 1;
550 round = shift ? (1 << (shift - 1)) : 0;
551 w1 = wp1[1].w;
552
553 src0Stride = srcYuv0.m_csize;
554 src1Stride = srcYuv1.m_csize;
555 dststride = predYuv.m_csize;
556
557 uint32_t cwidth = m_puWidth >> srcYuv0.m_hChromaShift;
558 uint32_t cheight = m_puHeight >> srcYuv0.m_vChromaShift;
559
560 // TODO: can we use weight_sp here?
561 for (y = cheight - 1; y >= 0; y--)
562 {
563 for (x = cwidth - 1; x >= 0;)
564 {
565 // note: chroma min width is 2
566 dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset);
567 x--;
568 dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset);
569 x--;
570 }
571
572 srcU0 += src0Stride;
573 srcU1 += src1Stride;
574 dstU += dststride;
575 }
576
577 // Chroma V
578 w0 = wp0[2].w;
579 offset = wp0[2].o + wp1[2].o;
580 shift = wp0[2].shift + shiftNum + 1;
581 round = shift ? (1 << (shift - 1)) : 0;
582 w1 = wp1[2].w;
583
584 for (y = cheight - 1; y >= 0; y--)
585 {
586 for (x = cwidth - 1; x >= 0;)
587 {
588 // note: chroma min width is 2
589 dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset);
590 x--;
591 dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset);
592 x--;
593 }
594
595 srcV0 += src0Stride;
596 srcV1 += src1Stride;
597 dstV += dststride;
598 }
599 }
600}
601
602/* weighted averaging for uni-pred */
603void Predict::addWeightUni(Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const
604{
605 int w0, offset, shiftNum, shift, round;
606 uint32_t srcStride, dstStride;
607
608 if (bLuma)
609 {
610 pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx);
611 const int16_t* srcY0 = srcYuv.getLumaAddr(m_puAbsPartIdx);
612
613 // Luma
614 w0 = wp[0].w;
615 offset = wp[0].offset;
616 shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
617 shift = wp[0].shift + shiftNum;
618 round = shift ? (1 << (shift - 1)) : 0;
619 srcStride = srcYuv.m_size;
620 dstStride = predYuv.m_size;
621
622 primitives.weight_sp(srcY0, dstY, srcStride, dstStride, m_puWidth, m_puHeight, w0, round, shift, offset);
623 }
624
625 if (bChroma)
626 {
627 pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx);
628 pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx);
629 const int16_t* srcU0 = srcYuv.getCbAddr(m_puAbsPartIdx);
630 const int16_t* srcV0 = srcYuv.getCrAddr(m_puAbsPartIdx);
631
632 // Chroma U
633 w0 = wp[1].w;
634 offset = wp[1].offset;
635 shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
636 shift = wp[1].shift + shiftNum;
637 round = shift ? (1 << (shift - 1)) : 0;
638
639 srcStride = srcYuv.m_csize;
640 dstStride = predYuv.m_csize;
641
642 uint32_t cwidth = m_puWidth >> srcYuv.m_hChromaShift;
643 uint32_t cheight = m_puHeight >> srcYuv.m_vChromaShift;
644
645 primitives.weight_sp(srcU0, dstU, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
646
647 // Chroma V
648 w0 = wp[2].w;
649 offset = wp[2].offset;
650 shift = wp[2].shift + shiftNum;
651 round = shift ? (1 << (shift - 1)) : 0;
652
653 primitives.weight_sp(srcV0, dstV, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
654 }
655}
656
657void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, int dirMode)
658{
659 IntraNeighbors intraNeighbors;
660 initIntraNeighbors(cu, absPartIdx, partDepth, true, &intraNeighbors);
661
662 pixel* adiBuf = m_predBuf;
663 pixel* refAbove = m_refAbove;
664 pixel* refLeft = m_refLeft;
665 pixel* refAboveFlt = m_refAboveFlt;
666 pixel* refLeftFlt = m_refLeftFlt;
667
668 int tuSize = intraNeighbors.tuSize;
669 int tuSize2 = tuSize << 1;
670
671 pixel* adiOrigin = cu.m_encData->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
672 intptr_t picStride = cu.m_encData->m_reconPic->m_stride;
673
674 fillReferenceSamples(adiOrigin, picStride, adiBuf, intraNeighbors);
675
676 // initialization of ADI buffers
677 const int bufOffset = tuSize - 1;
678 refAbove += bufOffset;
679 refLeft += bufOffset;
680
681 // ADI_BUF_STRIDE * (2 * tuSize + 1);
682 memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel));
683 for (int k = 0; k < tuSize2 + 1; k++)
684 refLeft[k] = adiBuf[k * ADI_BUF_STRIDE];
685
686 if (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize)
687 {
688 // generate filtered intra prediction samples
689 refAboveFlt += bufOffset;
690 refLeftFlt += bufOffset;
691
692 bool bStrongSmoothing = (tuSize == 32 && cu.m_slice->m_sps->bUseStrongIntraSmoothing);
693
694 if (bStrongSmoothing)
695 {
696 const int trSize = 32;
697 const int trSize2 = 32 * 2;
698 const int threshold = 1 << (X265_DEPTH - 5);
699 int refBL = refLeft[trSize2];
700 int refTL = refAbove[0];
701 int refTR = refAbove[trSize2];
702 bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize]) < threshold &&
703 abs(refTL + refTR - 2 * refAbove[trSize]) < threshold);
704
705 if (bStrongSmoothing)
706 {
707 // bilinear interpolation
708 const int shift = 5 + 1; // intraNeighbors.log2TrSize + 1;
709 int init = (refTL << shift) + tuSize;
710 int delta;
711
712 refLeftFlt[0] = refAboveFlt[0] = refAbove[0];
713
714 //TODO: Performance Primitive???
715 delta = refBL - refTL;
716 for (int i = 1; i < trSize2; i++)
717 refLeftFlt[i] = (pixel)((init + delta * i) >> shift);
718 refLeftFlt[trSize2] = refLeft[trSize2];
719
720 delta = refTR - refTL;
721 for (int i = 1; i < trSize2; i++)
722 refAboveFlt[i] = (pixel)((init + delta * i) >> shift);
723 refAboveFlt[trSize2] = refAbove[trSize2];
724
725 return;
726 }
727 }
728
729 refLeft[-1] = refAbove[1];
730 for (int i = 0; i < tuSize2; i++)
731 refLeftFlt[i] = (refLeft[i - 1] + 2 * refLeft[i] + refLeft[i + 1] + 2) >> 2;
732 refLeftFlt[tuSize2] = refLeft[tuSize2];
733
734 refAboveFlt[0] = refLeftFlt[0];
735 for (int i = 1; i < tuSize2; i++)
736 refAboveFlt[i] = (refAbove[i - 1] + 2 * refAbove[i] + refAbove[i + 1] + 2) >> 2;
737 refAboveFlt[tuSize2] = refAbove[tuSize2];
738 }
739}
740
741void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, uint32_t chromaId)
742{
743 IntraNeighbors intraNeighbors;
744 initIntraNeighbors(cu, absPartIdx, partDepth, false, &intraNeighbors);
745 uint32_t tuSize = intraNeighbors.tuSize;
746
747 const pixel* adiOrigin = cu.m_encData->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
748 intptr_t picStride = cu.m_encData->m_reconPic->m_strideC;
749 pixel* adiRef = getAdiChromaBuf(chromaId, tuSize);
750
751 fillReferenceSamples(adiOrigin, picStride, adiRef, intraNeighbors);
752}
753
754void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t partDepth, bool isLuma, IntraNeighbors *intraNeighbors)
755{
756 uint32_t log2TrSize = cu.m_log2CUSize[0] - partDepth;
757 int log2UnitWidth = LOG2_UNIT_SIZE;
758 int log2UnitHeight = LOG2_UNIT_SIZE;
759
760 if (!isLuma)
761 {
762 log2TrSize -= cu.m_hChromaShift;
763 log2UnitWidth -= cu.m_hChromaShift;
764 log2UnitHeight -= cu.m_vChromaShift;
765 }
766
767 int numIntraNeighbor = 0;
768 bool* bNeighborFlags = intraNeighbors->bNeighborFlags;
769
770 uint32_t partIdxLT, partIdxRT, partIdxLB;
771
772 cu.deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, absPartIdx, partDepth);
773
774 uint32_t tuSize = 1 << log2TrSize;
775 int tuWidthInUnits = tuSize >> log2UnitWidth;
776 int tuHeightInUnits = tuSize >> log2UnitHeight;
777 int aboveUnits = tuWidthInUnits << 1;
778 int leftUnits = tuHeightInUnits << 1;
779 int partIdxStride = cu.m_slice->m_sps->numPartInCUSize;
780 partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)];
781
782 bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
783 numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
784 numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));
785 numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits));
786 numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));
787 numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - tuHeightInUnits));
788
789 intraNeighbors->numIntraNeighbor = numIntraNeighbor;
790 intraNeighbors->totalUnits = aboveUnits + leftUnits + 1;
791 intraNeighbors->aboveUnits = aboveUnits;
792 intraNeighbors->leftUnits = leftUnits;
793 intraNeighbors->unitWidth = 1 << log2UnitWidth;
794 intraNeighbors->unitHeight = 1 << log2UnitHeight;
795 intraNeighbors->tuSize = tuSize;
796 intraNeighbors->log2TrSize = log2TrSize;
797}
798
799void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors)
800{
801 const pixel dcValue = (pixel)(1 << (X265_DEPTH - 1));
802 int numIntraNeighbor = intraNeighbors.numIntraNeighbor;
803 int totalUnits = intraNeighbors.totalUnits;
804 uint32_t tuSize = intraNeighbors.tuSize;
805 uint32_t refSize = tuSize * 2 + 1;
806
807 if (numIntraNeighbor == 0)
808 {
809 // Fill border with DC value
810 for (uint32_t i = 0; i < refSize; i++)
811 adiRef[i] = dcValue;
812
813 for (uint32_t i = 1; i < refSize; i++)
814 adiRef[i * ADI_BUF_STRIDE] = dcValue;
815 }
816 else if (numIntraNeighbor == totalUnits)
817 {
818 // Fill top border with rec. samples
819 const pixel* adiTemp = adiOrigin - picStride - 1;
820 memcpy(adiRef, adiTemp, refSize * sizeof(*adiRef));
821
822 // Fill left border with rec. samples
823 adiTemp = adiOrigin - 1;
824 for (uint32_t i = 1; i < refSize; i++)
825 {
826 adiRef[i * ADI_BUF_STRIDE] = adiTemp[0];
827 adiTemp += picStride;
828 }
829 }
830 else // reference samples are partially available
831 {
832 const bool* bNeighborFlags = intraNeighbors.bNeighborFlags;
833 const bool* pNeighborFlags;
834 int aboveUnits = intraNeighbors.aboveUnits;
835 int leftUnits = intraNeighbors.leftUnits;
836 int unitWidth = intraNeighbors.unitWidth;
837 int unitHeight = intraNeighbors.unitHeight;
838 int totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth);
839 pixel adiLineBuffer[5 * MAX_CU_SIZE];
840 pixel* adi;
841
842 // Initialize
843 for (int i = 0; i < totalSamples; i++)
844 adiLineBuffer[i] = dcValue;
845
846 // Fill top-left sample
847 const pixel* adiTemp = adiOrigin - picStride - 1;
848 adi = adiLineBuffer + (leftUnits * unitHeight);
849 pNeighborFlags = bNeighborFlags + leftUnits;
850 if (*pNeighborFlags)
851 {
852 pixel topLeftVal = adiTemp[0];
853 for (int i = 0; i < unitWidth; i++)
854 adi[i] = topLeftVal;
855 }
856
857 // Fill left & below-left samples
858 adiTemp += picStride;
859 adi--;
860 pNeighborFlags--;
861 for (int j = 0; j < leftUnits; j++)
862 {
863 if (*pNeighborFlags)
864 for (int i = 0; i < unitHeight; i++)
865 adi[-i] = adiTemp[i * picStride];
866
867 adiTemp += unitHeight * picStride;
868 adi -= unitHeight;
869 pNeighborFlags--;
870 }
871
872 // Fill above & above-right samples
873 adiTemp = adiOrigin - picStride;
874 adi = adiLineBuffer + (leftUnits * unitHeight) + unitWidth;
875 pNeighborFlags = bNeighborFlags + leftUnits + 1;
876 for (int j = 0; j < aboveUnits; j++)
877 {
878 if (*pNeighborFlags)
879 memcpy(adi, adiTemp, unitWidth * sizeof(*adiTemp));
880 adiTemp += unitWidth;
881 adi += unitWidth;
882 pNeighborFlags++;
883 }
884
885 // Pad reference samples when necessary
886 int curr = 0;
887 int next = 1;
888 adi = adiLineBuffer;
889 int pAdiLineTopRowOffset = leftUnits * (unitHeight - unitWidth);
890 if (!bNeighborFlags[0])
891 {
892 // very bottom unit of bottom-left; at least one unit will be valid.
893 while (next < totalUnits && !bNeighborFlags[next])
894 next++;
895
896 pixel* pAdiLineNext = adiLineBuffer + ((next < leftUnits) ? (next * unitHeight) : (pAdiLineTopRowOffset + (next * unitWidth)));
897 const pixel refSample = *pAdiLineNext;
898 // Pad unavailable samples with new value
899 int nextOrTop = X265_MIN(next, leftUnits);
900 // fill left column
901 while (curr < nextOrTop)
902 {
903 for (int i = 0; i < unitHeight; i++)
904 adi[i] = refSample;
905
906 adi += unitHeight;
907 curr++;
908 }
909
910 // fill top row
911 while (curr < next)
912 {
913 for (int i = 0; i < unitWidth; i++)
914 adi[i] = refSample;
915
916 adi += unitWidth;
917 curr++;
918 }
919 }
920
921 // pad all other reference samples.
922 while (curr < totalUnits)
923 {
924 if (!bNeighborFlags[curr]) // samples not available
925 {
926 int numSamplesInCurrUnit = (curr >= leftUnits) ? unitWidth : unitHeight;
927 const pixel refSample = *(adi - 1);
928 for (int i = 0; i < numSamplesInCurrUnit; i++)
929 adi[i] = refSample;
930
931 adi += numSamplesInCurrUnit;
932 curr++;
933 }
934 else
935 {
936 adi += (curr >= leftUnits) ? unitWidth : unitHeight;
937 curr++;
938 }
939 }
940
941 // Copy processed samples
942 adi = adiLineBuffer + refSize + unitWidth - 2;
943 memcpy(adiRef, adi, refSize * sizeof(*adiRef));
944
945 adi = adiLineBuffer + refSize - 1;
946 for (int i = 1; i < (int)refSize; i++)
947 adiRef[i * ADI_BUF_STRIDE] = adi[-i];
948 }
949}
950
951bool Predict::isAboveLeftAvailable(const CUData& cu, uint32_t partIdxLT)
952{
953 uint32_t partAboveLeft;
954 const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT);
955
956 if (!cu.m_slice->m_pps->bConstrainedIntraPred)
957 return cuAboveLeft ? true : false;
958 else
959 return cuAboveLeft && cuAboveLeft->isIntra(partAboveLeft);
960}
961
962int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags)
963{
964 const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
965 const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT] + 1;
966 const uint32_t idxStep = 1;
967 bool* validFlagPtr = bValidFlags;
968 int numIntra = 0;
969
970 for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep)
971 {
972 uint32_t partAbove;
973 const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]);
974 if (cuAbove && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAbove->isIntra(partAbove)))
975 {
976 numIntra++;
977 *validFlagPtr = true;
978 }
979 else
980 *validFlagPtr = false;
981
982 validFlagPtr++;
983 }
984
985 return numIntra;
986}
987
988int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags)
989{
990 const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
991 const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB] + 1;
992 const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize;
993 bool* validFlagPtr = bValidFlags;
994 int numIntra = 0;
995
996 for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep)
997 {
998 uint32_t partLeft;
999 const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]);
1000 if (cuLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuLeft->isIntra(partLeft)))
1001 {
1002 numIntra++;
1003 *validFlagPtr = true;
1004 }
1005 else
1006 *validFlagPtr = false;
1007
1008 validFlagPtr--; // opposite direction
1009 }
1010
1011 return numIntra;
1012}
1013
1014int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags)
1015{
1016 const uint32_t numUnitsInPU = g_zscanToRaster[partIdxRT] - g_zscanToRaster[partIdxLT] + 1;
1017 bool* validFlagPtr = bValidFlags;
1018 int numIntra = 0;
1019
1020 for (uint32_t offset = 1; offset <= numUnitsInPU; offset++)
1021 {
1022 uint32_t partAboveRight;
1023 const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset);
1024 if (cuAboveRight && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAboveRight->isIntra(partAboveRight)))
1025 {
1026 numIntra++;
1027 *validFlagPtr = true;
1028 }
1029 else
1030 *validFlagPtr = false;
1031
1032 validFlagPtr++;
1033 }
1034
1035 return numIntra;
1036}
1037
1038int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags)
1039{
1040 const uint32_t numUnitsInPU = (g_zscanToRaster[partIdxLB] - g_zscanToRaster[partIdxLT]) / cu.m_slice->m_sps->numPartInCUSize + 1;
1041 bool* validFlagPtr = bValidFlags;
1042 int numIntra = 0;
1043
1044 for (uint32_t offset = 1; offset <= numUnitsInPU; offset++)
1045 {
1046 uint32_t partBelowLeft;
1047 const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset);
1048 if (cuBelowLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuBelowLeft->isIntra(partBelowLeft)))
1049 {
1050 numIntra++;
1051 *validFlagPtr = true;
1052 }
1053 else
1054 *validFlagPtr = false;
1055
1056 validFlagPtr--; // opposite direction
1057 }
1058
1059 return numIntra;
1060}