predict.cpp

   1 /*****************************************************************************
   2 * Copyright (C) 2013 x265 project
   3 *
   4 * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  19 *
  20 * This program is also available under a commercial proprietary license.
  21 * For more information, contact us at license @ x265.com.
  22 *****************************************************************************/
  23
  24 #include "common.h"
  25 #include "slice.h"
  26 #include "framedata.h"
  27 #include "picyuv.h"
  28 #include "predict.h"
  29 #include "primitives.h"
  30
  31 using namespace x265;
  32
  33 namespace
  34 {
  35 inline pixel weightBidir(int w0, int16_t P0, int w1, int16_t P1, int round, int shift, int offset)
  36 {
  37     return Clip((w0 * (P0 + IF_INTERNAL_OFFS) + w1 * (P1 + IF_INTERNAL_OFFS) + round + (offset << (shift - 1))) >> shift);
  38 }
  39 }
  40
  41 Predict::Predict()
  42 {
  43     m_predBuf = NULL;
  44     m_refAbove = NULL;
  45     m_refAboveFlt = NULL;
  46     m_refLeft = NULL;
  47     m_refLeftFlt = NULL;
  48     m_immedVals = NULL;
  49 }
  50
  51 Predict::~Predict()
  52 {
  53     X265_FREE(m_predBuf);
  54     X265_FREE(m_refAbove);
  55     X265_FREE(m_immedVals);
  56     m_predShortYuv[0].destroy();
  57     m_predShortYuv[1].destroy();
  58 }
  59
  60 bool Predict::allocBuffers(int csp)
  61 {
  62     m_csp = csp;
  63     m_hChromaShift = CHROMA_H_SHIFT(csp);
  64     m_vChromaShift = CHROMA_V_SHIFT(csp);
  65
  66     int predBufHeight = ((MAX_CU_SIZE + 2) << 4);
  67     int predBufStride = ((MAX_CU_SIZE + 8) << 4);
  68     CHECKED_MALLOC(m_predBuf, pixel, predBufStride * predBufHeight);
  69     CHECKED_MALLOC(m_immedVals, int16_t, 64 * (64 + NTAPS_LUMA - 1));
  70     CHECKED_MALLOC(m_refAbove, pixel, 12 * MAX_CU_SIZE);
  71
  72     m_refAboveFlt = m_refAbove + 3 * MAX_CU_SIZE;
  73     m_refLeft = m_refAboveFlt + 3 * MAX_CU_SIZE;
  74     m_refLeftFlt = m_refLeft + 3 * MAX_CU_SIZE;
  75
  76     return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp);
  77
  78 fail:
  79     return false;
  80 }
  81
  82 void Predict::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSize)
  83 {
  84     int tuSize = 1 << log2TrSize;
  85
  86     pixel *refLft, *refAbv;
  87
  88     if (!(g_intraFilterFlags[dirMode] & tuSize))
  89     {
  90         refLft = m_refLeft + tuSize - 1;
  91         refAbv = m_refAbove + tuSize - 1;
  92     }
  93     else
  94     {
  95         refLft = m_refLeftFlt + tuSize - 1;
  96         refAbv = m_refAboveFlt + tuSize - 1;
  97     }
  98
  99     bool bFilter = log2TrSize <= 4;
 100     int sizeIdx = log2TrSize - 2;
 101     X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
 102     primitives.intra_pred[dirMode][sizeIdx](dst, stride, refLft, refAbv, dirMode, bFilter);
 103 }
 104
 105 void Predict::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC, int chFmt)
 106 {
 107     int tuSize = 1 << log2TrSizeC;
 108     int tuSize2 = tuSize << 1;
 109
 110     // Create the prediction
 111     const int bufOffset = tuSize - 1;
 112     pixel buf0[3 * MAX_CU_SIZE];
 113     pixel buf1[3 * MAX_CU_SIZE];
 114     pixel* above;
 115     pixel* left = buf0 + bufOffset;
 116
 117     int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1);
 118     for (int k = 0; k < limit; k++)
 119         left[k] = src[k * ADI_BUF_STRIDE];
 120
 121     if (chFmt == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize))
 122     {
 123         // generate filtered intra prediction samples
 124         buf0[bufOffset - 1] = src[1];
 125         left = buf1 + bufOffset;
 126         for (int i = 0; i < tuSize2; i++)
 127             left[i] = (buf0[bufOffset + i - 1] + 2 * buf0[bufOffset + i] + buf0[bufOffset + i + 1] + 2) >> 2;
 128         left[tuSize2] = buf0[bufOffset + tuSize2];
 129
 130         above = buf0 + bufOffset;
 131         above[0] = left[0];
 132         for (int i = 1; i < tuSize2; i++)
 133             above[i] = (src[i - 1] + 2 * src[i] + src[i + 1] + 2) >> 2;
 134         above[tuSize2] = src[tuSize2];
 135     }
 136     else
 137     {
 138         above = buf1 + bufOffset;
 139         memcpy(above, src, (tuSize2 + 1) * sizeof(pixel));
 140     }
 141
 142     int sizeIdx = log2TrSizeC - 2;
 143     X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
 144     primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 0);
 145 }
 146
 147 void Predict::initMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx)
 148 {
 149     m_predSlice = cu.m_slice;
 150     cu.getPartIndexAndSize(partIdx, m_puAbsPartIdx, m_puWidth, m_puHeight);
 151     m_ctuAddr = cu.m_cuAddr;
 152     m_cuAbsPartIdx = cuGeom.encodeIdx;
 153 }
 154
 155 void Predict::prepMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx)
 156 {
 157     initMotionCompensation(cu, cuGeom, partIdx);
 158
 159     m_refIdx0      = cu.m_refIdx[0][m_puAbsPartIdx];
 160     m_clippedMv[0] = cu.m_mv[0][m_puAbsPartIdx];
 161     m_refIdx1      = cu.m_refIdx[1][m_puAbsPartIdx];
 162     m_clippedMv[1] = cu.m_mv[1][m_puAbsPartIdx];
 163     cu.clipMv(m_clippedMv[0]);
 164     cu.clipMv(m_clippedMv[1]);
 165 }
 166
 167 void Predict::motionCompensation(Yuv& predYuv, bool bLuma, bool bChroma)
 168 {
 169     if (m_predSlice->isInterP())
 170     {
 171         /* P Slice */
 172         WeightValues wv0[3];
 173         X265_CHECK(m_refIdx0 >= 0, "invalid P refidx\n");
 174         X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "P refidx out of range\n");
 175         const WeightParam *wp0 = m_predSlice->m_weightPredTable[0][m_refIdx0];
 176
 177         if (m_predSlice->m_pps->bUseWeightPred && wp0->bPresentFlag)
 178         {
 179             for (int plane = 0; plane < 3; plane++)
 180             {
 181                 wv0[plane].w      = wp0[plane].inputWeight;
 182                 wv0[plane].offset = wp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
 183                 wv0[plane].shift  = wp0[plane].log2WeightDenom;
 184                 wv0[plane].round  = wp0[plane].log2WeightDenom >= 1 ? 1 << (wp0[plane].log2WeightDenom - 1) : 0;
 185             }
 186
 187             ShortYuv& shortYuv = m_predShortYuv[0];
 188
 189             if (bLuma)
 190                 predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
 191             if (bChroma)
 192                 predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
 193
 194             addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma);
 195         }
 196         else
 197         {
 198             if (bLuma)
 199                 predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
 200             if (bChroma)
 201                 predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
 202         }
 203     }
 204     else
 205     {
 206         /* B Slice */
 207
 208         WeightValues wv0[3], wv1[3];
 209         const WeightParam *pwp0, *pwp1;
 210
 211         if (m_predSlice->m_pps->bUseWeightedBiPred)
 212         {
 213             pwp0 = m_refIdx0 >= 0 ? m_predSlice->m_weightPredTable[0][m_refIdx0] : NULL;
 214             pwp1 = m_refIdx1 >= 0 ? m_predSlice->m_weightPredTable[1][m_refIdx1] : NULL;
 215
 216             if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag))
 217             {
 218                 /* biprediction weighting */
 219                 for (int plane = 0; plane < 3; plane++)
 220                 {
 221                     wv0[plane].w = pwp0[plane].inputWeight;
 222                     wv0[plane].o = pwp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
 223                     wv0[plane].shift = pwp0[plane].log2WeightDenom;
 224                     wv0[plane].round = 1 << pwp0[plane].log2WeightDenom;
 225
 226                     wv1[plane].w = pwp1[plane].inputWeight;
 227                     wv1[plane].o = pwp1[plane].inputOffset * (1 << (X265_DEPTH - 8));
 228                     wv1[plane].shift = wv0[plane].shift;
 229                     wv1[plane].round = wv0[plane].round;
 230                 }
 231             }
 232             else
 233             {
 234                 /* uniprediction weighting, always outputs to wv0 */
 235                 const WeightParam* pwp = (m_refIdx0 >= 0) ? pwp0 : pwp1;
 236                 for (int plane = 0; plane < 3; plane++)
 237                 {
 238                     wv0[plane].w = pwp[plane].inputWeight;
 239                     wv0[plane].offset = pwp[plane].inputOffset * (1 << (X265_DEPTH - 8));
 240                     wv0[plane].shift = pwp[plane].log2WeightDenom;
 241                     wv0[plane].round = pwp[plane].log2WeightDenom >= 1 ? 1 << (pwp[plane].log2WeightDenom - 1) : 0;
 242                 }
 243             }
 244         }
 245         else
 246             pwp0 = pwp1 = NULL;
 247
 248         if (m_refIdx0 >= 0 && m_refIdx1 >= 0)
 249         {
 250             /* Biprediction */
 251             X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "bidir refidx0 out of range\n");
 252             X265_CHECK(m_refIdx1 < m_predSlice->m_numRefIdx[1], "bidir refidx1 out of range\n");
 253
 254             if (bLuma)
 255             {
 256                 predInterLumaShort(m_predShortYuv[0], *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
 257                 predInterLumaShort(m_predShortYuv[1], *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]);
 258             }
 259             if (bChroma)
 260             {
 261                 predInterChromaShort(m_predShortYuv[0], *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
 262                 predInterChromaShort(m_predShortYuv[1], *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]);
 263             }
 264
 265             if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag))
 266                 addWeightBi(predYuv, m_predShortYuv[0], m_predShortYuv[1], wv0, wv1, bLuma, bChroma);
 267             else
 268                 predYuv.addAvg(m_predShortYuv[0], m_predShortYuv[1], m_puAbsPartIdx, m_puWidth, m_puHeight, bLuma, bChroma);
 269         }
 270         else if (m_refIdx0 >= 0)
 271         {
 272             /* uniprediction to L0 */
 273             X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "unidir refidx0 out of range\n");
 274
 275             if (pwp0 && pwp0->bPresentFlag)
 276             {
 277                 ShortYuv& shortYuv = m_predShortYuv[0];
 278
 279                 if (bLuma)
 280                     predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
 281                 if (bChroma)
 282                     predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
 283
 284                 addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma);
 285             }
 286             else
 287             {
 288                 if (bLuma)
 289                     predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
 290                 if (bChroma)
 291                     predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]);
 292             }
 293         }
 294         else
 295         {
 296             /* uniprediction to L1 */
 297             X265_CHECK(m_refIdx1 >= 0, "refidx1 was not positive\n");
 298             X265_CHECK(m_refIdx1 < m_predSlice->m_numRefIdx[1], "unidir refidx1 out of range\n");
 299
 300             if (pwp1 && pwp1->bPresentFlag)
 301             {
 302                 ShortYuv& shortYuv = m_predShortYuv[0];
 303
 304                 if (bLuma)
 305                     predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]);
 306                 if (bChroma)
 307                     predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]);
 308
 309                 addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma);
 310             }
 311             else
 312             {
 313                 if (bLuma)
 314                     predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]);
 315                 if (bChroma)
 316                     predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]);
 317             }
 318         }
 319     }
 320 }
 321
 322 void Predict::predInterLumaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const
 323 {
 324     pixel *dst = dstYuv.getLumaAddr(m_puAbsPartIdx);
 325     intptr_t dstStride = dstYuv.m_size;
 326
 327     intptr_t srcStride = refPic.m_stride;
 328     intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
 329     int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
 330     pixel* src = const_cast<PicYuv&>(refPic).getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset;
 331
 332     int xFrac = mv.x & 0x3;
 333     int yFrac = mv.y & 0x3;
 334
 335     if (!(yFrac | xFrac))
 336         primitives.luma_copy_pp[partEnum](dst, dstStride, src, srcStride);
 337     else if (!yFrac)
 338         primitives.luma_hpp[partEnum](src, srcStride, dst, dstStride, xFrac);
 339     else if (!xFrac)
 340         primitives.luma_vpp[partEnum](src, srcStride, dst, dstStride, yFrac);
 341     else
 342     {
 343         int tmpStride = m_puWidth;
 344         int filterSize = NTAPS_LUMA;
 345         int halfFilterSize = (filterSize >> 1);
 346         primitives.luma_hps[partEnum](src, srcStride, m_immedVals, tmpStride, xFrac, 1);
 347         primitives.luma_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac);
 348     }
 349 }
 350
 351 void Predict::predInterLumaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
 352 {
 353     int16_t *dst = dstSYuv.getLumaAddr(m_puAbsPartIdx);
 354     int dstStride = dstSYuv.m_size;
 355
 356     intptr_t srcStride = refPic.m_stride;
 357     intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
 358     pixel *src = const_cast<PicYuv&>(refPic).getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset;
 359
 360     int xFrac = mv.x & 0x3;
 361     int yFrac = mv.y & 0x3;
 362
 363     int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
 364
 365     X265_CHECK((m_puWidth % 4) + (m_puHeight % 4) == 0, "width or height not divisible by 4\n");
 366     X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n");
 367
 368     if (!(yFrac | xFrac))
 369         primitives.luma_p2s(src, srcStride, dst, m_puWidth, m_puHeight);
 370     else if (!yFrac)
 371         primitives.luma_hps[partEnum](src, srcStride, dst, dstStride, xFrac, 0);
 372     else if (!xFrac)
 373         primitives.luma_vps[partEnum](src, srcStride, dst, dstStride, yFrac);
 374     else
 375     {
 376         int tmpStride = m_puWidth;
 377         int filterSize = NTAPS_LUMA;
 378         int halfFilterSize = (filterSize >> 1);
 379         primitives.luma_hps[partEnum](src, srcStride, m_immedVals, tmpStride, xFrac, 1);
 380         primitives.luma_vss[partEnum](m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac);
 381     }
 382 }
 383
 384 void Predict::predInterChromaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const
 385 {
 386     intptr_t dstStride = dstYuv.m_csize;
 387     intptr_t refStride = refPic.m_strideC;
 388
 389     int shiftHor = (2 + m_hChromaShift);
 390     int shiftVer = (2 + m_vChromaShift);
 391
 392     intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
 393
 394     pixel* refCb = const_cast<PicYuv&>(refPic).getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
 395     pixel* refCr = const_cast<PicYuv&>(refPic).getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
 396
 397     pixel* dstCb = dstYuv.getCbAddr(m_puAbsPartIdx);
 398     pixel* dstCr = dstYuv.getCrAddr(m_puAbsPartIdx);
 399
 400     int xFrac = mv.x & ((1 << shiftHor) - 1);
 401     int yFrac = mv.y & ((1 << shiftVer) - 1);
 402
 403     int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
 404
 405     if (!(yFrac | xFrac))
 406     {
 407         primitives.chroma[m_csp].copy_pp[partEnum](dstCb, dstStride, refCb, refStride);
 408         primitives.chroma[m_csp].copy_pp[partEnum](dstCr, dstStride, refCr, refStride);
 409     }
 410     else if (!yFrac)
 411     {
 412         primitives.chroma[m_csp].filter_hpp[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift));
 413         primitives.chroma[m_csp].filter_hpp[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift));
 414     }
 415     else if (!xFrac)
 416     {
 417         primitives.chroma[m_csp].filter_vpp[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
 418         primitives.chroma[m_csp].filter_vpp[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
 419     }
 420     else
 421     {
 422         int extStride = m_puWidth >> m_hChromaShift;
 423         int filterSize = NTAPS_CHROMA;
 424         int halfFilterSize = (filterSize >> 1);
 425
 426         primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
 427         primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
 428
 429         primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
 430         primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
 431     }
 432 }
 433
 434 void Predict::predInterChromaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
 435 {
 436     intptr_t refStride = refPic.m_strideC;
 437     intptr_t dstStride = dstSYuv.m_csize;
 438
 439     int shiftHor = (2 + m_hChromaShift);
 440     int shiftVer = (2 + m_vChromaShift);
 441
 442     intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
 443
 444     pixel* refCb = const_cast<PicYuv&>(refPic).getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
 445     pixel* refCr = const_cast<PicYuv&>(refPic).getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
 446
 447     int16_t* dstCb = dstSYuv.getCbAddr(m_puAbsPartIdx);
 448     int16_t* dstCr = dstSYuv.getCrAddr(m_puAbsPartIdx);
 449
 450     int xFrac = mv.x & ((1 << shiftHor) - 1);
 451     int yFrac = mv.y & ((1 << shiftVer) - 1);
 452
 453     int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
 454
 455     uint32_t cxWidth  = m_puWidth >> m_hChromaShift;
 456     uint32_t cxHeight = m_puHeight >> m_vChromaShift;
 457
 458     X265_CHECK(((cxWidth | cxHeight) % 2) == 0, "chroma block size expected to be multiple of 2\n");
 459
 460     if (!(yFrac | xFrac))
 461     {
 462         primitives.chroma_p2s[m_csp](refCb, refStride, dstCb, cxWidth, cxHeight);
 463         primitives.chroma_p2s[m_csp](refCr, refStride, dstCr, cxWidth, cxHeight);
 464     }
 465     else if (!yFrac)
 466     {
 467         primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift), 0);
 468         primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift), 0);
 469     }
 470     else if (!xFrac)
 471     {
 472         primitives.chroma[m_csp].filter_vps[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
 473         primitives.chroma[m_csp].filter_vps[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
 474     }
 475     else
 476     {
 477         int extStride = cxWidth;
 478         int filterSize = NTAPS_CHROMA;
 479         int halfFilterSize = (filterSize >> 1);
 480         primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
 481         primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
 482         primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
 483         primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
 484     }
 485 }
 486
 487 /* weighted averaging for bi-pred */
 488 void Predict::addWeightBi(Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const
 489 {
 490     int x, y;
 491
 492     int w0, w1, offset, shiftNum, shift, round;
 493     uint32_t src0Stride, src1Stride, dststride;
 494
 495     pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx);
 496     pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx);
 497     pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx);
 498
 499     const int16_t* srcY0 = srcYuv0.getLumaAddr(m_puAbsPartIdx);
 500     const int16_t* srcU0 = srcYuv0.getCbAddr(m_puAbsPartIdx);
 501     const int16_t* srcV0 = srcYuv0.getCrAddr(m_puAbsPartIdx);
 502
 503     const int16_t* srcY1 = srcYuv1.getLumaAddr(m_puAbsPartIdx);
 504     const int16_t* srcU1 = srcYuv1.getCbAddr(m_puAbsPartIdx);
 505     const int16_t* srcV1 = srcYuv1.getCrAddr(m_puAbsPartIdx);
 506
 507     if (bLuma)
 508     {
 509         // Luma
 510         w0      = wp0[0].w;
 511         offset  = wp0[0].o + wp1[0].o;
 512         shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
 513         shift   = wp0[0].shift + shiftNum + 1;
 514         round   = shift ? (1 << (shift - 1)) : 0;
 515         w1      = wp1[0].w;
 516
 517         src0Stride = srcYuv0.m_size;
 518         src1Stride = srcYuv1.m_size;
 519         dststride = predYuv.m_size;
 520
 521         // TODO: can we use weight_sp here?
 522         for (y = m_puHeight - 1; y >= 0; y--)
 523         {
 524             for (x = m_puWidth - 1; x >= 0; )
 525             {
 526                 // note: luma min width is 4
 527                 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
 528                 x--;
 529                 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
 530                 x--;
 531                 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
 532                 x--;
 533                 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
 534                 x--;
 535             }
 536
 537             srcY0 += src0Stride;
 538             srcY1 += src1Stride;
 539             dstY  += dststride;
 540         }
 541     }
 542
 543     if (bChroma)
 544     {
 545         // Chroma U
 546         w0      = wp0[1].w;
 547         offset  = wp0[1].o + wp1[1].o;
 548         shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
 549         shift   = wp0[1].shift + shiftNum + 1;
 550         round   = shift ? (1 << (shift - 1)) : 0;
 551         w1      = wp1[1].w;
 552
 553         src0Stride = srcYuv0.m_csize;
 554         src1Stride = srcYuv1.m_csize;
 555         dststride  = predYuv.m_csize;
 556
 557         uint32_t cwidth = m_puWidth >> srcYuv0.m_hChromaShift;
 558         uint32_t cheight = m_puHeight >> srcYuv0.m_vChromaShift;
 559
 560         // TODO: can we use weight_sp here?
 561         for (y = cheight - 1; y >= 0; y--)
 562         {
 563             for (x = cwidth - 1; x >= 0;)
 564             {
 565                 // note: chroma min width is 2
 566                 dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset);
 567                 x--;
 568                 dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset);
 569                 x--;
 570             }
 571
 572             srcU0 += src0Stride;
 573             srcU1 += src1Stride;
 574             dstU  += dststride;
 575         }
 576
 577         // Chroma V
 578         w0     = wp0[2].w;
 579         offset = wp0[2].o + wp1[2].o;
 580         shift  = wp0[2].shift + shiftNum + 1;
 581         round  = shift ? (1 << (shift - 1)) : 0;
 582         w1     = wp1[2].w;
 583
 584         for (y = cheight - 1; y >= 0; y--)
 585         {
 586             for (x = cwidth - 1; x >= 0;)
 587             {
 588                 // note: chroma min width is 2
 589                 dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset);
 590                 x--;
 591                 dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset);
 592                 x--;
 593             }
 594
 595             srcV0 += src0Stride;
 596             srcV1 += src1Stride;
 597             dstV  += dststride;
 598         }
 599     }
 600 }
 601
 602 /* weighted averaging for uni-pred */
 603 void Predict::addWeightUni(Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const
 604 {
 605     pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx);
 606     pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx);
 607     pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx);
 608
 609     const int16_t* srcY0 = srcYuv.getLumaAddr(m_puAbsPartIdx);
 610     const int16_t* srcU0 = srcYuv.getCbAddr(m_puAbsPartIdx);
 611     const int16_t* srcV0 = srcYuv.getCrAddr(m_puAbsPartIdx);
 612
 613     int w0, offset, shiftNum, shift, round;
 614     uint32_t srcStride, dstStride;
 615
 616     if (bLuma)
 617     {
 618         // Luma
 619         w0      = wp[0].w;
 620         offset  = wp[0].offset;
 621         shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
 622         shift   = wp[0].shift + shiftNum;
 623         round   = shift ? (1 << (shift - 1)) : 0;
 624         srcStride = srcYuv.m_size;
 625         dstStride = predYuv.m_size;
 626
 627         primitives.weight_sp(const_cast<int16_t*>(srcY0), dstY, srcStride, dstStride, m_puWidth, m_puHeight, w0, round, shift, offset);
 628     }
 629
 630     if (bChroma)
 631     {
 632         // Chroma U
 633         w0      = wp[1].w;
 634         offset  = wp[1].offset;
 635         shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
 636         shift   = wp[1].shift + shiftNum;
 637         round   = shift ? (1 << (shift - 1)) : 0;
 638
 639         srcStride = srcYuv.m_csize;
 640         dstStride = predYuv.m_csize;
 641
 642         uint32_t cwidth = m_puWidth >> srcYuv.m_hChromaShift;
 643         uint32_t cheight = m_puHeight >> srcYuv.m_vChromaShift;
 644
 645         primitives.weight_sp(const_cast<int16_t*>(srcU0), dstU, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
 646
 647         // Chroma V
 648         w0     = wp[2].w;
 649         offset = wp[2].offset;
 650         shift  = wp[2].shift + shiftNum;
 651         round  = shift ? (1 << (shift - 1)) : 0;
 652
 653         primitives.weight_sp(const_cast<int16_t*>(srcV0), dstV, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
 654     }
 655 }
 656
 657 void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, int dirMode)
 658 {
 659     IntraNeighbors intraNeighbors;
 660     initIntraNeighbors(cu, absPartIdx, partDepth, true, &intraNeighbors);
 661
 662     pixel* adiBuf      = m_predBuf;
 663     pixel* refAbove    = m_refAbove;
 664     pixel* refLeft     = m_refLeft;
 665     pixel* refAboveFlt = m_refAboveFlt;
 666     pixel* refLeftFlt  = m_refLeftFlt;
 667
 668     int tuSize = intraNeighbors.tuSize;
 669     int tuSize2 = tuSize << 1;
 670
 671     pixel* adiOrigin = cu.m_encData->m_reconPicYuv->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
 672     intptr_t picStride = cu.m_encData->m_reconPicYuv->m_stride;
 673
 674     fillReferenceSamples(adiOrigin, picStride, adiBuf, intraNeighbors);
 675
 676     // initialization of ADI buffers
 677     const int bufOffset = tuSize - 1;
 678     refAbove += bufOffset;
 679     refLeft += bufOffset;
 680
 681     //  ADI_BUF_STRIDE * (2 * tuSize + 1);
 682     memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel));
 683     for (int k = 0; k < tuSize2 + 1; k++)
 684         refLeft[k] = adiBuf[k * ADI_BUF_STRIDE];
 685
 686     if (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize)
 687     {
 688         // generate filtered intra prediction samples
 689         refAboveFlt += bufOffset;
 690         refLeftFlt += bufOffset;
 691
 692         bool bStrongSmoothing = (tuSize == 32 && cu.m_slice->m_sps->bUseStrongIntraSmoothing);
 693
 694         if (bStrongSmoothing)
 695         {
 696             const int trSize = 32;
 697             const int trSize2 = 32 * 2;
 698             const int threshold = 1 << (X265_DEPTH - 5);
 699             int refBL = refLeft[trSize2];
 700             int refTL = refAbove[0];
 701             int refTR = refAbove[trSize2];
 702             bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize]) < threshold &&
 703                 abs(refTL + refTR - 2 * refAbove[trSize]) < threshold);
 704
 705             if (bStrongSmoothing)
 706             {
 707                 // bilinear interpolation
 708                 const int shift = 5 + 1; // intraNeighbors.log2TrSize + 1;
 709                 int init = (refTL << shift) + tuSize;
 710                 int delta;
 711
 712                 refLeftFlt[0] = refAboveFlt[0] = refAbove[0];
 713
 714                 //TODO: Performance Primitive???
 715                 delta = refBL - refTL;
 716                 for (int i = 1; i < trSize2; i++)
 717                     refLeftFlt[i] = (pixel)((init + delta * i) >> shift);
 718                 refLeftFlt[trSize2] = refLeft[trSize2];
 719
 720                 delta = refTR - refTL;
 721                 for (int i = 1; i < trSize2; i++)
 722                     refAboveFlt[i] = (pixel)((init + delta * i) >> shift);
 723                 refAboveFlt[trSize2] = refAbove[trSize2];
 724
 725                 return;
 726             }
 727         }
 728
 729         refLeft[-1] = refAbove[1];
 730         for (int i = 0; i < tuSize2; i++)
 731             refLeftFlt[i] = (refLeft[i - 1] + 2 * refLeft[i] + refLeft[i + 1] + 2) >> 2;
 732         refLeftFlt[tuSize2] = refLeft[tuSize2];
 733
 734         refAboveFlt[0] = refLeftFlt[0];
 735         for (int i = 1; i < tuSize2; i++)
 736             refAboveFlt[i] = (refAbove[i - 1] + 2 * refAbove[i] + refAbove[i + 1] + 2) >> 2;
 737         refAboveFlt[tuSize2] = refAbove[tuSize2];
 738     }
 739 }
 740
 741 void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, uint32_t chromaId)
 742 {
 743     IntraNeighbors intraNeighbors;
 744     initIntraNeighbors(cu, absPartIdx, partDepth, false, &intraNeighbors);
 745     uint32_t tuSize = intraNeighbors.tuSize;
 746
 747     const pixel* adiOrigin = cu.m_encData->m_reconPicYuv->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
 748     intptr_t picStride = cu.m_encData->m_reconPicYuv->m_strideC;
 749     pixel* adiRef = getAdiChromaBuf(chromaId, tuSize);
 750
 751     fillReferenceSamples(adiOrigin, picStride, adiRef, intraNeighbors);
 752 }
 753
 754 void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t partDepth, bool isLuma, IntraNeighbors *intraNeighbors)
 755 {
 756     uint32_t log2TrSize = cu.m_log2CUSize[0] - partDepth;
 757     int log2UnitWidth = LOG2_UNIT_SIZE;
 758     int log2UnitHeight = LOG2_UNIT_SIZE;
 759
 760     if (!isLuma)
 761     {
 762         log2TrSize -= cu.m_hChromaShift;
 763         log2UnitWidth -= cu.m_hChromaShift;
 764         log2UnitHeight -= cu.m_vChromaShift;
 765     }
 766
 767     int   numIntraNeighbor = 0;
 768     bool *bNeighborFlags = intraNeighbors->bNeighborFlags;
 769
 770     uint32_t partIdxLT, partIdxRT, partIdxLB;
 771
 772     cu.deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, absPartIdx, partDepth);
 773
 774     uint32_t tuSize = 1 << log2TrSize;
 775     int  tuWidthInUnits = tuSize >> log2UnitWidth;
 776     int  tuHeightInUnits = tuSize >> log2UnitHeight;
 777     int  aboveUnits = tuWidthInUnits << 1;
 778     int  leftUnits = tuHeightInUnits << 1;
 779     int  partIdxStride = cu.m_slice->m_sps->numPartInCUSize;
 780     partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)];
 781
 782     bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
 783     numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
 784     numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));
 785     numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits));
 786     numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));
 787     numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - tuHeightInUnits));
 788
 789     intraNeighbors->numIntraNeighbor = numIntraNeighbor;
 790     intraNeighbors->totalUnits = aboveUnits + leftUnits + 1;
 791     intraNeighbors->aboveUnits = aboveUnits;
 792     intraNeighbors->leftUnits = leftUnits;
 793     intraNeighbors->unitWidth = 1 << log2UnitWidth;
 794     intraNeighbors->unitHeight = 1 << log2UnitHeight;
 795     intraNeighbors->tuSize = tuSize;
 796     intraNeighbors->log2TrSize = log2TrSize;
 797 }
 798
 799 void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors)
 800 {
 801     const pixel dcValue = (pixel)(1 << (X265_DEPTH - 1));
 802     int numIntraNeighbor = intraNeighbors.numIntraNeighbor;
 803     int totalUnits = intraNeighbors.totalUnits;
 804     uint32_t tuSize = intraNeighbors.tuSize;
 805     uint32_t refSize = tuSize * 2 + 1;
 806
 807     if (numIntraNeighbor == 0)
 808     {
 809         // Fill border with DC value
 810         for (uint32_t i = 0; i < refSize; i++)
 811             adiRef[i] = dcValue;
 812
 813         for (uint32_t i = 1; i < refSize; i++)
 814             adiRef[i * ADI_BUF_STRIDE] = dcValue;
 815     }
 816     else if (numIntraNeighbor == totalUnits)
 817     {
 818         // Fill top border with rec. samples
 819         const pixel* adiTemp = adiOrigin - picStride - 1;
 820         memcpy(adiRef, adiTemp, refSize * sizeof(*adiRef));
 821
 822         // Fill left border with rec. samples
 823         adiTemp = adiOrigin - 1;
 824         for (uint32_t i = 1; i < refSize; i++)
 825         {
 826             adiRef[i * ADI_BUF_STRIDE] = adiTemp[0];
 827             adiTemp += picStride;
 828         }
 829     }
 830     else // reference samples are partially available
 831     {
 832         const bool *bNeighborFlags = intraNeighbors.bNeighborFlags;
 833         const bool *pNeighborFlags;
 834         int aboveUnits = intraNeighbors.aboveUnits;
 835         int leftUnits = intraNeighbors.leftUnits;
 836         int unitWidth = intraNeighbors.unitWidth;
 837         int unitHeight = intraNeighbors.unitHeight;
 838         int totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth);
 839         pixel adiLineBuffer[5 * MAX_CU_SIZE];
 840         pixel *adi;
 841
 842         // Initialize
 843         for (int i = 0; i < totalSamples; i++)
 844             adiLineBuffer[i] = dcValue;
 845
 846         // Fill top-left sample
 847         const pixel* adiTemp = adiOrigin - picStride - 1;
 848         adi = adiLineBuffer + (leftUnits * unitHeight);
 849         pNeighborFlags = bNeighborFlags + leftUnits;
 850         if (*pNeighborFlags)
 851         {
 852             pixel topLeftVal = adiTemp[0];
 853             for (int i = 0; i < unitWidth; i++)
 854                 adi[i] = topLeftVal;
 855         }
 856
 857         // Fill left & below-left samples
 858         adiTemp += picStride;
 859         adi--;
 860         pNeighborFlags--;
 861         for (int j = 0; j < leftUnits; j++)
 862         {
 863             if (*pNeighborFlags)
 864                 for (int i = 0; i < unitHeight; i++)
 865                     adi[-i] = adiTemp[i * picStride];
 866
 867             adiTemp += unitHeight * picStride;
 868             adi -= unitHeight;
 869             pNeighborFlags--;
 870         }
 871
 872         // Fill above & above-right samples
 873         adiTemp = adiOrigin - picStride;
 874         adi = adiLineBuffer + (leftUnits * unitHeight) + unitWidth;
 875         pNeighborFlags = bNeighborFlags + leftUnits + 1;
 876         for (int j = 0; j < aboveUnits; j++)
 877         {
 878             if (*pNeighborFlags)
 879                 memcpy(adi, adiTemp, unitWidth * sizeof(*adiTemp));
 880             adiTemp += unitWidth;
 881             adi += unitWidth;
 882             pNeighborFlags++;
 883         }
 884
 885         // Pad reference samples when necessary
 886         int curr = 0;
 887         int next = 1;
 888         adi = adiLineBuffer;
 889         int pAdiLineTopRowOffset = leftUnits * (unitHeight - unitWidth);
 890         if (!bNeighborFlags[0])
 891         {
 892             // very bottom unit of bottom-left; at least one unit will be valid.
 893             while (next < totalUnits && !bNeighborFlags[next])
 894                 next++;
 895
 896             pixel *pAdiLineNext = adiLineBuffer + ((next < leftUnits) ? (next * unitHeight) : (pAdiLineTopRowOffset + (next * unitWidth)));
 897             const pixel refSample = *pAdiLineNext;
 898             // Pad unavailable samples with new value
 899             int nextOrTop = X265_MIN(next, leftUnits);
 900             // fill left column
 901             while (curr < nextOrTop)
 902             {
 903                 for (int i = 0; i < unitHeight; i++)
 904                     adi[i] = refSample;
 905
 906                 adi += unitHeight;
 907                 curr++;
 908             }
 909
 910             // fill top row
 911             while (curr < next)
 912             {
 913                 for (int i = 0; i < unitWidth; i++)
 914                     adi[i] = refSample;
 915
 916                 adi += unitWidth;
 917                 curr++;
 918             }
 919         }
 920
 921         // pad all other reference samples.
 922         while (curr < totalUnits)
 923         {
 924             if (!bNeighborFlags[curr]) // samples not available
 925             {
 926                 int numSamplesInCurrUnit = (curr >= leftUnits) ? unitWidth : unitHeight;
 927                 const pixel refSample = *(adi - 1);
 928                 for (int i = 0; i < numSamplesInCurrUnit; i++)
 929                     adi[i] = refSample;
 930
 931                 adi += numSamplesInCurrUnit;
 932                 curr++;
 933             }
 934             else
 935             {
 936                 adi += (curr >= leftUnits) ? unitWidth : unitHeight;
 937                 curr++;
 938             }
 939         }
 940
 941         // Copy processed samples
 942         adi = adiLineBuffer + refSize + unitWidth - 2;
 943         memcpy(adiRef, adi, refSize * sizeof(*adiRef));
 944
 945         adi = adiLineBuffer + refSize - 1;
 946         for (int i = 1; i < (int)refSize; i++)
 947             adiRef[i * ADI_BUF_STRIDE] = adi[-i];
 948     }
 949 }
 950
 951 bool Predict::isAboveLeftAvailable(const CUData& cu, uint32_t partIdxLT)
 952 {
 953     uint32_t partAboveLeft;
 954     const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT);
 955
 956     if (!cu.m_slice->m_pps->bConstrainedIntraPred)
 957         return cuAboveLeft ? true : false;
 958     else
 959         return cuAboveLeft && cuAboveLeft->isIntra(partAboveLeft);
 960 }
 961
 962 int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool *bValidFlags)
 963 {
 964     const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
 965     const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT] + 1;
 966     const uint32_t idxStep = 1;
 967     bool *validFlagPtr = bValidFlags;
 968     int numIntra = 0;
 969
 970     for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep)
 971     {
 972         uint32_t partAbove;
 973         const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]);
 974         if (cuAbove && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAbove->isIntra(partAbove)))
 975         {
 976             numIntra++;
 977             *validFlagPtr = true;
 978         }
 979         else
 980             *validFlagPtr = false;
 981
 982         validFlagPtr++;
 983     }
 984
 985     return numIntra;
 986 }
 987
 988 int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool *bValidFlags)
 989 {
 990     const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
 991     const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB] + 1;
 992     const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize;
 993     bool *validFlagPtr = bValidFlags;
 994     int numIntra = 0;
 995
 996     for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep)
 997     {
 998         uint32_t partLeft;
 999         const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]);
1000         if (cuLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuLeft->isIntra(partLeft)))
1001         {
1002             numIntra++;
1003             *validFlagPtr = true;
1004         }
1005         else
1006             *validFlagPtr = false;
1007
1008         validFlagPtr--; // opposite direction
1009     }
1010
1011     return numIntra;
1012 }
1013
1014 int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool *bValidFlags)
1015 {
1016     const uint32_t numUnitsInPU = g_zscanToRaster[partIdxRT] - g_zscanToRaster[partIdxLT] + 1;
1017     bool *validFlagPtr = bValidFlags;
1018     int numIntra = 0;
1019
1020     for (uint32_t offset = 1; offset <= numUnitsInPU; offset++)
1021     {
1022         uint32_t partAboveRight;
1023         const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset);
1024         if (cuAboveRight && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAboveRight->isIntra(partAboveRight)))
1025         {
1026             numIntra++;
1027             *validFlagPtr = true;
1028         }
1029         else
1030             *validFlagPtr = false;
1031
1032         validFlagPtr++;
1033     }
1034
1035     return numIntra;
1036 }
1037
1038 int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool *bValidFlags)
1039 {
1040     const uint32_t numUnitsInPU = (g_zscanToRaster[partIdxLB] - g_zscanToRaster[partIdxLT]) / cu.m_slice->m_sps->numPartInCUSize + 1;
1041     bool *validFlagPtr = bValidFlags;
1042     int numIntra = 0;
1043
1044     for (uint32_t offset = 1; offset <= numUnitsInPU; offset++)
1045     {
1046         uint32_t partBelowLeft;
1047         const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset);
1048         if (cuBelowLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuBelowLeft->isIntra(partBelowLeft)))
1049         {
1050             numIntra++;
1051             *validFlagPtr = true;
1052         }
1053         else
1054             *validFlagPtr = false;
1055
1056         validFlagPtr--; // opposite direction
1057     }
1058
1059     return numIntra;
1060 }