/* Each CU's data is laid out sequentially within the charMemBlock */
uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance;
- m_qp = (char*)charBuf; charBuf += m_numPartitions;
+ m_qp = (int8_t*)charBuf; charBuf += m_numPartitions;
m_log2CUSize = charBuf; charBuf += m_numPartitions;
- m_partSize = charBuf; charBuf += m_numPartitions;
- m_predMode = charBuf; charBuf += m_numPartitions;
m_lumaIntraDir = charBuf; charBuf += m_numPartitions;
m_tqBypass = charBuf; charBuf += m_numPartitions;
- m_refIdx[0] = (char*)charBuf; charBuf += m_numPartitions;
- m_refIdx[1] = (char*)charBuf; charBuf += m_numPartitions;
+ m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
+ m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
m_cuDepth = charBuf; charBuf += m_numPartitions;
- m_skipFlag = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
+ m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
+ m_partSize = charBuf; charBuf += m_numPartitions;
m_mergeFlag = charBuf; charBuf += m_numPartitions;
m_interDir = charBuf; charBuf += m_numPartitions;
m_mvpIdx[0] = charBuf; charBuf += m_numPartitions;
/* sequential memsets */
m_partSet((uint8_t*)m_qp, (uint8_t)qp);
m_partSet(m_log2CUSize, (uint8_t)g_maxLog2CUSize);
- m_partSet(m_partSize, (uint8_t)SIZE_NONE);
- m_partSet(m_predMode, (uint8_t)MODE_NONE);
m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
m_partSet(m_tqBypass, (uint8_t)frame.m_encData->m_param->bLossless);
if (m_slice->m_sliceType != I_SLICE)
X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
/* initialize the remaining CU data in one memset */
- memset(m_cuDepth, 0, (BytesPerPartition - 8) * m_numPartitions);
+ memset(m_cuDepth, 0, (BytesPerPartition - 6) * m_numPartitions);
uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
/* sequential memsets */
m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]);
m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize);
- m_partSet(m_partSize, (uint8_t)SIZE_NONE);
- m_partSet(m_predMode, (uint8_t)MODE_NONE);
m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless);
m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
m_partSet(m_cuDepth, (uint8_t)cuGeom.depth);
/* initialize the remaining CU data in one memset */
- memset(m_skipFlag, 0, (BytesPerPartition - 9) * m_numPartitions);
+ memset(m_predMode, 0, (BytesPerPartition - 7) * m_numPartitions);
}
/* Copy the results of a sub-part (split) CU to the parent CU */
m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
- m_subPartCopy(m_partSize + offset, subCU.m_partSize);
- m_subPartCopy(m_predMode + offset, subCU.m_predMode);
m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth);
- m_subPartCopy(m_skipFlag + offset, subCU.m_skipFlag);
+ m_subPartCopy(m_predMode + offset, subCU.m_predMode);
+ m_subPartCopy(m_partSize + offset, subCU.m_partSize);
m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
m_subPartCopy(m_interDir + offset, subCU.m_interDir);
m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
m_partSet(m_tqBypass, true);
/* clear residual coding flags */
- m_partSet(m_skipFlag, 0);
+ m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER));
m_partSet(m_tuDepth, 0);
m_partSet(m_transformSkip[0], 0);
m_partSet(m_transformSkip[1], 0);
m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
- m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
- m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]);
m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]);
m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth);
- m_partCopy(ctu.m_skipFlag + m_absIdxInCTU, m_skipFlag);
+ m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
+ m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag);
m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir);
m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]);
/* copy out all prediction info for this part */
m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
m_partCopy(m_log2CUSize, ctu.m_log2CUSize + m_absIdxInCTU);
- m_partCopy(m_partSize, ctu.m_partSize + m_absIdxInCTU);
- m_partCopy(m_predMode, ctu.m_predMode + m_absIdxInCTU);
m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
m_partCopy(m_tqBypass, ctu.m_tqBypass + m_absIdxInCTU);
m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU);
m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU);
m_partCopy(m_cuDepth, ctu.m_cuDepth + m_absIdxInCTU);
+ m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */
+ m_partCopy(m_partSize, ctu.m_partSize + m_absIdxInCTU);
m_partCopy(m_mergeFlag, ctu.m_mergeFlag + m_absIdxInCTU);
m_partCopy(m_interDir, ctu.m_interDir + m_absIdxInCTU);
m_partCopy(m_mvpIdx[0], ctu.m_mvpIdx[0] + m_absIdxInCTU);
memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
/* clear residual coding flags */
- m_partSet(m_skipFlag, 0);
m_partSet(m_tuDepth, 0);
m_partSet(m_transformSkip[0], 0);
m_partSet(m_transformSkip[1], 0);
m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
- m_partCopy(ctu.m_skipFlag + m_absIdxInCTU, m_skipFlag);
+ m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
return m_cuLeft;
}
-const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx, bool planarAtCTUBoundary) const
+const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const
{
uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
if (isEqualRow(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
return m_encData->getPicCTU(m_cuAddr);
else
- {
aPartUnitIdx -= m_absIdxInCTU;
- return this;
- }
+ return this;
}
- if (planarAtCTUBoundary)
- return NULL;
-
aPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - s_numPartInCUSize];
return m_cuAbove;
}
}
/* Get reference QP from left QpMinCu or latest coded QP */
-char CUData::getRefQP(uint32_t curAbsIdxInCTU) const
+int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const
{
uint32_t lPartIdx = 0, aPartIdx = 0;
const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
return lastValidPartIdx;
}
-char CUData::getLastCodedQP(uint32_t absPartIdx) const
+int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const
{
uint32_t quPartIdxMask = 0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2;
int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask);
else if (m_cuAddr > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled && !(m_cuAddr % m_slice->m_sps->numCuInWidth)))
return m_encData->getPicCTU(m_cuAddr - 1)->getLastCodedQP(NUM_CU_PARTITIONS);
else
- return (char)m_slice->m_sliceQp;
+ return (int8_t)m_slice->m_sliceQp;
}
}
leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
// Get intra direction of above PU
- tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx, true);
+ tempCU = g_zscanToPelY[m_absIdxInCTU + absPartIdx] > 0 ? getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx) : NULL;
aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
{
uint32_t log2CUSize = m_log2CUSize[absPartIdx];
- uint32_t splitFlag = m_partSize[absPartIdx] == SIZE_NxN;
+ uint32_t splitFlag = m_partSize[absPartIdx] != SIZE_2Nx2N;
tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
return ctx;
}
-bool CUData::setQPSubCUs(char qp, uint32_t absPartIdx, uint32_t depth)
+bool CUData::setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth)
{
uint32_t curPartNumb = NUM_CU_PARTITIONS >> (depth << 1);
uint32_t curPartNumQ = curPartNumb >> 2;
setAllPU(m_mv[list], mv, absPartIdx, puIdx);
}
/* Store refIdx into m_refIdx[list] by forwarding to setAllPU() together with
 * the caller's absPartIdx and puIdx.
 * NOTE(review): presumably setAllPU() fills every partition covered by prediction
 * unit puIdx of the CU at absPartIdx -- confirm against its definition. */
-void CUData::setPURefIdx(int list, char refIdx, int absPartIdx, int puIdx)
+void CUData::setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx)
{
setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx);
}
else
{
// OUT OF BOUNDARY
- outMvField.mv.word = 0;
+ outMvField.mv = 0;
outMvField.refIdx = REF_NOT_VALID;
}
}
for (uint32_t i = 0; i < maxNumMergeCand; ++i)
{
+ mvFieldNeighbours[i][0].mv = 0;
+ mvFieldNeighbours[i][1].mv = 0;
mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID;
mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID;
}
bool isAvailableA1 = cuLeft &&
cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) &&
!(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) &&
- !cuLeft->isIntra(leftPartIdx);
+ cuLeft->isInter(leftPartIdx);
if (isAvailableA1)
{
// get Inter Dir
bool isAvailableB1 = cuAbove &&
cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) &&
!(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) &&
- !cuAbove->isIntra(abovePartIdx);
+ cuAbove->isInter(abovePartIdx);
if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx)))
{
// get Inter Dir
const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT);
bool isAvailableB0 = cuAboveRight &&
cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) &&
- !cuAboveRight->isIntra(aboveRightPartIdx);
+ cuAboveRight->isInter(aboveRightPartIdx);
if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx)))
{
// get Inter Dir
const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB);
bool isAvailableA0 = cuLeftBottom &&
cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) &&
- !cuLeftBottom->isIntra(leftBottomPartIdx);
+ cuLeftBottom->isInter(leftBottomPartIdx);
if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx)))
{
// get Inter Dir
const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr);
bool isAvailableB2 = cuAboveLeft &&
cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) &&
- !cuAboveLeft->isIntra(aboveLeftPartIdx);
+ cuAboveLeft->isInter(aboveLeftPartIdx);
if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx))
&& (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx)))
{
while (count < maxNumMergeCand)
{
interDirNeighbours[count] = 1;
- mvFieldNeighbours[count][0].mv.word = 0;
+ mvFieldNeighbours[count][0].mv = 0;
mvFieldNeighbours[count][0].refIdx = r;
if (isInterB)
bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const
{
- uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
-
- int colRefPicList;
- int colPOC, colRefPOC, curPOC, curRefPOC;
- MV colmv;
-
- // use coldir.
- Frame *colPic = m_slice->m_refPicList[m_slice->isInterB() ? 1 - m_slice->m_colFromL0Flag : 0][m_slice->m_colRefIdx];
- CUData *colCU = colPic->m_encData->getPicCTU(cuAddr);
+ const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
+ const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
- if (colCU->m_partSize[partUnitIdx] == SIZE_NONE)
+ if (colCU->m_predMode[partUnitIdx] == MODE_NONE)
return false;
- curPOC = m_slice->m_poc;
- colPOC = colCU->m_slice->m_poc;
+ uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
if (colCU->isIntra(absPartAddr))
return false;
- colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag;
+ int colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag;
int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
}
// Scale the vector
- colRefPOC = colCU->m_slice->m_refPOCList[colRefPicList][colRefIdx];
- colmv = colCU->m_mv[colRefPicList][absPartAddr];
- curRefPOC = m_slice->m_refPOCList[picList][outRefIdx];
+ int colRefPOC = colCU->m_slice->m_refPOCList[colRefPicList][colRefIdx];
+ int colPOC = colCU->m_slice->m_poc;
+ MV colmv = colCU->m_mv[colRefPicList][absPartAddr];
+
+ int curRefPOC = m_slice->m_refPOCList[picList][outRefIdx];
+ int curPOC = m_slice->m_poc;
scaleMvByPOCDist(outMV, colmv, curPOC, curRefPOC, colPOC, colRefPOC);
return true;
/* CU_SET_FLAG(bitfield, flag, value): branch-free set/clear of the bits in `flag`.
 *
 *   value == 1  ->  ~(value - 1) is all ones, so every bit of `flag` is set
 *   value == 0  ->  ~(value - 1) is zero,     so every bit of `flag` is cleared
 *
 * Bits of `bitfield` outside `flag` are left untouched. `value` is expected to
 * be exactly 0 or 1; any other value yields a partial mask. Note that
 * `bitfield` and `flag` each appear twice in the expansion, so arguments with
 * side effects must be avoided. */
#define CU_SET_FLAG(bitfield, flag, value) \
    (bitfield) = ((bitfield) & ~(flag)) | (~((value) - 1) & (flag))
-void CUData::calcCTUGeoms(uint32_t picWidth, uint32_t picHeight, uint32_t maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) const
+void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS])
{
// Initialize the coding blocks inside the CTB
for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= MIN_LOG2_CU_SIZE; log2CUSize--)
uint32_t depthIdx = g_depthScanIdx[sbY][sbX];
uint32_t cuIdx = rangeCUIdx + depthIdx;
uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx << 2);
- uint32_t px = m_cuPelX + sbX * blockSize;
- uint32_t py = m_cuPelY + sbY * blockSize;
- int32_t presentFlag = px < picWidth && py < picHeight;
- int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > picWidth || py + blockSize > picHeight);
+ uint32_t px = sbX * blockSize;
+ uint32_t py = sbY * blockSize;
+ int32_t presentFlag = px < ctuWidth && py < ctuHeight;
+ int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > ctuWidth || py + blockSize > ctuHeight);
/* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */
uint32_t xOffset = (sbX * blockSize) >> 3;