#include "rdcost.h"
#include "encoder.h"
-#include "PPA/ppa.h"
-
using namespace x265;
/* An explanation of rate distortion levels (--rd-level)
*
* RDO selection between merge and skip
* sa8d selection of best inter mode
+ * sa8d decisions include chroma residual cost
* RDO selection between (merge/skip) / best inter mode / intra / split
*
* rd-level 4 enables RDOQuant
+ * chroma residual cost included in satd decisions, including subpel refine
+ * (as a result of --subme 3 being used by preset slow)
*
* rd-level 5,6 does RDO for each inter mode
*/
Analysis::Analysis()
{
m_totalNumJobs = m_numAcquiredJobs = m_numCompletedJobs = 0;
+ m_reuseIntraDataCTU = NULL;
+ m_reuseInterDataCTU = NULL;
}
bool Analysis::create(ThreadLocalData *tld)
{
m_tld = tld;
m_bTryLossless = m_param->bCULossless && !m_param->bLossless && m_param->rdLevel >= 2;
+ m_bChromaSa8d = m_param->rdLevel >= 3;
int csp = m_param->internalCsp;
uint32_t cuSize = g_maxCUSize;
}
}
-Search::Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext)
+Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext)
{
m_slice = ctu.m_slice;
m_frame = &frame;
invalidateContexts(0);
m_quant.setQPforQuant(ctu);
m_rqt[0].cur.load(initialContext);
- m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_origPicYuv, ctu.m_cuAddr, 0);
+ m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);
uint32_t numPartition = ctu.m_numPartitions;
+ if (m_param->analysisMode)
+ {
+ m_reuseIntraDataCTU = (analysis_intra_data *)m_frame->m_analysisData.intraData;
+ int numPredDir = m_slice->isInterP() ? 1 : 2;
+ m_reuseInterDataCTU = (analysis_inter_data *)m_frame->m_analysisData.interData + ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir;
+ }
+
if (m_slice->m_sliceType == I_SLICE)
{
uint32_t zOrder = 0;
- if (m_param->analysisMode == X265_ANALYSIS_LOAD)
- compressIntraCU(ctu, cuGeom, m_frame->m_intraData, zOrder);
- else
+ compressIntraCU(ctu, cuGeom, zOrder);
+ if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.intraData)
{
- compressIntraCU(ctu, cuGeom, NULL, zOrder);
-
- if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_intraData)
- {
- CUData *bestCU = &m_modeDepth[0].bestMode->cu;
- memcpy(&m_frame->m_intraData->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
- memcpy(&m_frame->m_intraData->modes[ctu.m_cuAddr * numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
- memcpy(&m_frame->m_intraData->partSizes[ctu.m_cuAddr * numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
- m_frame->m_intraData->cuAddr[ctu.m_cuAddr] = ctu.m_cuAddr;
- m_frame->m_intraData->poc[ctu.m_cuAddr] = m_frame->m_poc;
- }
+ CUData *bestCU = &m_modeDepth[0].bestMode->cu;
+ memcpy(&m_reuseIntraDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
+ memcpy(&m_reuseIntraDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
+ memcpy(&m_reuseIntraDataCTU->partSizes[ctu.m_cuAddr * numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
}
}
else
if (!m_param->rdLevel)
{
/* In RD Level 0/1, copy source pixels into the reconstructed block so
- * they are available for intra predictions */
- m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPicYuv, ctu.m_cuAddr, 0);
-
- compressInterCU_rd0_4(ctu, cuGeom); // TODO: this really wants to be compressInterCU_rd0_1
+ * they are available for intra predictions */
+ m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic, ctu.m_cuAddr, 0);
+
+ compressInterCU_rd0_4(ctu, cuGeom);
/* generate residual for entire CTU at once and copy to reconPic */
encodeResidue(ctu, cuGeom);
if (!md.bestMode->distortion)
/* already lossless */
return;
- else if (md.bestMode->cu.m_predMode[0] == MODE_INTRA)
+ else if (md.bestMode->cu.isIntra(0))
{
md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
}
}
-void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, x265_intra_data* shared, uint32_t& zOrder)
+void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t& zOrder)
{
uint32_t depth = cuGeom.depth;
ModeDepth& md = m_modeDepth[depth];
bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
- if (shared)
+ if (m_param->analysisMode == X265_ANALYSIS_LOAD)
{
- uint8_t* sharedDepth = &shared->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
- char* sharedPartSizes = &shared->partSizes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
- uint8_t* sharedModes = &shared->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+ uint8_t* reuseDepth = &m_reuseIntraDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+ uint8_t* reuseModes = &m_reuseIntraDataCTU->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+ char* reusePartSizes = &m_reuseIntraDataCTU->partSizes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
- if (mightNotSplit && depth == sharedDepth[zOrder] && zOrder == cuGeom.encodeIdx)
+ if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder == cuGeom.encodeIdx)
{
m_quant.setQPforQuant(parentCTU);
- PartSize size = (PartSize)sharedPartSizes[zOrder];
+ PartSize size = (PartSize)reusePartSizes[zOrder];
Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] : md.pred[PRED_INTRA_NxN];
mode.cu.initSubCU(parentCTU, cuGeom);
- checkIntra(mode, cuGeom, size, sharedModes);
+ checkIntra(mode, cuGeom, size, &reuseModes[zOrder]);
checkBestMode(mode, depth);
if (m_bTryLossless)
addSplitFlagCost(*md.bestMode, cuGeom.depth);
// increment zOrder offset to point to next best depth in sharedDepth buffer
- zOrder += g_depthInc[g_maxCUDepth - 1][sharedDepth[zOrder]];
+ zOrder += g_depthInc[g_maxCUDepth - 1][reuseDepth[zOrder]];
mightSplit = false;
}
}
for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
{
- const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
- if (childCuData.flags & CUGeom::PRESENT)
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ if (childGeom.flags & CUGeom::PRESENT)
{
- m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childCuData.encodeIdx);
+ m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressIntraCU(parentCTU, childCuData, shared, zOrder);
+ compressIntraCU(parentCTU, childGeom, zOrder);
// Save best CU and pred data for this sub CU
- splitCU->copyPartFrom(nd.bestMode->cu, childCuData, subPartIdx);
+ splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
splitPred->addSubCosts(*nd.bestMode);
- nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childCuData.numPartitions * subPartIdx);
+ nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
nextContext = &nd.bestMode->contexts;
}
else
{
/* record the depth of this non-present sub-CU */
- splitCU->setEmptyPart(childCuData, subPartIdx);
+ splitCU->setEmptyPart(childGeom, subPartIdx);
zOrder += g_depthInc[g_maxCUDepth - 1][nextDepth];
}
}
/* Copy best data to encData CTU and recon */
md.bestMode->cu.copyToPic(depth);
if (md.bestMode != &md.pred[PRED_SPLIT])
- md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, parentCTU.m_cuAddr, cuGeom.encodeIdx);
+ md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.encodeIdx);
}
bool Analysis::findJob(int threadId)
{
/* try to acquire a CU mode to analyze */
+ m_pmodeLock.acquire();
if (m_totalNumJobs > m_numAcquiredJobs)
{
- /* ATOMIC_INC returns the incremented value */
- int id = ATOMIC_INC(&m_numAcquiredJobs);
- if (m_totalNumJobs >= id)
- {
- parallelModeAnalysis(threadId, id - 1);
+ int id = m_numAcquiredJobs++;
+ m_pmodeLock.release();
- if (ATOMIC_INC(&m_numCompletedJobs) == m_totalNumJobs)
- m_modeCompletionEvent.trigger();
- return true;
- }
+ parallelModeAnalysis(threadId, id);
+
+ m_pmodeLock.acquire();
+ if (++m_numCompletedJobs == m_totalNumJobs)
+ m_modeCompletionEvent.trigger();
+ m_pmodeLock.release();
+ return true;
}
+ else
+ m_pmodeLock.release();
+ m_meLock.acquire();
if (m_totalNumME > m_numAcquiredME)
{
- int id = ATOMIC_INC(&m_numAcquiredME);
- if (m_totalNumME >= id)
- {
- parallelME(threadId, id - 1);
+ int id = m_numAcquiredME++;
+ m_meLock.release();
- if (ATOMIC_INC(&m_numCompletedME) == m_totalNumME)
- m_meCompletionEvent.trigger();
- return true;
- }
+ parallelME(threadId, id);
+
+ m_meLock.acquire();
+ if (++m_numCompletedME == m_totalNumME)
+ m_meCompletionEvent.trigger();
+ m_meLock.release();
+ return true;
}
+ else
+ m_meLock.release();
return false;
}
slave->m_slice = m_slice;
slave->m_frame = m_frame;
- PicYuv* fencPic = m_frame->m_origPicYuv;
- pixel* pu = fencPic->getLumaAddr(m_curMECu->m_cuAddr, m_curGeom->encodeIdx + m_puAbsPartIdx);
- slave->m_me.setSourcePlane(fencPic->m_picOrg[0], fencPic->m_stride);
- slave->m_me.setSourcePU(pu - fencPic->m_picOrg[0], m_puWidth, m_puHeight);
-
- slave->prepMotionCompensation(*m_curMECu, *m_curGeom, m_curPart);
+ slave->m_me.setSourcePU(*m_curInterMode->fencYuv, m_curInterMode->cu.m_cuAddr, m_curGeom->encodeIdx, m_puAbsPartIdx, m_puWidth, m_puHeight);
+ slave->prepMotionCompensation(m_curInterMode->cu, *m_curGeom, m_curPart);
}
if (meId < m_slice->m_numRefIdx[0])
- slave->singleMotionEstimation(*this, *m_curMECu, *m_curGeom, m_curPart, 0, meId);
+ slave->singleMotionEstimation(*this, *m_curInterMode, *m_curGeom, m_curPart, 0, meId);
else
- slave->singleMotionEstimation(*this, *m_curMECu, *m_curGeom, m_curPart, 1, meId - m_slice->m_numRefIdx[0]);
+ slave->singleMotionEstimation(*this, *m_curInterMode, *m_curGeom, m_curPart, 1, meId - m_slice->m_numRefIdx[0]);
}
void Analysis::parallelModeAnalysis(int threadId, int jobId)
slave->m_frame = m_frame;
slave->setQP(*m_slice, m_rdCost.m_qp);
slave->invalidateContexts(0);
- if (jobId)
- slave->m_me.setSourcePlane(m_frame->m_origPicYuv->m_picOrg[0], m_frame->m_origPicYuv->m_stride);
}
ModeDepth& md = m_modeDepth[m_curGeom->depth];
case 0:
if (slave != this)
slave->m_rqt[m_curGeom->depth].cur.load(m_rqt[m_curGeom->depth].cur);
- slave->checkIntraInInter_rd0_4(md.pred[PRED_INTRA], *m_curGeom);
+ slave->checkIntraInInter(md.pred[PRED_INTRA], *m_curGeom);
if (m_param->rdLevel > 2)
slave->encodeIntraInInter(md.pred[PRED_INTRA], *m_curGeom);
break;
case 1:
slave->checkInter_rd0_4(md.pred[PRED_2Nx2N], *m_curGeom, SIZE_2Nx2N);
+ if (m_slice->m_sliceType == B_SLICE)
+ slave->checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], *m_curGeom);
break;
case 2:
case 1:
slave->checkInter_rd5_6(md.pred[PRED_2Nx2N], *m_curGeom, SIZE_2Nx2N, false);
+ md.pred[PRED_BIDIR].rdCost = MAX_INT64;
+ if (m_slice->m_sliceType == B_SLICE)
+ {
+ slave->checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], *m_curGeom);
+ if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
+ slave->encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], *m_curGeom);
+ }
break;
case 2:
/* Initialize all prediction CUs based on parentCTU */
md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
if (m_param->bEnableRectInter)
md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
}
+ m_pmodeLock.acquire();
m_totalNumJobs = 2 + m_param->bEnableRectInter * 2 + bTryAmp * 4;
m_numAcquiredJobs = !bTryIntra;
m_numCompletedJobs = m_numAcquiredJobs;
m_curGeom = &cuGeom;
m_bJobsQueued = true;
JobProvider::enqueue();
+ m_pmodeLock.release();
for (int i = 0; i < m_totalNumJobs - m_numCompletedJobs; i++)
m_pool->pokeIdleThread();
if (m_param->rdLevel > 2)
{
- /* encode best inter */
- for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
+ /* RD selection between merge, inter, bidir and intra */
+ if (!m_bChromaSa8d) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
{
- prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
- motionCompensation(bestInter->predYuv, false, true);
+ for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
+ {
+ prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
+ motionCompensation(bestInter->predYuv, false, true);
+ }
}
encodeResAndCalcRdInterCU(*bestInter, cuGeom);
-
- /* RD selection between merge, inter and intra */
checkBestMode(*bestInter, depth);
+ /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */
+ if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
+ md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
+ {
+ encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
+ checkBestMode(md.pred[PRED_BIDIR], depth);
+ }
+
if (bTryIntra)
checkBestMode(md.pred[PRED_INTRA], depth);
}
if (!md.bestMode || bestInter->sa8dCost < md.bestMode->sa8dCost)
md.bestMode = bestInter;
+ if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost < md.bestMode->sa8dCost)
+ md.bestMode = &md.pred[PRED_BIDIR];
+
if (bTryIntra && md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
{
md.bestMode = &md.pred[PRED_INTRA];
m_modeCompletionEvent.wait();
checkBestMode(md.pred[PRED_2Nx2N], depth);
+ checkBestMode(md.pred[PRED_BIDIR], depth);
if (m_param->bEnableRectInter)
{
if (md.bestMode->rdCost == MAX_INT64 && !bTryIntra)
{
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
- checkIntraInInter_rd0_4(md.pred[PRED_INTRA], cuGeom);
+ checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
checkBestMode(md.pred[PRED_INTRA], depth);
}
bool bNoSplit = false;
if (md.bestMode)
{
- bNoSplit = !!md.bestMode->cu.isSkipped(0);
+ bNoSplit = md.bestMode->cu.isSkipped(0);
if (mightSplit && depth && depth >= minDepth && !bNoSplit && m_param->rdLevel <= 4)
bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
}
for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
{
- const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
- if (childCuData.flags & CUGeom::PRESENT)
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ if (childGeom.flags & CUGeom::PRESENT)
{
- m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childCuData.encodeIdx);
+ m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_dist(parentCTU, childCuData);
+ compressInterCU_dist(parentCTU, childGeom);
// Save best CU and pred data for this sub CU
- splitCU->copyPartFrom(nd.bestMode->cu, childCuData, subPartIdx);
+ splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
splitPred->addSubCosts(*nd.bestMode);
- nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childCuData.numPartitions * subPartIdx);
+ nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
nextContext = &nd.bestMode->contexts;
}
else
- splitCU->setEmptyPart(childCuData, subPartIdx);
+ splitCU->setEmptyPart(childGeom, subPartIdx);
}
nextContext->store(splitPred->contexts);
checkBestMode(*splitPred, depth);
}
- if (!depth || md.bestMode->cu.m_predMode[0] != MODE_INTRA)
+ if (mightNotSplit)
{
/* early-out statistics */
- FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData);
+ FrameData& curEncData = *m_frame->m_encData;
FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
cuStat.count[depth] += 1;
/* Copy best data to encData CTU and recon */
md.bestMode->cu.copyToPic(depth);
if (md.bestMode != &md.pred[PRED_SPLIT])
- md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, cuAddr, cuGeom.encodeIdx);
+ md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.encodeIdx);
}
void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom)
{
bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;
- /* Initialize all prediction CUs based on parentCTU */
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
+ /* Compute Merge Cost */
md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
- if (m_param->bEnableRectInter)
- {
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
- md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
- }
- if (m_slice->m_sps->maxAMPDepth > depth && cuGeom.log2CUSize < 6)
- {
- md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
- md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
- }
-
- /* Compute Merge Cost */
checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
bool earlyskip = false;
if (!earlyskip)
{
+ md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N);
- Mode *bestInter = &md.pred[PRED_2Nx2N];
+ if (m_slice->m_sliceType == B_SLICE)
+ {
+ md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
+ checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
+ }
+
+ Mode *bestInter = &md.pred[PRED_2Nx2N];
if (m_param->bEnableRectInter)
{
+ md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N);
if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_Nx2N];
+
+ md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN);
if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxN];
if (bHor)
{
+ md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU);
if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxnU];
+
+ md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD);
if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxnD];
}
if (bVer)
{
+ md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N);
if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_nLx2N];
+
+ md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N);
if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_nRx2N];
if (m_param->rdLevel >= 3)
{
/* Calculate RD cost of best inter option */
- for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
+ if (!m_bChromaSa8d) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
{
- prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
- motionCompensation(bestInter->predYuv, false, true);
+ for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
+ {
+ prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
+ motionCompensation(bestInter->predYuv, false, true);
+ }
}
-
encodeResAndCalcRdInterCU(*bestInter, cuGeom);
+ checkBestMode(*bestInter, depth);
- if (!md.bestMode || bestInter->rdCost < md.bestMode->rdCost)
- md.bestMode = bestInter;
+ /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */
+ if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
+ md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
+ {
+ encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
+ checkBestMode(md.pred[PRED_BIDIR], depth);
+ }
if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) ||
md.bestMode->sa8dCost == MAX_INT64)
{
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
- checkIntraInInter_rd0_4(md.pred[PRED_INTRA], cuGeom);
+ checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
- if (md.pred[PRED_INTRA].rdCost < md.bestMode->rdCost)
- md.bestMode = &md.pred[PRED_INTRA];
+ checkBestMode(md.pred[PRED_INTRA], depth);
}
}
else
{
- /* SA8D choice between merge/skip, inter, and intra */
+ /* SA8D choice between merge/skip, inter, bidir, and intra */
if (!md.bestMode || bestInter->sa8dCost < md.bestMode->sa8dCost)
md.bestMode = bestInter;
+ if (m_slice->m_sliceType == B_SLICE &&
+ md.pred[PRED_BIDIR].sa8dCost < md.bestMode->sa8dCost)
+ md.bestMode = &md.pred[PRED_BIDIR];
+
if (bTryIntra || md.bestMode->sa8dCost == MAX_INT64)
{
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
- checkIntraInInter_rd0_4(md.pred[PRED_INTRA], cuGeom);
+ checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
if (md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
md.bestMode = &md.pred[PRED_INTRA];
}
/* prediction already generated for this CU, and if rd level
* is not 0, it is already fully encoded */
}
- else if (md.bestMode->cu.m_predMode[0] == MODE_INTER)
+ else if (md.bestMode->cu.isInter(0))
{
for (uint32_t puIdx = 0; puIdx < md.bestMode->cu.getNumPartInter(); puIdx++)
{
encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
else if (m_param->rdLevel == 1)
{
- m_rqt[cuGeom.depth].tmpResiYuv.subtract(md.fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize);
- generateCoeffRecon(*md.bestMode, cuGeom);
+ /* generate recon pixels with no rate distortion considerations */
+ CUData& cu = md.bestMode->cu;
+ m_quant.setQPforQuant(cu);
+
+ uint32_t tuDepthRange[2];
+ cu.getInterTUQtDepthRange(tuDepthRange, 0);
+
+ m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize);
+ residualTransformQuantInter(*md.bestMode, cuGeom, 0, cuGeom.depth, tuDepthRange);
+ if (cu.getQtRootCbf(0))
+ md.bestMode->reconYuv.addClip(md.bestMode->predYuv, m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0]);
+ else
+ {
+ md.bestMode->reconYuv.copyFromYuv(md.bestMode->predYuv);
+ if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N)
+ cu.setPredModeSubParts(MODE_SKIP);
+ }
}
}
else
if (m_param->rdLevel == 2)
encodeIntraInInter(*md.bestMode, cuGeom);
else if (m_param->rdLevel == 1)
- generateCoeffRecon(*md.bestMode, cuGeom);
+ {
+ /* generate recon pixels with no rate distortion considerations */
+ CUData& cu = md.bestMode->cu;
+ m_quant.setQPforQuant(cu);
+
+ uint32_t tuDepthRange[2];
+ cu.getIntraTUQtDepthRange(tuDepthRange, 0);
+
+ uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
+ residualTransformQuantIntra(*md.bestMode, cuGeom, initTuDepth, 0, tuDepthRange);
+ getBestIntraModeChroma(*md.bestMode, cuGeom);
+ residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
+ md.bestMode->reconYuv.copyFromPicYuv(*m_frame->m_reconPic, cu.m_cuAddr, cuGeom.encodeIdx); // TODO:
+ }
}
}
} // !earlyskip
bool bNoSplit = false;
if (md.bestMode)
{
- bNoSplit = !!md.bestMode->cu.isSkipped(0);
+ bNoSplit = md.bestMode->cu.isSkipped(0);
if (mightSplit && depth && depth >= minDepth && !bNoSplit)
bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
}
for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
{
- const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
- if (childCuData.flags & CUGeom::PRESENT)
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ if (childGeom.flags & CUGeom::PRESENT)
{
- m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childCuData.encodeIdx);
+ m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_rd0_4(parentCTU, childCuData);
+ compressInterCU_rd0_4(parentCTU, childGeom);
// Save best CU and pred data for this sub CU
- splitCU->copyPartFrom(nd.bestMode->cu, childCuData, subPartIdx);
+ splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
splitPred->addSubCosts(*nd.bestMode);
if (m_param->rdLevel)
- nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childCuData.numPartitions * subPartIdx);
+ nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
else
- nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv, childCuData.numPartitions * subPartIdx);
+ nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv, childGeom.numPartitions * subPartIdx);
if (m_param->rdLevel > 1)
nextContext = &nd.bestMode->contexts;
}
else
- splitCU->setEmptyPart(childCuData, subPartIdx);
+ splitCU->setEmptyPart(childGeom, subPartIdx);
}
nextContext->store(splitPred->contexts);
if (mightNotSplit)
addSplitFlagCost(*splitPred, cuGeom.depth);
- else if (m_param->rdLevel <= 1)
- splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
- else
+ else if (m_param->rdLevel > 1)
updateModeCost(*splitPred);
+ else
+ splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
if (!md.bestMode)
md.bestMode = splitPred;
- else if (m_param->rdLevel >= 1)
- {
- if (splitPred->rdCost < md.bestMode->rdCost)
- md.bestMode = splitPred;
- }
- else
- {
- if (splitPred->sa8dCost < md.bestMode->sa8dCost)
- md.bestMode = splitPred;
- }
+ else if (m_param->rdLevel > 1)
+ checkBestMode(*splitPred, cuGeom.depth);
+ else if (splitPred->sa8dCost < md.bestMode->sa8dCost)
+ md.bestMode = splitPred;
}
- if (!depth || md.bestMode->cu.m_predMode[0] != MODE_INTRA)
+ if (mightNotSplit)
{
/* early-out statistics */
- FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData);
+ FrameData& curEncData = *m_frame->m_encData;
FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
cuStat.count[depth] += 1;
/* Copy best data to encData CTU and recon */
md.bestMode->cu.copyToPic(depth);
if (md.bestMode != &md.pred[PRED_SPLIT] && m_param->rdLevel)
- md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, cuAddr, cuGeom.encodeIdx);
+ md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.encodeIdx);
}
void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom)
if (mightNotSplit)
{
- for (int i = 0; i < MAX_PRED_TYPES; i++)
- md.pred[i].cu.initSubCU(parentCTU, cuGeom);
-
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
bool earlySkip = m_param->bEnableEarlySkip && md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
if (!earlySkip)
{
+ md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false);
checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
+ if (m_slice->m_sliceType == B_SLICE)
+ {
+ md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
+ checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
+ if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
+ {
+ encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
+ checkBestMode(md.pred[PRED_BIDIR], cuGeom.depth);
+ }
+ }
+
if (m_param->bEnableRectInter)
{
- // Nx2N rect
if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
{
+ md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, false);
checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
}
if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
{
+ md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, false);
checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
}
{
if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
{
+ md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, bMergeOnly);
checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
}
if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
{
+ md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, bMergeOnly);
checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
}
{
if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
{
+ md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, bMergeOnly);
checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
}
if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
{
+ md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, bMergeOnly);
checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
}
if ((m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) &&
(!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))
{
+ md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
checkBestMode(md.pred[PRED_INTRA], depth);
if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
{
+ md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL);
checkBestMode(md.pred[PRED_INTRA_NxN], depth);
}
for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
{
- const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
- if (childCuData.flags & CUGeom::PRESENT)
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ if (childGeom.flags & CUGeom::PRESENT)
{
- m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childCuData.encodeIdx);
+ m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_rd5_6(parentCTU, childCuData);
+ compressInterCU_rd5_6(parentCTU, childGeom);
// Save best CU and pred data for this sub CU
- splitCU->copyPartFrom(nd.bestMode->cu, childCuData, subPartIdx);
+ splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
splitPred->addSubCosts(*nd.bestMode);
- nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childCuData.numPartitions * subPartIdx);
+ nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
nextContext = &nd.bestMode->contexts;
}
else
- splitCU->setEmptyPart(childCuData, subPartIdx);
+ splitCU->setEmptyPart(childGeom, subPartIdx);
}
nextContext->store(splitPred->contexts);
if (mightNotSplit)
/* Copy best data to encData CTU and recon */
md.bestMode->cu.copyToPic(depth);
if (md.bestMode != &md.pred[PRED_SPLIT])
- md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, parentCTU.m_cuAddr, cuGeom.encodeIdx);
+ md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.encodeIdx);
}
/* sets md.bestMode if a valid merge candidate is found, else leaves it NULL */
bestPred->sa8dCost = MAX_INT64;
int bestSadCand = -1;
- int sizeIdx = cuGeom.log2CUSize - 2;
+ int cpart, sizeIdx = cuGeom.log2CUSize - 2;
+ if (m_bChromaSa8d)
+ {
+ int cuSize = 1 << cuGeom.log2CUSize;
+ cpart = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
+ }
for (uint32_t i = 0; i < maxNumMergeCand; ++i)
{
if (m_bFrameParallel &&
tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; // merge candidate ID is stored in L0 MVP idx
tempPred->cu.m_interDir[0] = interDirNeighbours[i];
tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
- tempPred->cu.m_refIdx[0][0] = (char)mvFieldNeighbours[i][0].refIdx;
+ tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;
tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
- tempPred->cu.m_refIdx[1][0] = (char)mvFieldNeighbours[i][1].refIdx;
+ tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;
- // do MC only for Luma part
prepMotionCompensation(tempPred->cu, cuGeom, 0);
- motionCompensation(tempPred->predYuv, true, false);
+ motionCompensation(tempPred->predYuv, true, m_bChromaSa8d);
tempPred->sa8dBits = getTUBits(i, maxNumMergeCand);
tempPred->distortion = primitives.sa8d[sizeIdx](fencYuv->m_buf[0], fencYuv->m_size, tempPred->predYuv.m_buf[0], tempPred->predYuv.m_size);
+ if (m_bChromaSa8d)
+ {
+ tempPred->distortion += primitives.sa8d_inter[cpart](fencYuv->m_buf[1], fencYuv->m_csize, tempPred->predYuv.m_buf[1], tempPred->predYuv.m_csize);
+ tempPred->distortion += primitives.sa8d_inter[cpart](fencYuv->m_buf[2], fencYuv->m_csize, tempPred->predYuv.m_buf[2], tempPred->predYuv.m_csize);
+ }
tempPred->sa8dCost = m_rdCost.calcRdSADCost(tempPred->distortion, tempPred->sa8dBits);
if (tempPred->sa8dCost < bestPred->sa8dCost)
return;
/* calculate the motion compensation for chroma for the best mode selected */
- prepMotionCompensation(bestPred->cu, cuGeom, 0);
- motionCompensation(bestPred->predYuv, false, true);
+ if (!m_bChromaSa8d) /* Chroma MC was done above */
+ {
+ prepMotionCompensation(bestPred->cu, cuGeom, 0);
+ motionCompensation(bestPred->predYuv, false, true);
+ }
if (m_param->rdLevel)
{
tempPred->cu.m_mvpIdx[0][0] = (uint8_t)bestSadCand;
tempPred->cu.setPUInterDir(interDirNeighbours[bestSadCand], 0, 0);
tempPred->cu.setPUMv(0, mvFieldNeighbours[bestSadCand][0].mv, 0, 0);
- tempPred->cu.setPURefIdx(0, (char)mvFieldNeighbours[bestSadCand][0].refIdx, 0, 0);
+ tempPred->cu.setPURefIdx(0, (int8_t)mvFieldNeighbours[bestSadCand][0].refIdx, 0, 0);
tempPred->cu.setPUMv(1, mvFieldNeighbours[bestSadCand][1].mv, 0, 0);
- tempPred->cu.setPURefIdx(1, (char)mvFieldNeighbours[bestSadCand][1].refIdx, 0, 0);
+ tempPred->cu.setPURefIdx(1, (int8_t)mvFieldNeighbours[bestSadCand][1].refIdx, 0, 0);
tempPred->sa8dCost = bestPred->sa8dCost;
tempPred->predYuv.copyFromYuv(bestPred->predYuv);
/* broadcast sets of MV field data */
bestPred->cu.setPUInterDir(interDirNeighbours[bestSadCand], 0, 0);
bestPred->cu.setPUMv(0, mvFieldNeighbours[bestSadCand][0].mv, 0, 0);
- bestPred->cu.setPURefIdx(0, (char)mvFieldNeighbours[bestSadCand][0].refIdx, 0, 0);
+ bestPred->cu.setPURefIdx(0, (int8_t)mvFieldNeighbours[bestSadCand][0].refIdx, 0, 0);
bestPred->cu.setPUMv(1, mvFieldNeighbours[bestSadCand][1].mv, 0, 0);
- bestPred->cu.setPURefIdx(1, (char)mvFieldNeighbours[bestSadCand][1].refIdx, 0, 0);
+ bestPred->cu.setPURefIdx(1, (int8_t)mvFieldNeighbours[bestSadCand][1].refIdx, 0, 0);
}
/* sets md.bestMode if a valid merge candidate is found, else leaves it NULL */
tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge candidate ID is stored in L0 MVP idx */
tempPred->cu.m_interDir[0] = interDirNeighbours[i];
tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
- tempPred->cu.m_refIdx[0][0] = (char)mvFieldNeighbours[i][0].refIdx;
+ tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;
tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
- tempPred->cu.m_refIdx[1][0] = (char)mvFieldNeighbours[i][1].refIdx;
- tempPred->cu.setSkipFlagSubParts(false); /* must be cleared between encode iterations */
+ tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;
+ tempPred->cu.setPredModeSubParts(MODE_INTER); /* must be cleared between encode iterations */
prepMotionCompensation(tempPred->cu, cuGeom, 0);
motionCompensation(tempPred->predYuv, true, true);
tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
tempPred->cu.m_interDir[0] = interDirNeighbours[i];
tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
- tempPred->cu.m_refIdx[0][0] = (char)mvFieldNeighbours[i][0].refIdx;
+ tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;
tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
- tempPred->cu.m_refIdx[1][0] = (char)mvFieldNeighbours[i][1].refIdx;
- tempPred->cu.setSkipFlagSubParts(false);
+ tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;
+ tempPred->cu.setPredModeSubParts(MODE_INTER);
tempPred->predYuv.copyFromYuv(bestPred->predYuv);
}
uint32_t bestCand = bestPred->cu.m_mvpIdx[0][0];
bestPred->cu.setPUInterDir(interDirNeighbours[bestCand], 0, 0);
bestPred->cu.setPUMv(0, mvFieldNeighbours[bestCand][0].mv, 0, 0);
- bestPred->cu.setPURefIdx(0, (char)mvFieldNeighbours[bestCand][0].refIdx, 0, 0);
+ bestPred->cu.setPURefIdx(0, (int8_t)mvFieldNeighbours[bestCand][0].refIdx, 0, 0);
bestPred->cu.setPUMv(1, mvFieldNeighbours[bestCand][1].mv, 0, 0);
- bestPred->cu.setPURefIdx(1, (char)mvFieldNeighbours[bestCand][1].refIdx, 0, 0);
+ bestPred->cu.setPURefIdx(1, (int8_t)mvFieldNeighbours[bestCand][1].refIdx, 0, 0);
}
}
interMode.initCosts();
interMode.cu.setPartSizeSubParts(partSize);
interMode.cu.setPredModeSubParts(MODE_INTER);
+ int numPredDir = m_slice->isInterP() ? 1 : 2;
- if (predInterSearch(interMode, cuGeom, false, false))
+ if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_reuseInterDataCTU)
+ {
+ for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)
+ {
+ MotionData* bestME = interMode.bestME[part];
+ for (int32_t i = 0; i < numPredDir; i++)
+ {
+ bestME[i].ref = m_reuseInterDataCTU->ref;
+ m_reuseInterDataCTU++;
+ }
+ }
+ }
+ if (predInterSearch(interMode, cuGeom, false, m_bChromaSa8d))
{
/* predInterSearch sets interMode.sa8dBits */
const Yuv& fencYuv = *interMode.fencYuv;
Yuv& predYuv = interMode.predYuv;
- interMode.distortion = primitives.sa8d[cuGeom.log2CUSize - 2](fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
+ int part = partitionFromLog2Size(cuGeom.log2CUSize);
+ interMode.distortion = primitives.sa8d[part](fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
+ if (m_bChromaSa8d)
+ {
+ uint32_t cuSize = 1 << cuGeom.log2CUSize;
+ int cpart = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
+ interMode.distortion += primitives.sa8d_inter[cpart](fencYuv.m_buf[1], fencYuv.m_csize, predYuv.m_buf[1], predYuv.m_csize);
+ interMode.distortion += primitives.sa8d_inter[cpart](fencYuv.m_buf[2], fencYuv.m_csize, predYuv.m_buf[2], predYuv.m_csize);
+ }
interMode.sa8dCost = m_rdCost.calcRdSADCost(interMode.distortion, interMode.sa8dBits);
+
+ if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_reuseInterDataCTU)
+ {
+ for (uint32_t puIdx = 0; puIdx < interMode.cu.getNumPartInter(); puIdx++)
+ {
+ MotionData* bestME = interMode.bestME[puIdx];
+ for (int32_t i = 0; i < numPredDir; i++)
+ {
+ m_reuseInterDataCTU->ref = bestME[i].ref;
+ m_reuseInterDataCTU++;
+ }
+ }
+ }
}
else
{
interMode.initCosts();
interMode.cu.setPartSizeSubParts(partSize);
interMode.cu.setPredModeSubParts(MODE_INTER);
+ int numPredDir = m_slice->isInterP() ? 1 : 2;
+ if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_reuseInterDataCTU)
+ {
+ for (uint32_t puIdx = 0; puIdx < interMode.cu.getNumPartInter(); puIdx++)
+ {
+ MotionData* bestME = interMode.bestME[puIdx];
+ for (int32_t i = 0; i < numPredDir; i++)
+ {
+ bestME[i].ref = m_reuseInterDataCTU->ref;
+ m_reuseInterDataCTU++;
+ }
+ }
+ }
if (predInterSearch(interMode, cuGeom, bMergeOnly, true))
{
/* predInterSearch sets interMode.sa8dBits, but this is ignored */
encodeResAndCalcRdInterCU(interMode, cuGeom);
+
+ if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_reuseInterDataCTU)
+ {
+ for (uint32_t puIdx = 0; puIdx < interMode.cu.getNumPartInter(); puIdx++)
+ {
+ MotionData* bestME = interMode.bestME[puIdx];
+ for (int32_t i = 0; i < numPredDir; i++)
+ {
+ m_reuseInterDataCTU->ref = bestME[i].ref;
+ m_reuseInterDataCTU++;
+ }
+ }
+ }
}
else
{
}
}
-/* Note that this function does not save the best intra prediction, it must
- * be generated later. It records the best mode in the cu */
-void Analysis::checkIntraInInter_rd0_4(Mode& intraMode, const CUGeom& cuGeom)
+void Analysis::checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom)
{
- CUData& cu = intraMode.cu;
- uint32_t depth = cu.m_cuDepth[0];
+ CUData& cu = bidir2Nx2N.cu;
- cu.setPartSizeSubParts(SIZE_2Nx2N);
- cu.setPredModeSubParts(MODE_INTRA);
-
- uint32_t initTrDepth = 0;
- uint32_t log2TrSize = cu.m_log2CUSize[0] - initTrDepth;
- uint32_t tuSize = 1 << log2TrSize;
- const uint32_t absPartIdx = 0;
-
- // Reference sample smoothing
- initAdiPattern(cu, cuGeom, absPartIdx, initTrDepth, ALL_IDX);
-
- pixel* fenc = m_modeDepth[depth].fencYuv.m_buf[0];
- uint32_t stride = m_modeDepth[depth].fencYuv.m_size;
-
- pixel *above = m_refAbove + tuSize - 1;
- pixel *aboveFiltered = m_refAboveFlt + tuSize - 1;
- pixel *left = m_refLeft + tuSize - 1;
- pixel *leftFiltered = m_refLeftFlt + tuSize - 1;
- int sad, bsad;
- uint32_t bits, bbits, mode, bmode;
- uint64_t cost, bcost;
-
- // 33 Angle modes once
- ALIGN_VAR_32(pixel, bufScale[32 * 32]);
- ALIGN_VAR_32(pixel, bufTrans[32 * 32]);
- ALIGN_VAR_32(pixel, tmp[33 * 32 * 32]);
- int scaleTuSize = tuSize;
- int scaleStride = stride;
- int costShift = 0;
- int sizeIdx = log2TrSize - 2;
-
- if (tuSize > 32)
+ if (cu.isBipredRestriction() || inter2Nx2N.bestME[0][0].cost == MAX_UINT || inter2Nx2N.bestME[0][1].cost == MAX_UINT)
{
- // origin is 64x64, we scale to 32x32 and setup required parameters
- primitives.scale2D_64to32(bufScale, fenc, stride);
- fenc = bufScale;
-
- // reserve space in case primitives need to store data in above
- // or left buffers
- pixel _above[4 * 32 + 1];
- pixel _left[4 * 32 + 1];
- pixel *aboveScale = _above + 2 * 32;
- pixel *leftScale = _left + 2 * 32;
- aboveScale[0] = leftScale[0] = above[0];
- primitives.scale1D_128to64(aboveScale + 1, above + 1, 0);
- primitives.scale1D_128to64(leftScale + 1, left + 1, 0);
-
- scaleTuSize = 32;
- scaleStride = 32;
- costShift = 2;
- sizeIdx = 5 - 2; // log2(scaleTuSize) - 2
-
- // Filtered and Unfiltered refAbove and refLeft pointing to above and left.
- above = aboveScale;
- left = leftScale;
- aboveFiltered = aboveScale;
- leftFiltered = leftScale;
+ bidir2Nx2N.sa8dCost = MAX_INT64;
+ bidir2Nx2N.rdCost = MAX_INT64;
+ return;
}
- pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
- int predsize = scaleTuSize * scaleTuSize;
-
- m_entropyCoder.loadIntraDirModeLuma(m_rqt[depth].cur);
+ const Yuv& fencYuv = *bidir2Nx2N.fencYuv;
+ MV mvzero(0, 0);
+ int cpart, partEnum = cuGeom.log2CUSize - 2;
- /* there are three cost tiers for intra modes:
- * pred[0] - mode probable, least cost
- * pred[1], pred[2] - less probable, slightly more cost
- * non-mpm modes - all cost the same (rbits) */
- uint64_t mpms;
- uint32_t preds[3];
- uint32_t rbits = getIntraRemModeBits(cu, absPartIdx, preds, mpms);
-
- // DC
- primitives.intra_pred[DC_IDX][sizeIdx](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
- bsad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
- bmode = mode = DC_IDX;
- bbits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
- bcost = m_rdCost.calcRdSADCost(bsad, bbits);
-
- pixel *abovePlanar = above;
- pixel *leftPlanar = left;
-
- if (tuSize & (8 | 16 | 32))
+ if (m_bChromaSa8d)
{
- abovePlanar = aboveFiltered;
- leftPlanar = leftFiltered;
+ int cuSize = 1 << cuGeom.log2CUSize;
+ cpart = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
}
- // PLANAR
- primitives.intra_pred[PLANAR_IDX][sizeIdx](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
- sad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
- mode = PLANAR_IDX;
- bits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
- cost = m_rdCost.calcRdSADCost(sad, bits);
- COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
-
- // Transpose NxN
- primitives.transpose[sizeIdx](bufTrans, fenc, scaleStride);
-
- primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
-
- bool modeHor;
- pixel *cmp;
- intptr_t srcStride;
-
-#define TRY_ANGLE(angle) \
- modeHor = angle < 18; \
- cmp = modeHor ? bufTrans : fenc; \
- srcStride = modeHor ? scaleTuSize : scaleStride; \
- sad = sa8d(cmp, srcStride, &tmp[(angle - 2) * predsize], scaleTuSize) << costShift; \
- bits = (mpms & ((uint64_t)1 << angle)) ? m_entropyCoder.bitsIntraModeMPM(preds, angle) : rbits; \
- cost = m_rdCost.calcRdSADCost(sad, bits)
+ bidir2Nx2N.bestME[0][0] = inter2Nx2N.bestME[0][0];
+ bidir2Nx2N.bestME[0][1] = inter2Nx2N.bestME[0][1];
+ MotionData* bestME = bidir2Nx2N.bestME[0];
+ int ref0 = bestME[0].ref;
+ MV mvp0 = bestME[0].mvp;
+ int mvpIdx0 = bestME[0].mvpIdx;
+ int ref1 = bestME[1].ref;
+ MV mvp1 = bestME[1].mvp;
+ int mvpIdx1 = bestME[1].mvpIdx;
+
+ bidir2Nx2N.initCosts();
+ cu.setPartSizeSubParts(SIZE_2Nx2N);
+ cu.setPredModeSubParts(MODE_INTER);
+ cu.setPUInterDir(3, 0, 0);
+ cu.setPURefIdx(0, (int8_t)ref0, 0, 0);
+ cu.setPURefIdx(1, (int8_t)ref1, 0, 0);
+ cu.m_mvpIdx[0][0] = (uint8_t)mvpIdx0;
+ cu.m_mvpIdx[1][0] = (uint8_t)mvpIdx1;
+ cu.m_mergeFlag[0] = 0;
+
+ /* Estimate cost of BIDIR using best 2Nx2N L0 and L1 motion vectors */
+ cu.setPUMv(0, bestME[0].mv, 0, 0);
+ cu.m_mvd[0][0] = bestME[0].mv - mvp0;
+
+ cu.setPUMv(1, bestME[1].mv, 0, 0);
+ cu.m_mvd[1][0] = bestME[1].mv - mvp1;
+
+ prepMotionCompensation(cu, cuGeom, 0);
+ motionCompensation(bidir2Nx2N.predYuv, true, m_bChromaSa8d);
+
+ int sa8d = primitives.sa8d[partEnum](fencYuv.m_buf[0], fencYuv.m_size, bidir2Nx2N.predYuv.m_buf[0], bidir2Nx2N.predYuv.m_size);
+ if (m_bChromaSa8d)
+ {
+ /* Add in chroma distortion */
+ sa8d += primitives.sa8d_inter[cpart](fencYuv.m_buf[1], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[1], bidir2Nx2N.predYuv.m_csize);
+ sa8d += primitives.sa8d_inter[cpart](fencYuv.m_buf[2], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[2], bidir2Nx2N.predYuv.m_csize);
+ }
+ bidir2Nx2N.sa8dBits = bestME[0].bits + bestME[1].bits + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
+ bidir2Nx2N.sa8dCost = sa8d + m_rdCost.getCost(bidir2Nx2N.sa8dBits);
- if (m_param->bEnableFastIntra)
+ bool bTryZero = bestME[0].mv.notZero() || bestME[1].mv.notZero();
+ if (bTryZero)
+ {
+ /* Do not try zero MV if unidir motion predictors are beyond
+ * valid search area */
+ MV mvmin, mvmax;
+ int merange = X265_MAX(m_param->sourceWidth, m_param->sourceHeight);
+ setSearchRange(cu, mvzero, merange, mvmin, mvmax);
+ mvmax.y += 2; // there is some pad for subpel refine
+ mvmin <<= 2;
+ mvmax <<= 2;
+
+ bTryZero &= bestME[0].mvp.checkRange(mvmin, mvmax);
+ bTryZero &= bestME[1].mvp.checkRange(mvmin, mvmax);
+ }
+ if (bTryZero)
{
- int asad = 0;
- uint32_t lowmode, highmode, amode = 5, abits = 0;
- uint64_t acost = MAX_INT64;
+ /* Estimate cost of BIDIR using coincident blocks */
+ Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
- /* pick the best angle, sampling at distance of 5 */
- for (mode = 5; mode < 35; mode += 5)
- {
- TRY_ANGLE(mode);
- COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits);
- }
+ int zsa8d;
- /* refine best angle at distance 2, then distance 1 */
- for (uint32_t dist = 2; dist >= 1; dist--)
+ if (m_bChromaSa8d)
{
- lowmode = amode - dist;
- highmode = amode + dist;
+ cu.m_mv[0][0] = mvzero;
+ cu.m_mv[1][0] = mvzero;
- X265_CHECK(lowmode >= 2 && lowmode <= 34, "low intra mode out of range\n");
- TRY_ANGLE(lowmode);
- COPY4_IF_LT(acost, cost, amode, lowmode, asad, sad, abits, bits);
+ prepMotionCompensation(cu, cuGeom, 0);
+ motionCompensation(tmpPredYuv, true, true);
- X265_CHECK(highmode >= 2 && highmode <= 34, "high intra mode out of range\n");
- TRY_ANGLE(highmode);
- COPY4_IF_LT(acost, cost, amode, highmode, asad, sad, abits, bits);
+ zsa8d = primitives.sa8d[partEnum](fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
+ zsa8d += primitives.sa8d_inter[cpart](fencYuv.m_buf[1], fencYuv.m_csize, tmpPredYuv.m_buf[1], tmpPredYuv.m_csize);
+ zsa8d += primitives.sa8d_inter[cpart](fencYuv.m_buf[2], fencYuv.m_csize, tmpPredYuv.m_buf[2], tmpPredYuv.m_csize);
}
-
- if (amode == 33)
+ else
{
- TRY_ANGLE(34);
- COPY4_IF_LT(acost, cost, amode, 34, asad, sad, abits, bits);
- }
+ pixel *fref0 = m_slice->m_mref[0][ref0].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx);
+ pixel *fref1 = m_slice->m_mref[1][ref1].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx);
+ intptr_t refStride = m_slice->m_mref[0][0].lumaStride;
- COPY4_IF_LT(bcost, acost, bmode, amode, bsad, asad, bbits, abits);
- }
- else // calculate and search all intra prediction angles for lowest cost
- {
- for (mode = 2; mode < 35; mode++)
- {
- TRY_ANGLE(mode);
- COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
+ primitives.pixelavg_pp[partEnum](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, fref0, refStride, fref1, refStride, 32);
+ zsa8d = primitives.sa8d[partEnum](fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
}
- }
- cu.setLumaIntraDirSubParts((uint8_t)bmode, absPartIdx, depth + initTrDepth);
- intraMode.initCosts();
- intraMode.totalBits = bbits;
- intraMode.distortion = bsad;
- intraMode.sa8dCost = bcost;
- intraMode.sa8dBits = bbits;
-}
+ uint32_t bits0 = bestME[0].bits - m_me.bitcost(bestME[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0);
+ uint32_t bits1 = bestME[1].bits - m_me.bitcost(bestME[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1);
+ uint32_t zcost = zsa8d + m_rdCost.getCost(bits0) + m_rdCost.getCost(bits1);
-void Analysis::encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom)
-{
- CUData& cu = intraMode.cu;
- Yuv* reconYuv = &intraMode.reconYuv;
- Yuv* fencYuv = &m_modeDepth[cuGeom.depth].fencYuv;
+ /* refine MVP selection for zero mv, updates: mvp, mvpidx, bits, cost */
+ checkBestMVP(inter2Nx2N.amvpCand[0][ref0], mvzero, mvp0, mvpIdx0, bits0, zcost);
+ checkBestMVP(inter2Nx2N.amvpCand[1][ref1], mvzero, mvp1, mvpIdx1, bits1, zcost);
- X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does not expect NxN intra\n");
- X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect to be used in I slices\n");
+ uint32_t zbits = bits0 + bits1 + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
+ zcost = zsa8d + m_rdCost.getCost(zbits);
- m_quant.setQPforQuant(cu);
-
- uint32_t tuDepthRange[2];
- cu.getIntraTUQtDepthRange(tuDepthRange, 0);
-
- m_entropyCoder.load(m_rqt[cuGeom.depth].cur);
-
- Cost icosts;
- codeIntraLumaQT(intraMode, cuGeom, 0, 0, false, icosts, tuDepthRange);
- extractIntraResultQT(cu, *reconYuv, 0, 0);
-
- intraMode.distortion = icosts.distortion;
- intraMode.distortion += estIntraPredChromaQT(intraMode, cuGeom);
-
- m_entropyCoder.resetBits();
- if (m_slice->m_pps->bTransquantBypassEnabled)
- m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
- m_entropyCoder.codeSkipFlag(cu, 0);
- m_entropyCoder.codePredMode(cu.m_predMode[0]);
- m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
- m_entropyCoder.codePredInfo(cu, 0);
- intraMode.mvBits += m_entropyCoder.getNumberOfWrittenBits();
+ if (zcost < bidir2Nx2N.sa8dCost)
+ {
+ bidir2Nx2N.sa8dBits = zbits;
+ bidir2Nx2N.sa8dCost = zcost;
- bool bCodeDQP = m_slice->m_pps->bUseDQP;
- m_entropyCoder.codeCoeff(cu, 0, cuGeom.depth, bCodeDQP, tuDepthRange);
+ cu.setPUMv(0, mvzero, 0, 0);
+ cu.m_mvd[0][0] = mvzero - mvp0;
+ cu.m_mvpIdx[0][0] = (uint8_t)mvpIdx0;
- intraMode.totalBits = m_entropyCoder.getNumberOfWrittenBits();
- intraMode.coeffBits = intraMode.totalBits - intraMode.mvBits;
- if (m_rdCost.m_psyRd)
- intraMode.psyEnergy = m_rdCost.psyCost(cuGeom.log2CUSize - 2, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
+ cu.setPUMv(1, mvzero, 0, 0);
+ cu.m_mvd[1][0] = mvzero - mvp1;
+ cu.m_mvpIdx[1][0] = (uint8_t)mvpIdx1;
- m_entropyCoder.store(intraMode.contexts);
- updateModeCost(intraMode);
+ if (m_bChromaSa8d)
+ /* real MC was already performed */
+ bidir2Nx2N.predYuv.copyFromYuv(tmpPredYuv);
+ else
+ {
+ prepMotionCompensation(cu, cuGeom, 0);
+ motionCompensation(bidir2Nx2N.predYuv, true, true);
+ }
+ }
+ else if (m_bChromaSa8d)
+ {
+ /* recover overwritten motion vectors */
+ cu.m_mv[0][0] = bestME[0].mv;
+ cu.m_mv[1][0] = bestME[1].mv;
+ }
+ }
}
void Analysis::encodeResidue(const CUData& ctu, const CUGeom& cuGeom)
{
for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
{
- const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
- if (childCuData.flags & CUGeom::PRESENT)
- encodeResidue(ctu, childCuData);
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ if (childGeom.flags & CUGeom::PRESENT)
+ encodeResidue(ctu, childGeom);
}
return;
}
uint32_t absPartIdx = cuGeom.encodeIdx;
int sizeIdx = cuGeom.log2CUSize - 2;
- Yuv& fencYuv = m_modeDepth[0].fencYuv;
-
/* reuse the bestMode data structures at the current depth */
Mode *bestMode = m_modeDepth[cuGeom.depth].bestMode;
- Yuv& reconYuv = bestMode->reconYuv;
CUData& cu = bestMode->cu;
cu.copyFromPic(ctu, cuGeom);
m_quant.setQPforQuant(cu);
- if (cu.m_predMode[0] == MODE_INTRA)
+ Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
+ if (cuGeom.depth)
+ m_modeDepth[0].fencYuv.copyPartToYuv(fencYuv, absPartIdx);
+ X265_CHECK(bestMode->fencYuv == &fencYuv, "invalid fencYuv\n");
+
+ if (cu.isIntra(0))
{
uint32_t tuDepthRange[2];
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
- uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN;
- residualTransformQuantIntra(*bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
+ uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
+ residualTransformQuantIntra(*bestMode, cuGeom, initTuDepth, 0, tuDepthRange);
getBestIntraModeChroma(*bestMode, cuGeom);
residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
}
- else if (cu.m_predMode[0] == MODE_INTER)
+ else // if (cu.isInter(0))
{
- X265_CHECK(!ctu.m_skipFlag[absPartIdx], "skip not expected prior to transform\n");
+ X265_CHECK(!ctu.isSkipped(absPartIdx), "skip not expected prior to transform\n");
/* Calculate residual for current CU part into depth sized resiYuv */
pixel* predV = predYuv.getCrAddr(absPartIdx);
primitives.luma_sub_ps[sizeIdx](resiYuv.m_buf[0], resiYuv.m_size,
- fencYuv.getLumaAddr(absPartIdx), predY,
+ fencYuv.m_buf[0], predY,
fencYuv.m_size, predYuv.m_size);
primitives.chroma[m_csp].sub_ps[sizeIdx](resiYuv.m_buf[1], resiYuv.m_csize,
- fencYuv.getCbAddr(absPartIdx), predU,
- fencYuv.m_csize, predYuv.m_csize);
+ fencYuv.m_buf[1], predU,
+ fencYuv.m_csize, predYuv.m_csize);
primitives.chroma[m_csp].sub_ps[sizeIdx](resiYuv.m_buf[2], resiYuv.m_csize,
- fencYuv.getCrAddr(absPartIdx), predV,
- fencYuv.m_csize, predYuv.m_csize);
+ fencYuv.m_buf[2], predV,
+ fencYuv.m_csize, predYuv.m_csize);
uint32_t tuDepthRange[2];
cu.getInterTUQtDepthRange(tuDepthRange, 0);
residualTransformQuantInter(*bestMode, cuGeom, 0, cuGeom.depth, tuDepthRange);
if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
- cu.setSkipFlagSubParts(true);
+ cu.setPredModeSubParts(MODE_SKIP);
- PicYuv& reconPicYuv = *m_frame->m_reconPicYuv;
- if (cu.getQtRootCbf(0)) // TODO: split to each component
- {
- /* residualTransformQuantInter() wrote transformed residual back into
- * resiYuv. Generate the recon pixels by adding it to the prediction */
+ /* residualTransformQuantInter() wrote transformed residual back into
+ * resiYuv. Generate the recon pixels by adding it to the prediction */
- primitives.luma_add_ps[sizeIdx](reconYuv.m_buf[0], reconYuv.m_size,
+ PicYuv& reconPic = *m_frame->m_reconPic;
+ if (cu.m_cbf[0][0])
+ primitives.luma_add_ps[sizeIdx](reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride,
predY, resiYuv.m_buf[0], predYuv.m_size, resiYuv.m_size);
- primitives.chroma[m_csp].add_ps[sizeIdx](reconYuv.m_buf[1], reconYuv.m_csize,
- predU, resiYuv.m_buf[1], predYuv.m_csize, resiYuv.m_csize);
- primitives.chroma[m_csp].add_ps[sizeIdx](reconYuv.m_buf[2], reconYuv.m_csize,
- predV, resiYuv.m_buf[2], predYuv.m_csize, resiYuv.m_csize);
-
- /* copy the reconstructed part to the recon pic for later intra
- * predictions */
- reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, cu.m_cuAddr, absPartIdx);
- }
else
- {
- /* copy the prediction pixels to the recon pic for later intra
- * predictions */
-
- primitives.luma_copy_pp[sizeIdx](reconPicYuv.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPicYuv.m_stride,
+ primitives.luma_copy_pp[sizeIdx](reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride,
predY, predYuv.m_size);
- primitives.chroma[m_csp].copy_pp[sizeIdx](reconPicYuv.getCbAddr(cu.m_cuAddr, absPartIdx), reconPicYuv.m_strideC,
+
+ if (cu.m_cbf[1][0])
+ primitives.chroma[m_csp].add_ps[sizeIdx](reconPic.getCbAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
+ predU, resiYuv.m_buf[1], predYuv.m_csize, resiYuv.m_csize);
+ else
+ primitives.chroma[m_csp].copy_pp[sizeIdx](reconPic.getCbAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
predU, predYuv.m_csize);
- primitives.chroma[m_csp].copy_pp[sizeIdx](reconPicYuv.getCrAddr(cu.m_cuAddr, absPartIdx), reconPicYuv.m_strideC,
+
+ if (cu.m_cbf[2][0])
+ primitives.chroma[m_csp].add_ps[sizeIdx](reconPic.getCrAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
+ predV, resiYuv.m_buf[2], predYuv.m_csize, resiYuv.m_csize);
+ else
+ primitives.chroma[m_csp].copy_pp[sizeIdx](reconPic.getCrAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
predV, predYuv.m_csize);
- }
}
- /* else if (cu.m_predMode[0] == MODE_NONE) {} */
checkDQP(cu, cuGeom);
cu.updatePic(cuGeom.depth);
}
-/* check whether current try is the best with identifying the depth of current try */
-void Analysis::checkBestMode(Mode& mode, uint32_t depth)
-{
- ModeDepth& md = m_modeDepth[depth];
- if (md.bestMode)
- {
- if (mode.rdCost < md.bestMode->rdCost)
- md.bestMode = &mode;
- }
- else
- md.bestMode = &mode;
-}
-
void Analysis::addSplitFlagCost(Mode& mode, uint32_t depth)
{
if (m_param->rdLevel >= 3)
* each quantity */
uint32_t depth = cuGeom.depth;
- FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData);
+ FrameData& curEncData = *m_frame->m_encData;
FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
uint64_t cuCost = cuStat.avgCost[depth] * cuStat.count[depth];
uint64_t cuCount = cuStat.count[depth];
}
// give 60% weight to all CU's and 40% weight to neighbour CU's
- if (neighCost + cuCount)
+ if (neighCount + cuCount)
{
uint64_t avgCost = ((3 * cuCost) + (2 * neighCost)) / ((3 * cuCount) + (2 * neighCount));
uint64_t curCost = m_param->rdLevel > 1 ? bestMode.rdCost : bestMode.sa8dCost;