#include "wavefront.h"
#include "param.h"
-#include "PPA/ppa.h"
-
#include "encoder.h"
#include "frameencoder.h"
#include "common.h"
ok &= m_rce.picTimingSEI && m_rce.hrdTiming;
}
- if (m_param->noiseReduction)
+ if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
m_nr = X265_MALLOC(NoiseReduction, 1);
if (m_nr)
memset(m_nr, 0, sizeof(NoiseReduction));
else
- m_param->noiseReduction = 0;
+ m_param->noiseReductionIntra = m_param->noiseReductionInter = 0;
start();
return ok;
}
/* Generate a complete list of unique geom sets for the current picture dimensions */
-bool FrameEncoder::initializeGeoms(const FrameData& encData)
+bool FrameEncoder::initializeGeoms()
{
/* Geoms only vary between CTUs in the presence of picture edges */
- int heightRem = m_param->sourceHeight & (m_param->maxCUSize - 1);
- int widthRem = m_param->sourceWidth & (m_param->maxCUSize - 1);
+ int maxCUSize = m_param->maxCUSize;
+ int heightRem = m_param->sourceHeight & (maxCUSize - 1);
+ int widthRem = m_param->sourceWidth & (maxCUSize - 1);
int allocGeoms = 1; // body
if (heightRem && widthRem)
allocGeoms = 4; // body, right, bottom, corner
if (!m_cuGeoms || !m_ctuGeomMap)
return false;
- CUGeom cuLocalData[CUGeom::MAX_GEOMS];
- memset(cuLocalData, 0, sizeof(cuLocalData)); // temporal fix for memcmp
+ // body
+ CUData::calcCTUGeoms(maxCUSize, maxCUSize, maxCUSize, m_cuGeoms);
+ memset(m_ctuGeomMap, 0, sizeof(uint32_t) * m_numRows * m_numCols);
+ if (allocGeoms == 1)
+ return true;
- int countGeoms = 0;
- for (uint32_t ctuAddr = 0; ctuAddr < m_numRows * m_numCols; ctuAddr++)
+ int countGeoms = 1;
+ if (widthRem)
{
- /* TODO: detach this logic from TComDataCU */
- encData.m_picCTU[ctuAddr].initCTU(*m_frame, ctuAddr, 0);
- encData.m_picCTU[ctuAddr].calcCTUGeoms(m_param->sourceWidth, m_param->sourceHeight, m_param->maxCUSize, cuLocalData);
-
- m_ctuGeomMap[ctuAddr] = MAX_INT;
- for (int i = 0; i < countGeoms; i++)
+ // right
+ CUData::calcCTUGeoms(widthRem, maxCUSize, maxCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS);
+ for (int i = 0; i < m_numRows; i++)
{
- if (!memcmp(cuLocalData, m_cuGeoms + i * CUGeom::MAX_GEOMS, sizeof(CUGeom) * CUGeom::MAX_GEOMS))
- {
- m_ctuGeomMap[ctuAddr] = i * CUGeom::MAX_GEOMS;
- break;
- }
+ uint32_t ctuAddr = m_numCols * (i + 1) - 1;
+ m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
}
+ countGeoms++;
+ }
+ if (heightRem)
+ {
+ // bottom
+ CUData::calcCTUGeoms(maxCUSize, heightRem, maxCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS);
+ for (uint32_t i = 0; i < m_numCols; i++)
+ {
+ uint32_t ctuAddr = m_numCols * (m_numRows - 1) + i;
+ m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
+ }
+ countGeoms++;
- if (m_ctuGeomMap[ctuAddr] == MAX_INT)
+ if (widthRem)
{
- X265_CHECK(countGeoms < allocGeoms, "geometry match check failure\n");
+ // corner
+ CUData::calcCTUGeoms(widthRem, heightRem, maxCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS);
+
+ uint32_t ctuAddr = m_numCols * m_numRows - 1;
m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
- memcpy(m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS, cuLocalData, sizeof(CUGeom) * CUGeom::MAX_GEOMS);
countGeoms++;
}
+ X265_CHECK(countGeoms == allocGeoms, "geometry match check failure\n");
}
return true;
m_frame = curFrame;
curFrame->m_encData->m_frameEncoderID = m_frameEncoderID; // Each Frame knows the ID of the FrameEncoder encoding it
curFrame->m_encData->m_slice->m_mref = m_mref;
+
if (!m_cuGeoms)
{
- if (!initializeGeoms(*curFrame->m_encData))
+ if (!initializeGeoms())
return false;
}
+
m_enable.trigger();
return true;
}
void FrameEncoder::compressFrame()
{
- PPAScopeEvent(FrameEncoder_compressFrame);
+ //ProfileScopeEvent(frameThread);
int64_t startCompressTime = x265_mdate();
Slice* slice = m_frame->m_encData->m_slice;
WeightParam *w = NULL;
if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].bPresentFlag)
w = slice->m_weightPredTable[l][ref];
- m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPicYuv, w);
+ m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic, w, *m_param);
}
}
for (int i = 0; i < m_top->m_numThreadLocalData; i++)
{
NoiseReduction* nr = &m_top->m_threadLocalData[i].analysis.m_quant.m_frameNr[m_frameEncoderID];
- memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
+ memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint16_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
memset(nr->count, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES);
memset(nr->residualSum, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
}
void FrameEncoder::compressCTURows()
{
- PPAScopeEvent(FrameEncoder_compressRows);
Slice* slice = m_frame->m_encData->m_slice;
m_bAllRowsStop = false;
}
}
- processRow(i * 2 + 0, -1);
+ processRowEncoder(i, *m_tld);
}
// Filter
if (i >= m_filterRowDelay)
- processRow((i - m_filterRowDelay) * 2 + 1, -1);
+ m_frameFilter.processRow(i - m_filterRowDelay);
}
}
m_frameTime = (double)m_totalTime / 1000000;
processRowEncoder(realRow, tld);
else
{
- processRowFilter(realRow);
+ m_frameFilter.processRow(realRow);
// NOTE: Active next row
if (realRow != m_numRows - 1)
// Called by worker threads
void FrameEncoder::processRowEncoder(int row, ThreadLocalData& tld)
{
- PPAScopeEvent(Thread_ProcessRow);
-
CTURow& curRow = m_rows[row];
{
Entropy& rowCoder = m_param->bEnableWavefront ? m_rows[row].rowGoOnCoder : m_rows[0].rowGoOnCoder;
FrameData& curEncData = *m_frame->m_encData;
Slice *slice = curEncData.m_slice;
- PicYuv* fencPic = m_frame->m_origPicYuv;
-
- tld.analysis.m_me.setSourcePlane(fencPic->m_picOrg[0], fencPic->m_stride);
int64_t startTime = x265_mdate();
const uint32_t numCols = m_numCols;
while (curRow.completed < numCols)
{
+ ProfileScopeEvent(encodeCTU);
+
int col = curRow.completed;
const uint32_t cuAddr = lineStartCUAddr + col;
CUData* ctu = curEncData.getPicCTU(cuAddr);
int qp = calcQpForCu(cuAddr, curEncData.m_cuStat[cuAddr].baseQp);
tld.analysis.setQP(*slice, qp);
qp = Clip3(QP_MIN, QP_MAX_SPEC, qp);
- ctu->setQPSubParts((char)qp, 0, 0);
+ ctu->setQPSubParts((int8_t)qp, 0, 0);
curEncData.m_rowStat[row].sumQpAq += qp;
}
else
}
// Does all the CU analysis, returns best top level mode decision
- Search::Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
+ Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
/* advance top-level row coder to include the context of this CTU.
* if SAO is disabled, rowCoder writes the final CTU bitstream */
if (dequeueRow(r * 2))
stopRow.active = false;
else
+ {
+ /* we must release the row lock to allow the thread to exit */
+ stopRow.lock.release();
GIVE_UP_TIME();
+ stopRow.lock.acquire();
+ }
}
-
stopRow.lock.release();
bool bRowBusy = true;
m_top->m_rateControl->rateControlUpdateStats(&m_rce);
}
- // trigger row-wise loop filters
- if (row >= m_filterRowDelay)
+ if (m_param->bEnableWavefront)
{
- enableRowFilter(row - m_filterRowDelay);
+ /* trigger row-wise loop filters */
+ if (row >= m_filterRowDelay)
+ {
+ enableRowFilter(row - m_filterRowDelay);
- // NOTE: Active Filter to first row (row 0)
- if (row == m_filterRowDelay)
- enqueueRowFilter(0);
- }
- if (row == m_numRows - 1)
- {
- for (int i = m_numRows - m_filterRowDelay; i < m_numRows; i++)
- enableRowFilter(i);
+ /* NOTE: Activate filter if first row (row 0) */
+ if (row == m_filterRowDelay)
+ enqueueRowFilter(0);
+ }
+ if (row == m_numRows - 1)
+ {
+ for (int i = m_numRows - m_filterRowDelay; i < m_numRows; i++)
+ enableRowFilter(i);
+ }
}
m_totalTime += x265_mdate() - startTime;
log->cntIntra[depth]++;
log->qTreeIntraCnt[depth]++;
- if (ctu.m_partSize[absPartIdx] == SIZE_NONE)
+ if (ctu.m_predMode[absPartIdx] == MODE_NONE)
{
log->totalCu--;
log->cntIntra[depth]--;
log->qTreeIntraCnt[depth]--;
}
- else if (ctu.m_partSize[absPartIdx] == SIZE_NxN)
+ else if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
{
/* TODO: log intra modes at absPartIdx +0 to +3 */
X265_CHECK(depth == g_maxCUDepth, "Intra NxN found at improbable depth\n");
log->totalCu++;
log->cntTotalCu[depth]++;
- if (ctu.m_partSize[absPartIdx] == SIZE_NONE)
+ if (ctu.m_predMode[absPartIdx] == MODE_NONE)
{
log->totalCu--;
log->cntTotalCu[depth]--;
log->cntSkipCu[depth]++;
log->qTreeSkipCnt[depth]++;
}
- else if (ctu.m_predMode[absPartIdx] == MODE_INTER)
+ else if (ctu.isInter(absPartIdx))
{
log->cntInter[depth]++;
log->qTreeInterCnt[depth]++;
else
log->cuInterDistribution[depth][AMP_ID]++;
}
- else if (ctu.m_predMode[absPartIdx] == MODE_INTRA)
+ else if (ctu.isIntra(absPartIdx))
{
log->cntIntra[depth]++;
log->qTreeIntraCnt[depth]++;
- if (ctu.m_partSize[absPartIdx] == SIZE_NxN)
+ if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
{
X265_CHECK(depth == g_maxCUDepth, "Intra NxN found at improbable depth\n");
log->cntIntraNxN++;
m_nr->count[cat] >>= 1;
}
- uint64_t scaledCount = (uint64_t)m_param->noiseReduction * m_nr->count[cat];
+ int nrStrength = cat < 8 ? m_param->noiseReductionIntra : m_param->noiseReductionInter;
+ uint64_t scaledCount = (uint64_t)nrStrength * m_nr->count[cat];
for (int i = 0; i < coefCount; i++)
{
/* Derive qpOffet for each CU by averaging offsets for all 16x16 blocks in the cu. */
double qp_offset = 0;
- uint32_t maxBlockCols = (m_frame->m_origPicYuv->m_picWidth + (16 - 1)) / 16;
- uint32_t maxBlockRows = (m_frame->m_origPicYuv->m_picHeight + (16 - 1)) / 16;
+ uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - 1)) / 16;
+ uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - 1)) / 16;
uint32_t noOfBlocks = g_maxCUSize / 16;
uint32_t block_y = (ctuAddr / curEncData.m_slice->m_sps->numCuInWidth) * noOfBlocks;
uint32_t block_x = (ctuAddr * noOfBlocks) - block_y * curEncData.m_slice->m_sps->numCuInWidth;