X-Git-Url: https://git.piment-noir.org/?p=deb_x265.git;a=blobdiff_plain;f=source%2Fencoder%2Fframeencoder.cpp;h=5f4d2f7cf56847f3ca091711dc04615a60929aa2;hp=c6e69150985007e920fe0313a9e432455aa568bf;hb=b53f7c52d8280ab63876efd6eb292c21430ac607;hpb=5c9b45285dd64723ad1dac380b98a7b1f3095674 diff --git a/source/encoder/frameencoder.cpp b/source/encoder/frameencoder.cpp index c6e6915..5f4d2f7 100644 --- a/source/encoder/frameencoder.cpp +++ b/source/encoder/frameencoder.cpp @@ -29,8 +29,6 @@ #include "wavefront.h" #include "param.h" -#include "PPA/ppa.h" - #include "encoder.h" #include "frameencoder.h" #include "common.h" @@ -126,23 +124,24 @@ bool FrameEncoder::init(Encoder *top, int numRows, int numCols, int id) ok &= m_rce.picTimingSEI && m_rce.hrdTiming; } - if (m_param->noiseReduction) + if (m_param->noiseReductionIntra || m_param->noiseReductionInter) m_nr = X265_MALLOC(NoiseReduction, 1); if (m_nr) memset(m_nr, 0, sizeof(NoiseReduction)); else - m_param->noiseReduction = 0; + m_param->noiseReductionIntra = m_param->noiseReductionInter = 0; start(); return ok; } /* Generate a complete list of unique geom sets for the current picture dimensions */ -bool FrameEncoder::initializeGeoms(const FrameData& encData) +bool FrameEncoder::initializeGeoms() { /* Geoms only vary between CTUs in the presence of picture edges */ - int heightRem = m_param->sourceHeight & (m_param->maxCUSize - 1); - int widthRem = m_param->sourceWidth & (m_param->maxCUSize - 1); + int maxCUSize = m_param->maxCUSize; + int heightRem = m_param->sourceHeight & (maxCUSize - 1); + int widthRem = m_param->sourceWidth & (maxCUSize - 1); int allocGeoms = 1; // body if (heightRem && widthRem) allocGeoms = 4; // body, right, bottom, corner @@ -154,33 +153,45 @@ bool FrameEncoder::initializeGeoms(const FrameData& encData) if (!m_cuGeoms || !m_ctuGeomMap) return false; - CUGeom cuLocalData[CUGeom::MAX_GEOMS]; - memset(cuLocalData, 0, sizeof(cuLocalData)); // temporal fix for memcmp + // body + CUData::calcCTUGeoms(maxCUSize, maxCUSize, maxCUSize, m_cuGeoms); + memset(m_ctuGeomMap, 0, sizeof(uint32_t) * m_numRows * m_numCols); + if (allocGeoms == 1) + return true; - int countGeoms = 0; - for (uint32_t ctuAddr = 0; ctuAddr < m_numRows * m_numCols; ctuAddr++) + int countGeoms = 1; + if (widthRem) { - /* TODO: detach this logic from TComDataCU */ - encData.m_picCTU[ctuAddr].initCTU(*m_frame, ctuAddr, 0); - encData.m_picCTU[ctuAddr].calcCTUGeoms(m_param->sourceWidth, m_param->sourceHeight, m_param->maxCUSize, cuLocalData); - - m_ctuGeomMap[ctuAddr] = MAX_INT; - for (int i = 0; i < countGeoms; i++) + // right + CUData::calcCTUGeoms(widthRem, maxCUSize, maxCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS); + for (int i = 0; i < m_numRows; i++) { - if (!memcmp(cuLocalData, m_cuGeoms + i * CUGeom::MAX_GEOMS, sizeof(CUGeom) * CUGeom::MAX_GEOMS)) - { - m_ctuGeomMap[ctuAddr] = i * CUGeom::MAX_GEOMS; - break; - } + uint32_t ctuAddr = m_numCols * (i + 1) - 1; + m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS; } + countGeoms++; + } + if (heightRem) + { + // bottom + CUData::calcCTUGeoms(maxCUSize, heightRem, maxCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS); + for (uint32_t i = 0; i < m_numCols; i++) + { + uint32_t ctuAddr = m_numCols * (m_numRows - 1) + i; + m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS; + } + countGeoms++; - if (m_ctuGeomMap[ctuAddr] == MAX_INT) + if (widthRem) { - X265_CHECK(countGeoms < allocGeoms, "geometry match check failure\n"); + // corner + CUData::calcCTUGeoms(widthRem, heightRem, maxCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS); + + uint32_t ctuAddr = m_numCols * m_numRows - 1; m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS; - memcpy(m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS, cuLocalData, sizeof(CUGeom) * CUGeom::MAX_GEOMS); countGeoms++; } + X265_CHECK(countGeoms == allocGeoms, "geometry match check failure\n"); } return true; @@ -191,11 +202,13 @@ bool FrameEncoder::startCompressFrame(Frame* curFrame) m_frame = curFrame; curFrame->m_encData->m_frameEncoderID = m_frameEncoderID; // Each Frame knows the ID of the FrameEncoder encoding it curFrame->m_encData->m_slice->m_mref = m_mref; + if (!m_cuGeoms) { - if (!initializeGeoms(*curFrame->m_encData)) + if (!initializeGeoms()) return false; } + m_enable.trigger(); return true; } @@ -217,7 +230,7 @@ void FrameEncoder::threadMain() void FrameEncoder::compressFrame() { - PPAScopeEvent(FrameEncoder_compressFrame); + //ProfileScopeEvent(frameThread); int64_t startCompressTime = x265_mdate(); Slice* slice = m_frame->m_encData->m_slice; @@ -252,7 +265,7 @@ void FrameEncoder::compressFrame() WeightParam *w = NULL; if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].bPresentFlag) w = slice->m_weightPredTable[l][ref]; - m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPicYuv, w); + m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic, w, *m_param); } } @@ -481,7 +494,7 @@ void FrameEncoder::compressFrame() for (int i = 0; i < m_top->m_numThreadLocalData; i++) { NoiseReduction* nr = &m_top->m_threadLocalData[i].analysis.m_quant.m_frameNr[m_frameEncoderID]; - memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS); + memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint16_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS); memset(nr->count, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES); memset(nr->residualSum, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS); } @@ -569,7 +582,6 @@ void FrameEncoder::encodeSlice() void FrameEncoder::compressCTURows() { - PPAScopeEvent(FrameEncoder_compressRows); Slice* slice = m_frame->m_encData->m_slice; m_bAllRowsStop = false; @@ -643,12 +655,12 @@ void FrameEncoder::compressCTURows() } } - processRow(i * 2 + 0, -1); + processRowEncoder(i, *m_tld); } // Filter if (i >= m_filterRowDelay) - processRow((i - m_filterRowDelay) * 2 + 1, -1); + m_frameFilter.processRow(i - m_filterRowDelay); } } m_frameTime = (double)m_totalTime / 1000000; @@ -666,7 +678,7 @@ void FrameEncoder::processRow(int row, int threadId) processRowEncoder(realRow, tld); else { - processRowFilter(realRow); + m_frameFilter.processRow(realRow); // NOTE: Active next row if (realRow != m_numRows - 1) @@ -679,8 +691,6 @@ void FrameEncoder::processRow(int row, int threadId) // Called by worker threads void FrameEncoder::processRowEncoder(int row, ThreadLocalData& tld) { - PPAScopeEvent(Thread_ProcessRow); - CTURow& curRow = m_rows[row]; { @@ -707,9 +717,6 @@ void FrameEncoder::processRowEncoder(int row, ThreadLocalData& tld) Entropy& rowCoder = m_param->bEnableWavefront ? m_rows[row].rowGoOnCoder : m_rows[0].rowGoOnCoder; FrameData& curEncData = *m_frame->m_encData; Slice *slice = curEncData.m_slice; - PicYuv* fencPic = m_frame->m_origPicYuv; - - tld.analysis.m_me.setSourcePlane(fencPic->m_picOrg[0], fencPic->m_stride); int64_t startTime = x265_mdate(); const uint32_t numCols = m_numCols; @@ -718,6 +725,8 @@ void FrameEncoder::processRowEncoder(int row, ThreadLocalData& tld) while (curRow.completed < numCols) { + ProfileScopeEvent(encodeCTU); + int col = curRow.completed; const uint32_t cuAddr = lineStartCUAddr + col; CUData* ctu = curEncData.getPicCTU(cuAddr); @@ -744,7 +753,7 @@ void FrameEncoder::processRowEncoder(int row, ThreadLocalData& tld) int qp = calcQpForCu(cuAddr, curEncData.m_cuStat[cuAddr].baseQp); tld.analysis.setQP(*slice, qp); qp = Clip3(QP_MIN, QP_MAX_SPEC, qp); - ctu->setQPSubParts((char)qp, 0, 0); + ctu->setQPSubParts((int8_t)qp, 0, 0); curEncData.m_rowStat[row].sumQpAq += qp; } else @@ -758,7 +767,7 @@ void FrameEncoder::processRowEncoder(int row, ThreadLocalData& tld) } // Does all the CU analysis, returns best top level mode decision - Search::Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder); + Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder); /* advance top-level row coder to include the context of this CTU. * if SAO is disabled, rowCoder writes the final CTU bitstream */ @@ -839,9 +848,13 @@ void FrameEncoder::processRowEncoder(int row, ThreadLocalData& tld) if (dequeueRow(r * 2)) stopRow.active = false; else + { + /* we must release the row lock to allow the thread to exit */ + stopRow.lock.release(); GIVE_UP_TIME(); + stopRow.lock.acquire(); + } } - stopRow.lock.release(); bool bRowBusy = true; @@ -937,19 +950,22 @@ void FrameEncoder::processRowEncoder(int row, ThreadLocalData& tld) m_top->m_rateControl->rateControlUpdateStats(&m_rce); } - // trigger row-wise loop filters - if (row >= m_filterRowDelay) + if (m_param->bEnableWavefront) { - enableRowFilter(row - m_filterRowDelay); + /* trigger row-wise loop filters */ + if (row >= m_filterRowDelay) + { + enableRowFilter(row - m_filterRowDelay); - // NOTE: Active Filter to first row (row 0) - if (row == m_filterRowDelay) - enqueueRowFilter(0); - } - if (row == m_numRows - 1) - { - for (int i = m_numRows - m_filterRowDelay; i < m_numRows; i++) - enableRowFilter(i); + /* NOTE: Activate filter if first row (row 0) */ + if (row == m_filterRowDelay) + enqueueRowFilter(0); + } + if (row == m_numRows - 1) + { + for (int i = m_numRows - m_filterRowDelay; i < m_numRows; i++) + enableRowFilter(i); + } } m_totalTime += x265_mdate() - startTime; @@ -971,13 +987,13 @@ void FrameEncoder::collectCTUStatistics(CUData& ctu) log->cntIntra[depth]++; log->qTreeIntraCnt[depth]++; - if (ctu.m_partSize[absPartIdx] == SIZE_NONE) + if (ctu.m_predMode[absPartIdx] == MODE_NONE) { log->totalCu--; log->cntIntra[depth]--; log->qTreeIntraCnt[depth]--; } - else if (ctu.m_partSize[absPartIdx] == SIZE_NxN) + else if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N) { /* TODO: log intra modes at absPartIdx +0 to +3 */ X265_CHECK(depth == g_maxCUDepth, "Intra NxN found at improbable depth\n"); @@ -1000,7 +1016,7 @@ void FrameEncoder::collectCTUStatistics(CUData& ctu) log->totalCu++; log->cntTotalCu[depth]++; - if (ctu.m_partSize[absPartIdx] == SIZE_NONE) + if (ctu.m_predMode[absPartIdx] == MODE_NONE) { log->totalCu--; log->cntTotalCu[depth]--; @@ -1011,7 +1027,7 @@ void FrameEncoder::collectCTUStatistics(CUData& ctu) log->cntSkipCu[depth]++; log->qTreeSkipCnt[depth]++; } - else if (ctu.m_predMode[absPartIdx] == MODE_INTER) + else if (ctu.isInter(absPartIdx)) { log->cntInter[depth]++; log->qTreeInterCnt[depth]++; @@ -1021,12 +1037,12 @@ void FrameEncoder::collectCTUStatistics(CUData& ctu) else log->cuInterDistribution[depth][AMP_ID]++; } - else if (ctu.m_predMode[absPartIdx] == MODE_INTRA) + else if (ctu.isIntra(absPartIdx)) { log->cntIntra[depth]++; log->qTreeIntraCnt[depth]++; - if (ctu.m_partSize[absPartIdx] == SIZE_NxN) + if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N) { X265_CHECK(depth == g_maxCUDepth, "Intra NxN found at improbable depth\n"); log->cntIntraNxN++; @@ -1061,7 +1077,8 @@ void FrameEncoder::noiseReductionUpdate() m_nr->count[cat] >>= 1; } - uint64_t scaledCount = (uint64_t)m_param->noiseReduction * m_nr->count[cat]; + int nrStrength = cat < 8 ? m_param->noiseReductionIntra : m_param->noiseReductionInter; + uint64_t scaledCount = (uint64_t)nrStrength * m_nr->count[cat]; for (int i = 0; i < coefCount; i++) { @@ -1091,8 +1108,8 @@ int FrameEncoder::calcQpForCu(uint32_t ctuAddr, double baseQp) /* Derive qpOffet for each CU by averaging offsets for all 16x16 blocks in the cu. */ double qp_offset = 0; - uint32_t maxBlockCols = (m_frame->m_origPicYuv->m_picWidth + (16 - 1)) / 16; - uint32_t maxBlockRows = (m_frame->m_origPicYuv->m_picHeight + (16 - 1)) / 16; + uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - 1)) / 16; + uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - 1)) / 16; uint32_t noOfBlocks = g_maxCUSize / 16; uint32_t block_y = (ctuAddr / curEncData.m_slice->m_sps->numCuInWidth) * noOfBlocks; uint32_t block_x = (ctuAddr * noOfBlocks) - block_y * curEncData.m_slice->m_sps->numCuInWidth;