X-Git-Url: https://git.piment-noir.org/?p=deb_x265.git;a=blobdiff_plain;f=source%2Fencoder%2Fslicetype.cpp;h=a792760a05fc7ee6f453e93038a2a036e1f4e80e;hp=cc70c20658d9df0bfe2fa93c52f26f6c868f98e7;hb=b53f7c52d8280ab63876efd6eb292c21430ac607;hpb=5c9b45285dd64723ad1dac380b98a7b1f3095674 diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp index cc70c20..a792760 100644 --- a/source/encoder/slicetype.cpp +++ b/source/encoder/slicetype.cpp @@ -111,7 +111,7 @@ void Lookahead::destroy() /* Called by API thread */ void Lookahead::addPicture(Frame *curFrame, int sliceType) { - PicYuv *orig = curFrame->m_origPicYuv; + PicYuv *orig = curFrame->m_fencPic; curFrame->m_lowres.init(orig, curFrame->m_poc, sliceType); @@ -192,7 +192,7 @@ Frame* Lookahead::getDecidedPicture() /* Called by pool worker threads */ bool Lookahead::findJob(int) { - if (m_bReady && ATOMIC_CAS32(&m_bReady, 1, 0) == 1) + if (m_bReady > 0 && ATOMIC_DEC(&m_bReady) == 0) { m_inputQueueLock.acquire(); slicetypeDecide(); @@ -290,6 +290,8 @@ void Lookahead::getEstimatedPictureCost(Frame *curFrame) /* called by API thread or worker thread with inputQueueLock acquired */ void Lookahead::slicetypeDecide() { + ProfileScopeEvent(slicetypeDecideEV); + ScopedLock lock(m_decideLock); Lowres *frames[X265_LOOKAHEAD_MAX]; @@ -417,7 +419,6 @@ void Lookahead::slicetypeDecide() list[bframes / 2]->m_lowres.sliceType = X265_TYPE_BREF; brefs++; } - /* calculate the frame costs ahead of time for estimateFrameCost while we still have lowres */ if (m_param->rc.rateControlMode != X265_RC_CQP) { @@ -524,14 +525,12 @@ void Lookahead::slicetypeDecide() void Lookahead::vbvLookahead(Lowres **frames, int numFrames, int keyframe) { int prevNonB = 0, curNonB = 1, idx = 0; - bool isNextNonB = false; - while (curNonB < numFrames && frames[curNonB]->sliceType == X265_TYPE_B) curNonB++; - int nextNonB = keyframe ? prevNonB : curNonB; - int nextB = keyframe ? prevNonB + 1 : curNonB + 1; - + int nextB = prevNonB + 1; + int nextBRef = 0; + int miniGopEnd = keyframe ? prevNonB : curNonB; while (curNonB < numFrames + !keyframe) { /* P/I cost: This shouldn't include the cost of nextNonB */ @@ -540,38 +539,53 @@ void Lookahead::vbvLookahead(Lowres **frames, int numFrames, int keyframe) int p0 = IS_X265_TYPE_I(frames[curNonB]->sliceType) ? curNonB : prevNonB; frames[nextNonB]->plannedSatd[idx] = vbvFrameCost(frames, p0, curNonB, curNonB); frames[nextNonB]->plannedType[idx] = frames[curNonB]->sliceType; + /* Save the nextNonB Cost in each B frame of the current miniGop */ + if (curNonB > miniGopEnd) + { + for (int j = nextB; j < miniGopEnd; j++) + { + frames[j]->plannedSatd[frames[j]->indB] = frames[nextNonB]->plannedSatd[idx]; + frames[j]->plannedType[frames[j]->indB++] = frames[nextNonB]->plannedType[idx]; + + } + } idx++; } /* Handle the B-frames: coded order */ - for (int i = prevNonB + 1; i < curNonB; i++, idx++) - { - frames[nextNonB]->plannedSatd[idx] = vbvFrameCost(frames, prevNonB, curNonB, i); - frames[nextNonB]->plannedType[idx] = X265_TYPE_B; - } + if (m_param->bBPyramid && curNonB - prevNonB > 1) + nextBRef = (prevNonB + curNonB + 1) / 2; - for (int i = nextB; i <= curNonB; i++) + for (int i = prevNonB + 1; i < curNonB; i++, idx++) { - for (int j = frames[i]->indB + i + 1; j <= curNonB; j++, frames[i]->indB++) + int64_t satdCost = 0; int type = X265_TYPE_B; + if (nextBRef) { - if (j == curNonB) + if (i == nextBRef) { - if (isNextNonB) - { - int p0 = IS_X265_TYPE_I(frames[curNonB]->sliceType) ? curNonB : prevNonB; - frames[i]->plannedSatd[frames[i]->indB] = vbvFrameCost(frames, p0, curNonB, curNonB); - frames[i]->plannedType[frames[i]->indB] = frames[curNonB]->sliceType; - } + satdCost = vbvFrameCost(frames, prevNonB, curNonB, nextBRef); + type = X265_TYPE_BREF; } + else if (i < nextBRef) + satdCost = vbvFrameCost(frames, prevNonB, nextBRef, i); else - { - frames[i]->plannedSatd[frames[i]->indB] = vbvFrameCost(frames, prevNonB, curNonB, j); - frames[i]->plannedType[frames[i]->indB] = X265_TYPE_B; - } + satdCost = vbvFrameCost(frames, nextBRef, curNonB, i); } - if (i == curNonB && !isNextNonB) - isNextNonB = true; - } + else + satdCost = vbvFrameCost(frames, prevNonB, nextNonB, i); + frames[nextNonB]->plannedSatd[idx] = satdCost; + frames[nextNonB]->plannedType[idx] = type; + /* Save the nextB Cost in each B frame of the current miniGop */ + for (int j = nextB; j < miniGopEnd; j++) + { + if (nextBRef && i == nextBRef) + break; + if (j >= i && j !=nextBRef) + continue; + frames[j]->plannedSatd[frames[j]->indB] = satdCost; + frames[j]->plannedType[frames[j]->indB++] = X265_TYPE_B; + } + } prevNonB = curNonB; curNonB++; while (curNonB <= numFrames && frames[curNonB]->sliceType == X265_TYPE_B) @@ -1238,7 +1252,7 @@ void CostEstimate::init(x265_param *_param, Frame *curFrame) if (m_param->bEnableWeightedPred) { - PicYuv *orig = curFrame->m_origPicYuv; + PicYuv *orig = curFrame->m_fencPic; m_paddedLines = curFrame->m_lowres.lines + 2 * orig->m_lumaMarginY; intptr_t padoffset = curFrame->m_lowres.lumaStride * orig->m_lumaMarginY + orig->m_lumaMarginX; @@ -1249,7 +1263,7 @@ void CostEstimate::init(x265_param *_param, Frame *curFrame) m_weightedRef.lowresPlane[i] = m_wbuffer[i] + padoffset; } - m_weightedRef.fpelPlane = m_weightedRef.lowresPlane[0]; + m_weightedRef.fpelPlane[0] = m_weightedRef.lowresPlane[0]; m_weightedRef.lumaStride = curFrame->m_lowres.lumaStride; m_weightedRef.isLowres = true; m_weightedRef.isWeighted = false; @@ -1290,7 +1304,6 @@ int64_t CostEstimate::estimateFrameCost(Lowres **frames, int p0, int p1, int b, for (int i = 0; i < m_heightInCU; i++) { m_rows[i].init(); - m_rows[i].m_me.setSourcePlane(fenc->lowresPlane[0], fenc->lumaStride); if (!fenc->bIntraCalculated) fenc->rowSatds[0][0][i] = 0; fenc->rowSatds[b - p0][p1 - b][i] = 0; @@ -1351,7 +1364,7 @@ uint32_t CostEstimate::weightCostLuma(Lowres **frames, int b, int p0, WeightPara { Lowres *fenc = frames[b]; Lowres *ref = frames[p0]; - pixel *src = ref->fpelPlane; + pixel *src = ref->fpelPlane[0]; intptr_t stride = fenc->lumaStride; if (wp) @@ -1365,7 +1378,7 @@ uint32_t CostEstimate::weightCostLuma(Lowres **frames, int b, int p0, WeightPara primitives.weight_pp(ref->buffer[0], m_wbuffer[0], stride, widthHeight, m_paddedLines, scale, round << correction, denom + correction, offset); - src = m_weightedRef.fpelPlane; + src = m_weightedRef.fpelPlane[0]; } uint32_t cost = 0; @@ -1376,7 +1389,7 @@ uint32_t CostEstimate::weightCostLuma(Lowres **frames, int b, int p0, WeightPara { for (int x = 0; x < fenc->width; x += 8, mb++, pixoff += 8) { - int satd = primitives.satd[LUMA_8x8](src + pixoff, stride, fenc->fpelPlane + pixoff, stride); + int satd = primitives.satd[LUMA_8x8](src + pixoff, stride, fenc->fpelPlane[0] + pixoff, stride); cost += X265_MIN(satd, fenc->intraCost[mb]); } } @@ -1469,6 +1482,8 @@ void CostEstimate::weightsAnalyse(Lowres **frames, int b, int p0) void CostEstimate::processRow(int row, int /*threadId*/) { + ProfileScopeEvent(costEstimateRow); + int realrow = m_heightInCU - 1 - row; Lowres **frames = m_curframes; ReferencePlanes *wfref0 = m_weightedRef.isWeighted ? &m_weightedRef : frames[m_curp0]; @@ -1531,7 +1546,7 @@ void EstimateRow::estimateCUCost(Lowres **frames, ReferencePlanes *wfref0, int c const bool bFrameScoreCU = (cux > 0 && cux < m_widthInCU - 1 && cuy > 0 && cuy < m_heightInCU - 1) || m_widthInCU <= 2 || m_heightInCU <= 2; - m_me.setSourcePU(pelOffset, cuSize, cuSize); + m_me.setSourcePU(fenc->lowresPlane[0], fenc->lumaStride, pelOffset, cuSize, cuSize); /* A small, arbitrary bias to avoid VBV problems caused by zero-residual lookahead blocks. */ int lowresPenalty = 4; @@ -1592,12 +1607,13 @@ void EstimateRow::estimateCUCost(Lowres **frames, ReferencePlanes *wfref0, int c } if (bBidir) { - pixel subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE], subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]; + ALIGN_VAR_32(pixel, subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]); + ALIGN_VAR_32(pixel, subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]); intptr_t stride0 = X265_LOWRES_CU_SIZE, stride1 = X265_LOWRES_CU_SIZE; pixel *src0 = wfref0->lowresMC(pelOffset, *fenc_mvs[0], subpelbuf0, stride0); pixel *src1 = fref1->lowresMC(pelOffset, *fenc_mvs[1], subpelbuf1, stride1); - pixel ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]; + ALIGN_VAR_32(pixel, ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]); primitives.pixelavg_pp[LUMA_8x8](ref, X265_LOWRES_CU_SIZE, src0, stride0, src1, stride1, 32); int bicost = primitives.satd[LUMA_8x8](fenc->lowresPlane[0] + pelOffset, fenc->lumaStride, ref, X265_LOWRES_CU_SIZE); COPY2_IF_LT(bcost, bicost, listused, 3); @@ -1626,9 +1642,7 @@ void EstimateRow::estimateCUCost(Lowres **frames, ReferencePlanes *wfref0, int c // Copy Left for (int i = 0; i < cuSize + 1; i++) - { left0[i] = pix_cur[-1 - fenc->lumaStride + i * fenc->lumaStride]; - } for (int i = 0; i < cuSize; i++) { @@ -1652,22 +1666,22 @@ void EstimateRow::estimateCUCost(Lowres **frames, ReferencePlanes *wfref0, int c // generate 35 intra predictions into m_predictions pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; - int icost = m_me.COST_MAX, cost; + int icost = m_me.COST_MAX; primitives.intra_pred[DC_IDX][sizeIdx](m_predictions, cuSize, left0, above0, 0, (cuSize <= 16)); - cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); + int cost = m_me.bufSATD(m_predictions, cuSize); if (cost < icost) icost = cost; pixel *above = (cuSize >= 8) ? above1 : above0; pixel *left = (cuSize >= 8) ? left1 : left0; primitives.intra_pred[PLANAR_IDX][sizeIdx](m_predictions, cuSize, left, above, 0, 0); - cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); + cost = m_me.bufSATD(m_predictions, cuSize); if (cost < icost) icost = cost; primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize <= 16)); // calculate satd costs, keep least cost ALIGN_VAR_32(pixel, buf_trans[32 * 32]); - primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE); + primitives.transpose[sizeIdx](buf_trans, m_me.fencPUYuv.m_buf[0], FENC_STRIDE); int acost = m_me.COST_MAX; uint32_t mode, lowmode = 4; @@ -1676,7 +1690,7 @@ void EstimateRow::estimateCUCost(Lowres **frames, ReferencePlanes *wfref0, int c if (mode < 18) cost = satd(buf_trans, cuSize, &m_predictions[mode * predsize], cuSize); else - cost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize); + cost = m_me.bufSATD(&m_predictions[mode * predsize], cuSize); COPY2_IF_LT(acost, cost, lowmode, mode); } for (uint32_t dist = 2; dist >= 1; dist--) @@ -1685,14 +1699,14 @@ void EstimateRow::estimateCUCost(Lowres **frames, ReferencePlanes *wfref0, int c if (mode < 18) cost = satd(buf_trans, cuSize, &m_predictions[mode * predsize], cuSize); else - cost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize); + cost = m_me.bufSATD(&m_predictions[mode * predsize], cuSize); COPY2_IF_LT(acost, cost, lowmode, mode); mode = lowmode + dist; if (mode < 18) cost = satd(buf_trans, cuSize, &m_predictions[mode * predsize], cuSize); else - cost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize); + cost = m_me.bufSATD(&m_predictions[mode * predsize], cuSize); COPY2_IF_LT(acost, cost, lowmode, mode); } if (acost < icost) @@ -1701,6 +1715,7 @@ void EstimateRow::estimateCUCost(Lowres **frames, ReferencePlanes *wfref0, int c const int intraPenalty = 5 * m_lookAheadLambda; icost += intraPenalty + lowresPenalty; /* estimate intra signal cost */ fenc->intraCost[cuXY] = icost; + fenc->intraMode[cuXY] = (uint8_t)lowmode; int icostAq = icost; if (bFrameScoreCU) {