X-Git-Url: https://git.piment-noir.org/?p=deb_x265.git;a=blobdiff_plain;f=source%2Fcommon%2Fdeblock.cpp;h=7369787ba5271238e5d1f34a3a5229092d9a1968;hp=c9a2731078e397d07ad4532a9cac79861fde372b;hb=b53f7c52d8280ab63876efd6eb292c21430ac607;hpb=5c9b45285dd64723ad1dac380b98a7b1f3095674 diff --git a/source/common/deblock.cpp b/source/common/deblock.cpp index c9a2731..7369787 100644 --- a/source/common/deblock.cpp +++ b/source/common/deblock.cpp @@ -33,20 +33,44 @@ using namespace x265; #define DEBLOCK_SMALLEST_BLOCK 8 #define DEFAULT_INTRA_TC_OFFSET 2 -void Deblock::deblockCTU(CUData* cu, int32_t dir) +void Deblock::deblockCTU(const CUData* ctu, int32_t dir) { - uint8_t blockingStrength[MAX_NUM_PARTITIONS]; + uint8_t blockStrength[MAX_NUM_PARTITIONS]; - memset(blockingStrength, 0, sizeof(uint8_t) * m_numPartitions); + memset(blockStrength, 0, sizeof(uint8_t) * m_numPartitions); - deblockCU(cu, 0, 0, dir, blockingStrength); + deblockCU(ctu, 0, 0, dir, blockStrength); +} + +static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir) +{ + if (dir == Deblock::EDGE_VER) + { + if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0) + { + uint32_t tempPartIdx; + const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx); + return tempCU ? 2 : 0; + } + } + else + { + if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0) + { + uint32_t tempPartIdx; + const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx); + return tempCU ? 2 : 0; + } + } + + return 0; } /* Deblocking filter process in CU-based (the same function as conventional's) * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */ -void Deblock::deblockCU(CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockingStrength[]) +void Deblock::deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockStrength[]) { - if (cu->m_partSize[absPartIdx] == SIZE_NONE) + if (cu->m_predMode[absPartIdx] == MODE_NONE) return; uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1); @@ -60,23 +84,21 @@ void Deblock::deblockCU(CUData* cu, uint32_t absPartIdx, uint32_t depth, const i uint32_t ymax = sps.picHeightInLumaSamples - cu->m_cuPelY; for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts) if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax) - deblockCU(cu, absPartIdx, depth + 1, dir, blockingStrength); + deblockCU(cu, absPartIdx, depth + 1, dir, blockStrength); return; } - const uint32_t widthInBaseUnits = sps.numPartInCUSize >> depth; - Param params; - setLoopfilterParam(cu, absPartIdx, ¶ms); - setEdgefilterPU(cu, absPartIdx, dir, blockingStrength, widthInBaseUnits); - setEdgefilterTU(cu, absPartIdx, depth, dir, blockingStrength); - setEdgefilterMultiple(cu, absPartIdx, dir, 0, (dir == EDGE_VER ? params.leftEdge : params.topEdge), blockingStrength, widthInBaseUnits); + const uint32_t numUnits = sps.numPartInCUSize >> depth; + setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits); + setEdgefilterTU(cu, absPartIdx, depth, dir, blockStrength); + setEdgefilterMultiple(cu, absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits); for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + curNumParts; partIdx++) { uint32_t bsCheck = !(partIdx & (1 << dir)); - if (bsCheck && blockingStrength[partIdx]) - getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength); + if (bsCheck && blockStrength[partIdx]) + blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength); } const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE; @@ -87,34 +109,33 @@ void Deblock::deblockCU(CUData* cu, uint32_t absPartIdx, uint32_t depth, const i for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr) { - edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockingStrength); + edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength); if (!((e0 + e) & chromaMask)) - edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockingStrength); + edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength); } } -static inline uint32_t calcBsIdx(CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx) +static inline uint32_t calcBsIdx(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx) { - uint32_t ctuWidthInBaseUnits = cu->m_slice->m_sps->numPartInCUSize; + uint32_t numPartInCUSize = cu->m_slice->m_sps->numPartInCUSize; if (dir) - return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * ctuWidthInBaseUnits + baseUnitIdx]; + return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * numPartInCUSize + baseUnitIdx]; else - return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * ctuWidthInBaseUnits + edgeIdx]; + return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * numPartInCUSize + edgeIdx]; } -void Deblock::setEdgefilterMultiple(CUData* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockingStrength[], uint32_t widthInBaseUnits) +void Deblock::setEdgefilterMultiple(const CUData* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits) { - const uint32_t numElem = widthInBaseUnits; - X265_CHECK(numElem > 0, "numElem edge filter check\n"); - for (uint32_t i = 0; i < numElem; i++) + X265_CHECK(numUnits > 0, "numUnits edge filter check\n"); + for (uint32_t i = 0; i < numUnits; i++) { const uint32_t bsidx = calcBsIdx(cu, scanIdx, dir, edgeIdx, i); - blockingStrength[bsidx] = value; + blockStrength[bsidx] = value; } } -void Deblock::setEdgefilterTU(CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, uint8_t blockingStrength[]) +void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, uint8_t blockStrength[]) { if ((uint32_t)cu->m_tuDepth[absPartIdx] + cu->m_cuDepth[absPartIdx] > depth) { @@ -122,47 +143,47 @@ void Deblock::setEdgefilterTU(CUData* cu, uint32_t absPartIdx, uint32_t depth, i const uint32_t qNumParts = curNumParts >> 2; for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts) - setEdgefilterTU(cu, absPartIdx, depth + 1, dir, blockingStrength); + setEdgefilterTU(cu, absPartIdx, depth + 1, dir, blockStrength); return; } - uint32_t widthInBaseUnits = 1 << (cu->m_log2CUSize[absPartIdx] - cu->m_tuDepth[absPartIdx] - LOG2_UNIT_SIZE); - setEdgefilterMultiple(cu, absPartIdx, dir, 0, 2, blockingStrength, widthInBaseUnits); + uint32_t numUnits = 1 << (cu->m_log2CUSize[absPartIdx] - cu->m_tuDepth[absPartIdx] - LOG2_UNIT_SIZE); + setEdgefilterMultiple(cu, absPartIdx, dir, 0, 2, blockStrength, numUnits); } -void Deblock::setEdgefilterPU(CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockingStrength[], uint32_t widthInBaseUnits) +void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits) { - const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1; - const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2; + const uint32_t hNumUnits = numUnits >> 1; + const uint32_t qNumUnits = numUnits >> 2; switch (cu->m_partSize[absPartIdx]) { case SIZE_2NxN: if (EDGE_HOR == dir) - setEdgefilterMultiple(cu, absPartIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); + setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits); break; case SIZE_Nx2N: if (EDGE_VER == dir) - setEdgefilterMultiple(cu, absPartIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); + setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits); break; case SIZE_NxN: - setEdgefilterMultiple(cu, absPartIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); + setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits); break; case SIZE_2NxnU: if (EDGE_HOR == dir) - setEdgefilterMultiple(cu, absPartIdx, dir, qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); + setEdgefilterMultiple(cu, absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits); break; case SIZE_nLx2N: if (EDGE_VER == dir) - setEdgefilterMultiple(cu, absPartIdx, dir, qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); + setEdgefilterMultiple(cu, absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits); break; case SIZE_2NxnD: if (EDGE_HOR == dir) - setEdgefilterMultiple(cu, absPartIdx, dir, widthInBaseUnits - qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); + setEdgefilterMultiple(cu, absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits); break; case SIZE_nRx2N: if (EDGE_VER == dir) - setEdgefilterMultiple(cu, absPartIdx, dir, widthInBaseUnits - qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); + setEdgefilterMultiple(cu, absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits); break; case SIZE_2Nx2N: @@ -171,151 +192,65 @@ void Deblock::setEdgefilterPU(CUData* cu, uint32_t absPartIdx, int32_t dir, uint } } -void Deblock::setLoopfilterParam(CUData* cu, uint32_t absPartIdx, Param *params) +uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[]) { - uint32_t x = cu->m_cuPelX + g_zscanToPelX[absPartIdx]; - uint32_t y = cu->m_cuPelY + g_zscanToPelY[absPartIdx]; - - const CUData* tempCU; - uint32_t tempPartIdx; + // Calculate block index + uint32_t partP; + const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ)); - if (!x) - params->leftEdge = 0; - else - { - tempCU = cu->getPULeft(tempPartIdx, absPartIdx); - if (tempCU) - params->leftEdge = 2; - else - params->leftEdge = 0; - } + // Set BS for Intra MB : BS = 2 + if (cuP->isIntra(partP) || cuQ->isIntra(partQ)) + return 2; - if (!y) - params->topEdge = 0; - else - { - tempCU = cu->getPUAbove(tempPartIdx, absPartIdx); - if (tempCU) - params->topEdge = 2; - else - params->topEdge = 0; - } -} + // Set BS for not Intra MB : BS = 1 or 0 + if (blockStrength[partQ] > 1 && + (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) || + cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP]))) + return 1; -void Deblock::getBoundaryStrengthSingle(CUData* cu, int32_t dir, uint32_t absPartIdx, uint8_t blockingStrength[]) -{ - const Slice* const slice = cu->m_slice; - const uint32_t partQ = absPartIdx; - CUData* const cuQ = cu; + static const MV zeroMv(0, 0); + const Slice* const sliceQ = cuQ->m_slice; + const Slice* const sliceP = cuP->m_slice; - uint32_t partP; - const CUData* cuP; - uint8_t bs = 0; + const Frame* refP0 = sliceP->getRefPic(0, cuP->m_refIdx[0][partP]); + const Frame* refQ0 = sliceQ->getRefPic(0, cuQ->m_refIdx[0][partQ]); + const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv; + const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv; - // Calculate block index - if (dir == EDGE_VER) - cuP = cuQ->getPULeft(partP, partQ); - else // (dir == EDGE_HOR) - cuP = cuQ->getPUAbove(partP, partQ); + if (sliceQ->isInterP() && sliceP->isInterP()) + { + return ((refP0 != refQ0) || + (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0; + } - // Set BS for Intra MB : BS = 4 or 3 - if (cuP->isIntra(partP) || cuQ->isIntra(partQ)) - bs = 2; + // (sliceQ->isInterB() || sliceP->isInterB()) + const Frame* refP1 = sliceP->getRefPic(1, cuP->m_refIdx[1][partP]); + const Frame* refQ1 = sliceQ->getRefPic(1, cuQ->m_refIdx[1][partQ]); + const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv; + const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv; - // Set BS for not Intra MB : BS = 2 or 1 or 0 - if (!cuP->isIntra(partP) && !cuQ->isIntra(partQ)) + if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0))) { - uint32_t nsPartQ = partQ; - uint32_t nsPartP = partP; - - if (blockingStrength[absPartIdx] > 1 && - (cuQ->getCbf(nsPartQ, TEXT_LUMA, cuQ->m_tuDepth[nsPartQ]) || - cuP->getCbf(nsPartP, TEXT_LUMA, cuP->m_tuDepth[nsPartP]))) - bs = 1; - else + if (refP0 != refP1) // Different L0 & L1 { - if (dir == EDGE_HOR) - cuP = cuQ->getPUAbove(partP, partQ); - - if (slice->isInterB() || cuP->m_slice->isInterB()) - { - int32_t refIdx; - Frame *refP0, *refP1, *refQ0, *refQ1; - refIdx = cuP->m_refIdx[0][partP]; - refP0 = (refIdx < 0) ? NULL : cuP->m_slice->m_refPicList[0][refIdx]; - refIdx = cuP->m_refIdx[1][partP]; - refP1 = (refIdx < 0) ? NULL : cuP->m_slice->m_refPicList[1][refIdx]; - refIdx = cuQ->m_refIdx[0][partQ]; - refQ0 = (refIdx < 0) ? NULL : slice->m_refPicList[0][refIdx]; - refIdx = cuQ->m_refIdx[1][partQ]; - refQ1 = (refIdx < 0) ? NULL : slice->m_refPicList[1][refIdx]; - - MV mvp0 = cuP->m_mv[0][partP]; - MV mvp1 = cuP->m_mv[1][partP]; - MV mvq0 = cuQ->m_mv[0][partQ]; - MV mvq1 = cuQ->m_mv[1][partQ]; - - if (!refP0) mvp0 = 0; - if (!refP1) mvp1 = 0; - if (!refQ0) mvq0 = 0; - if (!refQ1) mvq1 = 0; - - if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0))) - { - if (refP0 != refP1) // Different L0 & L1 - { - if (refP0 == refQ0) - { - bs = ((abs(mvq0.x - mvp0.x) >= 4) || - (abs(mvq0.y - mvp0.y) >= 4) || - (abs(mvq1.x - mvp1.x) >= 4) || - (abs(mvq1.y - mvp1.y) >= 4)) ? 1 : 0; - } - else - { - bs = ((abs(mvq1.x - mvp0.x) >= 4) || - (abs(mvq1.y - mvp0.y) >= 4) || - (abs(mvq0.x - mvp1.x) >= 4) || - (abs(mvq0.y - mvp1.y) >= 4)) ? 1 : 0; - } - } - else // Same L0 & L1 - { - bs = ((abs(mvq0.x - mvp0.x) >= 4) || - (abs(mvq0.y - mvp0.y) >= 4) || - (abs(mvq1.x - mvp1.x) >= 4) || - (abs(mvq1.y - mvp1.y) >= 4)) && - ((abs(mvq1.x - mvp0.x) >= 4) || - (abs(mvq1.y - mvp0.y) >= 4) || - (abs(mvq0.x - mvp1.x) >= 4) || - (abs(mvq0.y - mvp1.y) >= 4)) ? 1 : 0; - } - } - else // for all different Ref_Idx - bs = 1; - } - else // slice->isInterP() - { - int32_t refIdx; - Frame *refp0, *refq0; - refIdx = cuP->m_refIdx[0][partP]; - refp0 = (refIdx < 0) ? NULL : cuP->m_slice->m_refPicList[0][refIdx]; - refIdx = cuQ->m_refIdx[0][partQ]; - refq0 = (refIdx < 0) ? NULL : slice->m_refPicList[0][refIdx]; - MV mvp0 = cuP->m_mv[0][partP]; - MV mvq0 = cuQ->m_mv[0][partQ]; - - if (!refp0) mvp0 = 0; - if (!refq0) mvq0 = 0; - - bs = ((refp0 != refq0) || - (abs(mvq0.x - mvp0.x) >= 4) || - (abs(mvq0.y - mvp0.y) >= 4)) ? 1 : 0; - } + if (refP0 == refQ0) + return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) || + (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0; + else + return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) || + (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0; + } + else // Same L0 & L1 + { + return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) || + (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) && + ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) || + (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0; } } - - blockingStrength[absPartIdx] = bs; + + // for all different Ref_Idx + return 1; } static inline int32_t calcDP(pixel* src, intptr_t offset) @@ -340,46 +275,45 @@ static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, } /* Deblocking for the luminance component with strong or weak filter - * \param src pointer to picture data - * \param offset offset value for picture data - * \param tc tc value - * \param partPNoFilter indicator to disable filtering on partP - * \param partQNoFilter indicator to disable filtering on partQ - * \param filterSecondP decision weak filter/no filter for partP - * \param filterSecondQ decision weak filter/no filter for partQ */ -static inline void pelFilterLumaStrong(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter) + * \param src pointer to picture data + * \param offset offset value for picture data + * \param tc tc value + * \param maskP indicator to enable filtering on partP + * \param maskQ indicator to enable filtering on partQ + * \param maskP1 decision weak filter/no filter for partP + * \param maskQ1 decision weak filter/no filter for partQ */ +static inline void pelFilterLumaStrong(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ) { + int32_t tc2 = 2 * tc; + int32_t tcP = (tc2 & maskP); + int32_t tcQ = (tc2 & maskQ); for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) { int16_t m4 = (int16_t)src[0]; int16_t m3 = (int16_t)src[-offset]; int16_t m5 = (int16_t)src[offset]; int16_t m2 = (int16_t)src[-offset * 2]; - int32_t tc2 = 2 * tc; - if (!partPNoFilter) - { - int16_t m1 = (int16_t)src[-offset * 3]; - int16_t m0 = (int16_t)src[-offset * 4]; - src[-offset * 3] = (pixel)(Clip3(-tc2, tc2, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1); - src[-offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2); - src[-offset] = (pixel)(Clip3(-tc2, tc2, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3); - } - if (!partQNoFilter) - { - int16_t m6 = (int16_t)src[offset * 2]; - int16_t m7 = (int16_t)src[offset * 3]; - src[0] = (pixel)(Clip3(-tc2, tc2, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4); - src[offset] = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5); - src[offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6); - } + int16_t m6 = (int16_t)src[offset * 2]; + int16_t m1 = (int16_t)src[-offset * 3]; + int16_t m7 = (int16_t)src[offset * 3]; + int16_t m0 = (int16_t)src[-offset * 4]; + src[-offset * 3] = (pixel)(Clip3(-tcP, tcP, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1); + src[-offset * 2] = (pixel)(Clip3(-tcP, tcP, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2); + src[-offset] = (pixel)(Clip3(-tcP, tcP, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3); + src[0] = (pixel)(Clip3(-tcQ, tcQ, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4); + src[offset] = (pixel)(Clip3(-tcQ, tcQ, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5); + src[offset * 2] = (pixel)(Clip3(-tcQ, tcQ, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6); } } /* Weak filter */ -static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter, - bool filterSecondP, bool filterSecondQ) +static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ, + int32_t maskP1, int32_t maskQ1) { int32_t thrCut = tc * 10; + int32_t tc2 = tc >> 1; + maskP1 &= maskP; + maskQ1 &= maskQ; for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) { @@ -394,38 +328,31 @@ static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, { delta = Clip3(-tc, tc, delta); - int32_t tc2 = tc >> 1; - if (!partPNoFilter) + src[-offset] = Clip(m3 + (delta & maskP)); + src[0] = Clip(m4 - (delta & maskQ)); + if (maskP1) { - src[-offset] = Clip(m3 + delta); - if (filterSecondP) - { - int16_t m1 = (int16_t)src[-offset * 3]; - int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1)); - src[-offset * 2] = Clip(m2 + delta1); - } + int16_t m1 = (int16_t)src[-offset * 3]; + int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1)); + src[-offset * 2] = Clip(m2 + delta1); } - if (!partQNoFilter) + if (maskQ1) { - src[0] = Clip(m4 - delta); - if (filterSecondQ) - { - int16_t m6 = (int16_t)src[offset * 2]; - int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1)); - src[offset] = Clip(m5 + delta2); - } + int16_t m6 = (int16_t)src[offset * 2]; + int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1)); + src[offset] = Clip(m5 + delta2); } } } } /* Deblocking of one line/column for the chrominance component - * \param src pointer to picture data - * \param offset offset value for picture data - * \param tc tc value - * \param partPNoFilter indicator to disable filtering on partP - * \param partQNoFilter indicator to disable filtering on partQ */ -static inline void pelFilterChroma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter) + * \param src pointer to picture data + * \param offset offset value for picture data + * \param tc tc value + * \param maskP indicator to disable filtering on partP + * \param maskQ indicator to disable filtering on partQ */ +static inline void pelFilterChroma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ) { for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) { @@ -435,31 +362,25 @@ static inline void pelFilterChroma(pixel* src, intptr_t srcStep, intptr_t offset int16_t m2 = (int16_t)src[-offset * 2]; int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3)); - if (!partPNoFilter) - src[-offset] = Clip(m3 + delta); - if (!partQNoFilter) - src[0] = Clip(m4 - delta); + src[-offset] = Clip(m3 + (delta & maskP)); + src[0] = Clip(m4 - (delta & maskQ)); } } -void Deblock::edgeFilterLuma(CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[]) +void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[]) { - PicYuv* reconYuv = cu->m_encData->m_reconPicYuv; - pixel* src = reconYuv->getLumaAddr(cu->m_cuAddr, absPartIdx); - - intptr_t stride = reconYuv->m_stride; - uint32_t numParts = cu->m_slice->m_sps->numPartInCUSize >> depth; + PicYuv* reconPic = cuQ->m_encData->m_reconPic; + pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx); + intptr_t stride = reconPic->m_stride; + const PPS* pps = cuQ->m_slice->m_pps; intptr_t offset, srcStep; - bool partPNoFilter = false; - bool partQNoFilter = false; - uint32_t partP = 0; - uint32_t partQ = 0; - const CUData* cuP = cu; - const CUData* cuQ = cu; - int32_t betaOffset = cuQ->m_slice->m_pps->deblockingFilterBetaOffsetDiv2 << 1; - int32_t tcOffset = cuQ->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1; + int32_t maskP = -1; + int32_t maskQ = -1; + int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1; + int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1; + bool bCheckNoFilter = pps->bTransquantBypassEnabled; if (dir == EDGE_VER) { @@ -474,106 +395,103 @@ void Deblock::edgeFilterLuma(CUData* cu, uint32_t absPartIdx, uint32_t depth, in src += (edge << LOG2_UNIT_SIZE) * stride; } - for (uint32_t idx = 0; idx < numParts; idx++) + uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth; + for (uint32_t idx = 0; idx < numUnits; idx++) { - uint32_t unitOffset = idx << LOG2_UNIT_SIZE; - uint32_t bsAbsIdx = calcBsIdx(cu, absPartIdx, dir, edge, idx); - uint32_t bs = blockingStrength[bsAbsIdx]; - if (bs) - { - int32_t qpQ = cu->m_qp[bsAbsIdx]; - partQ = bsAbsIdx; + uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx); + uint32_t bs = blockStrength[partQ]; - // Derive neighboring PU index - if (dir == EDGE_VER) - cuP = cuQ->getPULeft(partP, partQ); - else // (dir == EDGE_HOR) - cuP = cuQ->getPUAbove(partP, partQ); + if (!bs) + continue; - int32_t qpP = cuP->m_qp[partP]; - int32_t qp = (qpP + qpQ + 1) >> 1; + int32_t qpQ = cuQ->m_qp[partQ]; - int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset); + // Derive neighboring PU index + uint32_t partP; + const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ)); - const int32_t bitdepthShift = X265_DEPTH - 8; - int32_t beta = s_betaTable[indexB] << bitdepthShift; + int32_t qpP = cuP->m_qp[partP]; + int32_t qp = (qpP + qpQ + 1) >> 1; - int32_t dp0 = calcDP(src + srcStep * (unitOffset + 0), offset); - int32_t dq0 = calcDQ(src + srcStep * (unitOffset + 0), offset); - int32_t dp3 = calcDP(src + srcStep * (unitOffset + 3), offset); - int32_t dq3 = calcDQ(src + srcStep * (unitOffset + 3), offset); - int32_t d0 = dp0 + dq0; - int32_t d3 = dp3 + dq3; + int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset); - int32_t d = d0 + d3; + const int32_t bitdepthShift = X265_DEPTH - 8; + int32_t beta = s_betaTable[indexB] << bitdepthShift; - if (d < beta) - { - if (cu->m_slice->m_pps->bTransquantBypassEnabled) - { - // check if each of PUs is lossless coded - partPNoFilter = !!cuP->m_tqBypass[partP]; - partQNoFilter = !!cuQ->m_tqBypass[partQ]; - } - - int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset)); - int32_t tc = s_tcTable[indexTC] << bitdepthShift; - - bool sw = (2 * d0 < (beta >> 2) && - 2 * d3 < (beta >> 2) && - useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 0)) && - useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 3))); - - if (sw) - pelFilterLumaStrong(src + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter); - else - { - int32_t sideThreshold = (beta + (beta >> 1)) >> 3; - int32_t dp = dp0 + dp3; - int32_t dq = dq0 + dq3; - bool filterP = (dp < sideThreshold); - bool filterQ = (dq < sideThreshold); - - pelFilterLuma(src + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter, filterP, filterQ); - } - } + intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE; + int32_t dp0 = calcDP(src + unitOffset , offset); + int32_t dq0 = calcDQ(src + unitOffset , offset); + int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset); + int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset); + int32_t d0 = dp0 + dq0; + int32_t d3 = dp3 + dq3; + + int32_t d = d0 + d3; + + if (d >= beta) + continue; + + if (bCheckNoFilter) + { + // check if each of PUs is lossless coded + maskP = (cuP->m_tqBypass[partP] ? 0 : -1); + maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1); + } + + int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset)); + int32_t tc = s_tcTable[indexTC] << bitdepthShift; + + bool sw = (2 * d0 < (beta >> 2) && + 2 * d3 < (beta >> 2) && + useStrongFiltering(offset, beta, tc, src + unitOffset ) && + useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3)); + + if (sw) + pelFilterLumaStrong(src + unitOffset, srcStep, offset, tc, maskP, maskQ); + else + { + int32_t sideThreshold = (beta + (beta >> 1)) >> 3; + int32_t dp = dp0 + dp3; + int32_t dq = dq0 + dq3; + int32_t maskP1 = (dp < sideThreshold ? -1 : 0); + int32_t maskQ1 = (dq < sideThreshold ? -1 : 0); + + pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1); } } } -void Deblock::edgeFilterChroma(CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[]) +void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[]) { - int32_t chFmt = cu->m_chromaFormat, chromaShift; + int32_t chFmt = cuQ->m_chromaFormat, chromaShift; intptr_t offset, srcStep; + const PPS* pps = cuQ->m_slice->m_pps; - bool partPNoFilter = false; - bool partQNoFilter = false; - uint32_t partP; - uint32_t partQ; - const CUData* cuP; - const CUData* cuQ = cu; - int32_t tcOffset = cu->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1; + int32_t maskP = -1; + int32_t maskQ = -1; + int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1; X265_CHECK(((dir == EDGE_VER) - ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cu->m_hChromaShift) - : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cu->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0, + ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift) + : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0, "invalid edge\n"); - PicYuv* reconPic = cu->m_encData->m_reconPicYuv; + PicYuv* reconPic = cuQ->m_encData->m_reconPic; intptr_t stride = reconPic->m_strideC; - intptr_t srcOffset = reconPic->getChromaAddrOffset(cu->m_cuAddr, absPartIdx); + intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx); + bool bCheckNoFilter = pps->bTransquantBypassEnabled; if (dir == EDGE_VER) { - chromaShift = cu->m_vChromaShift; - srcOffset += (edge << (LOG2_UNIT_SIZE - cu->m_hChromaShift)); + chromaShift = cuQ->m_vChromaShift; + srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift)); offset = 1; srcStep = stride; } else // (dir == EDGE_HOR) { - chromaShift = cu->m_hChromaShift; - srcOffset += edge * stride << (LOG2_UNIT_SIZE - cu->m_vChromaShift); + chromaShift = cuQ->m_hChromaShift; + srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift); offset = stride; srcStep = 1; } @@ -582,53 +500,50 @@ void Deblock::edgeFilterChroma(CUData* cu, uint32_t absPartIdx, uint32_t depth, srcChroma[0] = reconPic->m_picOrg[1] + srcOffset; srcChroma[1] = reconPic->m_picOrg[2] + srcOffset; - uint32_t numUnits = cu->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift); + uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift); for (uint32_t idx = 0; idx < numUnits; idx++) { - uint32_t unitOffset = idx << LOG2_UNIT_SIZE; - uint32_t bsAbsIdx = calcBsIdx(cu, absPartIdx, dir, edge, idx << chromaShift); - uint32_t bs = blockingStrength[bsAbsIdx]; + uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx << chromaShift); + uint32_t bs = blockStrength[partQ]; - if (bs > 1) - { - int32_t qpQ = cu->m_qp[bsAbsIdx]; - partQ = bsAbsIdx; + if (bs <= 1) + continue; - // Derive neighboring PU index - if (dir == EDGE_VER) - cuP = cuQ->getPULeft(partP, partQ); - else // (dir == EDGE_HOR) - cuP = cuQ->getPUAbove(partP, partQ); + int32_t qpQ = cuQ->m_qp[partQ]; - int32_t qpP = cuP->m_qp[partP]; + // Derive neighboring PU index + uint32_t partP; + const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ)); - if (cu->m_slice->m_pps->bTransquantBypassEnabled) - { - // check if each of PUs is lossless coded - partPNoFilter = !!cuP->m_tqBypass[partP]; - partQNoFilter = !!cuQ->m_tqBypass[partQ]; - } + int32_t qpP = cuP->m_qp[partP]; - for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++) + if (bCheckNoFilter) + { + // check if each of PUs is lossless coded + maskP = (cuP->m_tqBypass[partP] ? 0 : -1); + maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1); + } + + intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE; + for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++) + { + int32_t chromaQPOffset = pps->chromaQpOffset[chromaIdx]; + int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset; + if (qp >= 30) { - int32_t chromaQPOffset = !chromaIdx ? cu->m_slice->m_pps->chromaCbQpOffset : cu->m_slice->m_pps->chromaCrQpOffset; - int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset; - if (qp >= 30) - { - if (chFmt == X265_CSP_I420) - qp = g_chromaScale[qp]; - else - qp = X265_MIN(qp, 51); - } - - int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset)); - const int32_t bitdepthShift = X265_DEPTH - 8; - int32_t tc = s_tcTable[indexTC] << bitdepthShift; - pixel* srcC = srcChroma[chromaIdx]; - - pelFilterChroma(srcC + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter); + if (chFmt == X265_CSP_I420) + qp = g_chromaScale[qp]; + else + qp = X265_MIN(qp, 51); } + + int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset)); + const int32_t bitdepthShift = X265_DEPTH - 8; + int32_t tc = s_tcTable[indexTC] << bitdepthShift; + pixel* srcC = srcChroma[chromaIdx]; + + pelFilterChroma(srcC + unitOffset, srcStep, offset, tc, maskP, maskQ); } } }