| 1 | /***************************************************************************** |
| 2 | * Copyright (C) 2013 x265 project |
| 3 | * |
| 4 | * Author: Gopu Govindaswamy <gopu@multicorewareinc.com> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or modify |
| 7 | * it under the terms of the GNU General Public License as published by |
| 8 | * the Free Software Foundation; either version 2 of the License, or |
| 9 | * (at your option) any later version. |
| 10 | * |
| 11 | * This program is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | * GNU General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU General Public License |
| 17 | * along with this program; if not, write to the Free Software |
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
| 19 | * |
| 20 | * This program is also available under a commercial proprietary license. |
| 21 | * For more information, contact us at license @ x265.com. |
| 22 | *****************************************************************************/ |
| 23 | |
| 24 | #include "common.h" |
| 25 | #include "deblock.h" |
| 26 | #include "framedata.h" |
| 27 | #include "picyuv.h" |
| 28 | #include "slice.h" |
| 29 | #include "mv.h" |
| 30 | |
| 31 | using namespace x265; |
| 32 | |
/* Spacing of the deblocking edge grid; shifted right by LOG2_UNIT_SIZE it gives
 * the step, in partition units, between the edges visited inside a CU */
#define DEBLOCK_SMALLEST_BLOCK 8
/* Added to the tc table index: once per boundary-strength step above 1 for
 * luma, unconditionally for chroma. Also extends the upper clip bound of
 * that index beyond QP_MAX_SPEC */
#define DEFAULT_INTRA_TC_OFFSET 2
| 35 | |
| 36 | void Deblock::deblockCTU(const CUData* ctu, int32_t dir) |
| 37 | { |
| 38 | uint8_t blockStrength[MAX_NUM_PARTITIONS]; |
| 39 | |
| 40 | memset(blockStrength, 0, sizeof(uint8_t) * m_numPartitions); |
| 41 | |
| 42 | deblockCU(ctu, 0, 0, dir, blockStrength); |
| 43 | } |
| 44 | |
| 45 | static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir) |
| 46 | { |
| 47 | if (dir == Deblock::EDGE_VER) |
| 48 | { |
| 49 | if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0) |
| 50 | { |
| 51 | uint32_t tempPartIdx; |
| 52 | const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx); |
| 53 | return tempCU ? 2 : 0; |
| 54 | } |
| 55 | } |
| 56 | else |
| 57 | { |
| 58 | if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0) |
| 59 | { |
| 60 | uint32_t tempPartIdx; |
| 61 | const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx); |
| 62 | return tempCU ? 2 : 0; |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | return 0; |
| 67 | } |
| 68 | |
| 69 | /* Deblocking filter process in CU-based (the same function as conventional's) |
| 70 | * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */ |
| 71 | void Deblock::deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockStrength[]) |
| 72 | { |
| 73 | if (cu->m_predMode[absPartIdx] == MODE_NONE) |
| 74 | return; |
| 75 | |
| 76 | uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1); |
| 77 | |
| 78 | const SPS& sps = *cu->m_slice->m_sps; |
| 79 | |
| 80 | if (cu->m_cuDepth[absPartIdx] > depth) |
| 81 | { |
| 82 | uint32_t qNumParts = curNumParts >> 2; |
| 83 | uint32_t xmax = sps.picWidthInLumaSamples - cu->m_cuPelX; |
| 84 | uint32_t ymax = sps.picHeightInLumaSamples - cu->m_cuPelY; |
| 85 | for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts) |
| 86 | if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax) |
| 87 | deblockCU(cu, absPartIdx, depth + 1, dir, blockStrength); |
| 88 | return; |
| 89 | } |
| 90 | |
| 91 | const uint32_t numUnits = sps.numPartInCUSize >> depth; |
| 92 | setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits); |
| 93 | setEdgefilterTU(cu, absPartIdx, depth, dir, blockStrength); |
| 94 | setEdgefilterMultiple(cu, absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits); |
| 95 | |
| 96 | for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + curNumParts; partIdx++) |
| 97 | { |
| 98 | uint32_t bsCheck = !(partIdx & (1 << dir)); |
| 99 | |
| 100 | if (bsCheck && blockStrength[partIdx]) |
| 101 | blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength); |
| 102 | } |
| 103 | |
| 104 | const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE; |
| 105 | uint32_t sizeInPU = sps.numPartInCUSize >> depth; |
| 106 | uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift; |
| 107 | uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1; |
| 108 | uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE; |
| 109 | |
| 110 | for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr) |
| 111 | { |
| 112 | edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength); |
| 113 | if (!((e0 + e) & chromaMask)) |
| 114 | edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength); |
| 115 | } |
| 116 | } |
| 117 | |
| 118 | static inline uint32_t calcBsIdx(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx) |
| 119 | { |
| 120 | uint32_t numPartInCUSize = cu->m_slice->m_sps->numPartInCUSize; |
| 121 | |
| 122 | if (dir) |
| 123 | return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * numPartInCUSize + baseUnitIdx]; |
| 124 | else |
| 125 | return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * numPartInCUSize + edgeIdx]; |
| 126 | } |
| 127 | |
| 128 | void Deblock::setEdgefilterMultiple(const CUData* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits) |
| 129 | { |
| 130 | X265_CHECK(numUnits > 0, "numUnits edge filter check\n"); |
| 131 | for (uint32_t i = 0; i < numUnits; i++) |
| 132 | { |
| 133 | const uint32_t bsidx = calcBsIdx(cu, scanIdx, dir, edgeIdx, i); |
| 134 | blockStrength[bsidx] = value; |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, uint8_t blockStrength[]) |
| 139 | { |
| 140 | if ((uint32_t)cu->m_tuDepth[absPartIdx] + cu->m_cuDepth[absPartIdx] > depth) |
| 141 | { |
| 142 | const uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1); |
| 143 | const uint32_t qNumParts = curNumParts >> 2; |
| 144 | |
| 145 | for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts) |
| 146 | setEdgefilterTU(cu, absPartIdx, depth + 1, dir, blockStrength); |
| 147 | return; |
| 148 | } |
| 149 | |
| 150 | uint32_t numUnits = 1 << (cu->m_log2CUSize[absPartIdx] - cu->m_tuDepth[absPartIdx] - LOG2_UNIT_SIZE); |
| 151 | setEdgefilterMultiple(cu, absPartIdx, dir, 0, 2, blockStrength, numUnits); |
| 152 | } |
| 153 | |
| 154 | void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits) |
| 155 | { |
| 156 | const uint32_t hNumUnits = numUnits >> 1; |
| 157 | const uint32_t qNumUnits = numUnits >> 2; |
| 158 | |
| 159 | switch (cu->m_partSize[absPartIdx]) |
| 160 | { |
| 161 | case SIZE_2NxN: |
| 162 | if (EDGE_HOR == dir) |
| 163 | setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits); |
| 164 | break; |
| 165 | case SIZE_Nx2N: |
| 166 | if (EDGE_VER == dir) |
| 167 | setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits); |
| 168 | break; |
| 169 | case SIZE_NxN: |
| 170 | setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits); |
| 171 | break; |
| 172 | case SIZE_2NxnU: |
| 173 | if (EDGE_HOR == dir) |
| 174 | setEdgefilterMultiple(cu, absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits); |
| 175 | break; |
| 176 | case SIZE_nLx2N: |
| 177 | if (EDGE_VER == dir) |
| 178 | setEdgefilterMultiple(cu, absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits); |
| 179 | break; |
| 180 | case SIZE_2NxnD: |
| 181 | if (EDGE_HOR == dir) |
| 182 | setEdgefilterMultiple(cu, absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits); |
| 183 | break; |
| 184 | case SIZE_nRx2N: |
| 185 | if (EDGE_VER == dir) |
| 186 | setEdgefilterMultiple(cu, absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits); |
| 187 | break; |
| 188 | |
| 189 | case SIZE_2Nx2N: |
| 190 | default: |
| 191 | break; |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[]) |
| 196 | { |
| 197 | // Calculate block index |
| 198 | uint32_t partP; |
| 199 | const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ)); |
| 200 | |
| 201 | // Set BS for Intra MB : BS = 2 |
| 202 | if (cuP->isIntra(partP) || cuQ->isIntra(partQ)) |
| 203 | return 2; |
| 204 | |
| 205 | // Set BS for not Intra MB : BS = 1 or 0 |
| 206 | if (blockStrength[partQ] > 1 && |
| 207 | (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) || |
| 208 | cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP]))) |
| 209 | return 1; |
| 210 | |
| 211 | static const MV zeroMv(0, 0); |
| 212 | const Slice* const sliceQ = cuQ->m_slice; |
| 213 | const Slice* const sliceP = cuP->m_slice; |
| 214 | |
| 215 | const Frame* refP0 = sliceP->getRefPic(0, cuP->m_refIdx[0][partP]); |
| 216 | const Frame* refQ0 = sliceQ->getRefPic(0, cuQ->m_refIdx[0][partQ]); |
| 217 | const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv; |
| 218 | const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv; |
| 219 | |
| 220 | if (sliceQ->isInterP() && sliceP->isInterP()) |
| 221 | { |
| 222 | return ((refP0 != refQ0) || |
| 223 | (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0; |
| 224 | } |
| 225 | |
| 226 | // (sliceQ->isInterB() || sliceP->isInterB()) |
| 227 | const Frame* refP1 = sliceP->getRefPic(1, cuP->m_refIdx[1][partP]); |
| 228 | const Frame* refQ1 = sliceQ->getRefPic(1, cuQ->m_refIdx[1][partQ]); |
| 229 | const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv; |
| 230 | const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv; |
| 231 | |
| 232 | if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0))) |
| 233 | { |
| 234 | if (refP0 != refP1) // Different L0 & L1 |
| 235 | { |
| 236 | if (refP0 == refQ0) |
| 237 | return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) || |
| 238 | (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0; |
| 239 | else |
| 240 | return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) || |
| 241 | (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0; |
| 242 | } |
| 243 | else // Same L0 & L1 |
| 244 | { |
| 245 | return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) || |
| 246 | (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) && |
| 247 | ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) || |
| 248 | (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0; |
| 249 | } |
| 250 | } |
| 251 | |
| 252 | // for all different Ref_Idx |
| 253 | return 1; |
| 254 | } |
| 255 | |
| 256 | static inline int32_t calcDP(pixel* src, intptr_t offset) |
| 257 | { |
| 258 | return abs(static_cast<int32_t>(src[-offset * 3]) - 2 * src[-offset * 2] + src[-offset]); |
| 259 | } |
| 260 | |
| 261 | static inline int32_t calcDQ(pixel* src, intptr_t offset) |
| 262 | { |
| 263 | return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] + src[offset * 2]); |
| 264 | } |
| 265 | |
| 266 | static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src) |
| 267 | { |
| 268 | int16_t m4 = (int16_t)src[0]; |
| 269 | int16_t m3 = (int16_t)src[-offset]; |
| 270 | int16_t m7 = (int16_t)src[offset * 3]; |
| 271 | int16_t m0 = (int16_t)src[-offset * 4]; |
| 272 | int32_t strong = abs(m0 - m3) + abs(m7 - m4); |
| 273 | |
| 274 | return (strong < (beta >> 3)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1)); |
| 275 | } |
| 276 | |
| 277 | /* Deblocking for the luminance component with strong or weak filter |
| 278 | * \param src pointer to picture data |
| 279 | * \param offset offset value for picture data |
| 280 | * \param tc tc value |
| 281 | * \param maskP indicator to enable filtering on partP |
| 282 | * \param maskQ indicator to enable filtering on partQ |
| 283 | * \param maskP1 decision weak filter/no filter for partP |
| 284 | * \param maskQ1 decision weak filter/no filter for partQ */ |
| 285 | static inline void pelFilterLumaStrong(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ) |
| 286 | { |
| 287 | int32_t tc2 = 2 * tc; |
| 288 | int32_t tcP = (tc2 & maskP); |
| 289 | int32_t tcQ = (tc2 & maskQ); |
| 290 | for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) |
| 291 | { |
| 292 | int16_t m4 = (int16_t)src[0]; |
| 293 | int16_t m3 = (int16_t)src[-offset]; |
| 294 | int16_t m5 = (int16_t)src[offset]; |
| 295 | int16_t m2 = (int16_t)src[-offset * 2]; |
| 296 | int16_t m6 = (int16_t)src[offset * 2]; |
| 297 | int16_t m1 = (int16_t)src[-offset * 3]; |
| 298 | int16_t m7 = (int16_t)src[offset * 3]; |
| 299 | int16_t m0 = (int16_t)src[-offset * 4]; |
| 300 | src[-offset * 3] = (pixel)(Clip3(-tcP, tcP, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1); |
| 301 | src[-offset * 2] = (pixel)(Clip3(-tcP, tcP, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2); |
| 302 | src[-offset] = (pixel)(Clip3(-tcP, tcP, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3); |
| 303 | src[0] = (pixel)(Clip3(-tcQ, tcQ, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4); |
| 304 | src[offset] = (pixel)(Clip3(-tcQ, tcQ, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5); |
| 305 | src[offset * 2] = (pixel)(Clip3(-tcQ, tcQ, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6); |
| 306 | } |
| 307 | } |
| 308 | |
| 309 | /* Weak filter */ |
| 310 | static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ, |
| 311 | int32_t maskP1, int32_t maskQ1) |
| 312 | { |
| 313 | int32_t thrCut = tc * 10; |
| 314 | int32_t tc2 = tc >> 1; |
| 315 | maskP1 &= maskP; |
| 316 | maskQ1 &= maskQ; |
| 317 | |
| 318 | for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) |
| 319 | { |
| 320 | int16_t m4 = (int16_t)src[0]; |
| 321 | int16_t m3 = (int16_t)src[-offset]; |
| 322 | int16_t m5 = (int16_t)src[offset]; |
| 323 | int16_t m2 = (int16_t)src[-offset * 2]; |
| 324 | |
| 325 | int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4; |
| 326 | |
| 327 | if (abs(delta) < thrCut) |
| 328 | { |
| 329 | delta = Clip3(-tc, tc, delta); |
| 330 | |
| 331 | src[-offset] = Clip(m3 + (delta & maskP)); |
| 332 | src[0] = Clip(m4 - (delta & maskQ)); |
| 333 | if (maskP1) |
| 334 | { |
| 335 | int16_t m1 = (int16_t)src[-offset * 3]; |
| 336 | int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1)); |
| 337 | src[-offset * 2] = Clip(m2 + delta1); |
| 338 | } |
| 339 | if (maskQ1) |
| 340 | { |
| 341 | int16_t m6 = (int16_t)src[offset * 2]; |
| 342 | int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1)); |
| 343 | src[offset] = Clip(m5 + delta2); |
| 344 | } |
| 345 | } |
| 346 | } |
| 347 | } |
| 348 | |
| 349 | /* Deblocking of one line/column for the chrominance component |
| 350 | * \param src pointer to picture data |
| 351 | * \param offset offset value for picture data |
| 352 | * \param tc tc value |
| 353 | * \param maskP indicator to disable filtering on partP |
| 354 | * \param maskQ indicator to disable filtering on partQ */ |
| 355 | static inline void pelFilterChroma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ) |
| 356 | { |
| 357 | for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) |
| 358 | { |
| 359 | int16_t m4 = (int16_t)src[0]; |
| 360 | int16_t m3 = (int16_t)src[-offset]; |
| 361 | int16_t m5 = (int16_t)src[offset]; |
| 362 | int16_t m2 = (int16_t)src[-offset * 2]; |
| 363 | |
| 364 | int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3)); |
| 365 | src[-offset] = Clip(m3 + (delta & maskP)); |
| 366 | src[0] = Clip(m4 - (delta & maskQ)); |
| 367 | } |
| 368 | } |
| 369 | |
| 370 | void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[]) |
| 371 | { |
| 372 | PicYuv* reconPic = cuQ->m_encData->m_reconPic; |
| 373 | pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx); |
| 374 | intptr_t stride = reconPic->m_stride; |
| 375 | const PPS* pps = cuQ->m_slice->m_pps; |
| 376 | |
| 377 | intptr_t offset, srcStep; |
| 378 | |
| 379 | int32_t maskP = -1; |
| 380 | int32_t maskQ = -1; |
| 381 | int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1; |
| 382 | int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1; |
| 383 | bool bCheckNoFilter = pps->bTransquantBypassEnabled; |
| 384 | |
| 385 | if (dir == EDGE_VER) |
| 386 | { |
| 387 | offset = 1; |
| 388 | srcStep = stride; |
| 389 | src += (edge << LOG2_UNIT_SIZE); |
| 390 | } |
| 391 | else // (dir == EDGE_HOR) |
| 392 | { |
| 393 | offset = stride; |
| 394 | srcStep = 1; |
| 395 | src += (edge << LOG2_UNIT_SIZE) * stride; |
| 396 | } |
| 397 | |
| 398 | uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth; |
| 399 | for (uint32_t idx = 0; idx < numUnits; idx++) |
| 400 | { |
| 401 | uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx); |
| 402 | uint32_t bs = blockStrength[partQ]; |
| 403 | |
| 404 | if (!bs) |
| 405 | continue; |
| 406 | |
| 407 | int32_t qpQ = cuQ->m_qp[partQ]; |
| 408 | |
| 409 | // Derive neighboring PU index |
| 410 | uint32_t partP; |
| 411 | const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ)); |
| 412 | |
| 413 | int32_t qpP = cuP->m_qp[partP]; |
| 414 | int32_t qp = (qpP + qpQ + 1) >> 1; |
| 415 | |
| 416 | int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset); |
| 417 | |
| 418 | const int32_t bitdepthShift = X265_DEPTH - 8; |
| 419 | int32_t beta = s_betaTable[indexB] << bitdepthShift; |
| 420 | |
| 421 | intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE; |
| 422 | int32_t dp0 = calcDP(src + unitOffset , offset); |
| 423 | int32_t dq0 = calcDQ(src + unitOffset , offset); |
| 424 | int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset); |
| 425 | int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset); |
| 426 | int32_t d0 = dp0 + dq0; |
| 427 | int32_t d3 = dp3 + dq3; |
| 428 | |
| 429 | int32_t d = d0 + d3; |
| 430 | |
| 431 | if (d >= beta) |
| 432 | continue; |
| 433 | |
| 434 | if (bCheckNoFilter) |
| 435 | { |
| 436 | // check if each of PUs is lossless coded |
| 437 | maskP = (cuP->m_tqBypass[partP] ? 0 : -1); |
| 438 | maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1); |
| 439 | } |
| 440 | |
| 441 | int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset)); |
| 442 | int32_t tc = s_tcTable[indexTC] << bitdepthShift; |
| 443 | |
| 444 | bool sw = (2 * d0 < (beta >> 2) && |
| 445 | 2 * d3 < (beta >> 2) && |
| 446 | useStrongFiltering(offset, beta, tc, src + unitOffset ) && |
| 447 | useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3)); |
| 448 | |
| 449 | if (sw) |
| 450 | pelFilterLumaStrong(src + unitOffset, srcStep, offset, tc, maskP, maskQ); |
| 451 | else |
| 452 | { |
| 453 | int32_t sideThreshold = (beta + (beta >> 1)) >> 3; |
| 454 | int32_t dp = dp0 + dp3; |
| 455 | int32_t dq = dq0 + dq3; |
| 456 | int32_t maskP1 = (dp < sideThreshold ? -1 : 0); |
| 457 | int32_t maskQ1 = (dq < sideThreshold ? -1 : 0); |
| 458 | |
| 459 | pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1); |
| 460 | } |
| 461 | } |
| 462 | } |
| 463 | |
| 464 | void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[]) |
| 465 | { |
| 466 | int32_t chFmt = cuQ->m_chromaFormat, chromaShift; |
| 467 | intptr_t offset, srcStep; |
| 468 | const PPS* pps = cuQ->m_slice->m_pps; |
| 469 | |
| 470 | int32_t maskP = -1; |
| 471 | int32_t maskQ = -1; |
| 472 | int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1; |
| 473 | |
| 474 | X265_CHECK(((dir == EDGE_VER) |
| 475 | ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift) |
| 476 | : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0, |
| 477 | "invalid edge\n"); |
| 478 | |
| 479 | PicYuv* reconPic = cuQ->m_encData->m_reconPic; |
| 480 | intptr_t stride = reconPic->m_strideC; |
| 481 | intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx); |
| 482 | bool bCheckNoFilter = pps->bTransquantBypassEnabled; |
| 483 | |
| 484 | if (dir == EDGE_VER) |
| 485 | { |
| 486 | chromaShift = cuQ->m_vChromaShift; |
| 487 | srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift)); |
| 488 | offset = 1; |
| 489 | srcStep = stride; |
| 490 | } |
| 491 | else // (dir == EDGE_HOR) |
| 492 | { |
| 493 | chromaShift = cuQ->m_hChromaShift; |
| 494 | srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift); |
| 495 | offset = stride; |
| 496 | srcStep = 1; |
| 497 | } |
| 498 | |
| 499 | pixel* srcChroma[2]; |
| 500 | srcChroma[0] = reconPic->m_picOrg[1] + srcOffset; |
| 501 | srcChroma[1] = reconPic->m_picOrg[2] + srcOffset; |
| 502 | |
| 503 | uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift); |
| 504 | |
| 505 | for (uint32_t idx = 0; idx < numUnits; idx++) |
| 506 | { |
| 507 | uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx << chromaShift); |
| 508 | uint32_t bs = blockStrength[partQ]; |
| 509 | |
| 510 | if (bs <= 1) |
| 511 | continue; |
| 512 | |
| 513 | int32_t qpQ = cuQ->m_qp[partQ]; |
| 514 | |
| 515 | // Derive neighboring PU index |
| 516 | uint32_t partP; |
| 517 | const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ)); |
| 518 | |
| 519 | int32_t qpP = cuP->m_qp[partP]; |
| 520 | |
| 521 | if (bCheckNoFilter) |
| 522 | { |
| 523 | // check if each of PUs is lossless coded |
| 524 | maskP = (cuP->m_tqBypass[partP] ? 0 : -1); |
| 525 | maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1); |
| 526 | } |
| 527 | |
| 528 | intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE; |
| 529 | for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++) |
| 530 | { |
| 531 | int32_t chromaQPOffset = pps->chromaQpOffset[chromaIdx]; |
| 532 | int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset; |
| 533 | if (qp >= 30) |
| 534 | { |
| 535 | if (chFmt == X265_CSP_I420) |
| 536 | qp = g_chromaScale[qp]; |
| 537 | else |
| 538 | qp = X265_MIN(qp, 51); |
| 539 | } |
| 540 | |
| 541 | int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset)); |
| 542 | const int32_t bitdepthShift = X265_DEPTH - 8; |
| 543 | int32_t tc = s_tcTable[indexTC] << bitdepthShift; |
| 544 | pixel* srcC = srcChroma[chromaIdx]; |
| 545 | |
| 546 | pelFilterChroma(srcC + unitOffset, srcStep, offset, tc, maskP, maskQ); |
| 547 | } |
| 548 | } |
| 549 | } |
| 550 | |
/* tc values. The edge filters look this table up with an index clipped to
 * [0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET] and shift the entry left by
 * (X265_DEPTH - 8) bits before use */
const uint8_t Deblock::s_tcTable[54] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
    2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24
};
| 556 | |
/* beta values. The luma edge filter looks this table up with an index clipped
 * to [0, QP_MAX_SPEC] and shifts the entry left by (X265_DEPTH - 8) bits
 * before use */
const uint8_t Deblock::s_betaTable[52] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
    18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
};
| 562 | |