Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Author: Gopu Govindaswamy <gopu@multicorewareinc.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com. | |
22 | *****************************************************************************/ | |
23 | ||
24 | #include "common.h" | |
25 | #include "deblock.h" | |
26 | #include "framedata.h" | |
27 | #include "picyuv.h" | |
28 | #include "slice.h" | |
29 | #include "mv.h" | |
30 | ||
31 | using namespace x265; | |
32 | ||
/* Spacing, in samples, of the grid on which edges are considered for deblocking */
#define DEBLOCK_SMALLEST_BLOCK  8

/* Added to the tc table index for each boundary-strength step above 1 */
#define DEFAULT_INTRA_TC_OFFSET 2
35 | ||
b53f7c52 | 36 | void Deblock::deblockCTU(const CUData* ctu, int32_t dir) |
72b9787e | 37 | { |
b53f7c52 | 38 | uint8_t blockStrength[MAX_NUM_PARTITIONS]; |
72b9787e | 39 | |
b53f7c52 | 40 | memset(blockStrength, 0, sizeof(uint8_t) * m_numPartitions); |
72b9787e | 41 | |
b53f7c52 JB |
42 | deblockCU(ctu, 0, 0, dir, blockStrength); |
43 | } | |
44 | ||
45 | static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir) | |
46 | { | |
47 | if (dir == Deblock::EDGE_VER) | |
48 | { | |
49 | if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0) | |
50 | { | |
51 | uint32_t tempPartIdx; | |
52 | const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx); | |
53 | return tempCU ? 2 : 0; | |
54 | } | |
55 | } | |
56 | else | |
57 | { | |
58 | if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0) | |
59 | { | |
60 | uint32_t tempPartIdx; | |
61 | const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx); | |
62 | return tempCU ? 2 : 0; | |
63 | } | |
64 | } | |
65 | ||
66 | return 0; | |
72b9787e JB |
67 | } |
68 | ||
69 | /* Deblocking filter process in CU-based (the same function as conventional's) | |
70 | * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */ | |
b53f7c52 | 71 | void Deblock::deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockStrength[]) |
72b9787e | 72 | { |
b53f7c52 | 73 | if (cu->m_predMode[absPartIdx] == MODE_NONE) |
72b9787e JB |
74 | return; |
75 | ||
76 | uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1); | |
77 | ||
78 | const SPS& sps = *cu->m_slice->m_sps; | |
79 | ||
80 | if (cu->m_cuDepth[absPartIdx] > depth) | |
81 | { | |
82 | uint32_t qNumParts = curNumParts >> 2; | |
83 | uint32_t xmax = sps.picWidthInLumaSamples - cu->m_cuPelX; | |
84 | uint32_t ymax = sps.picHeightInLumaSamples - cu->m_cuPelY; | |
85 | for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts) | |
86 | if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax) | |
b53f7c52 | 87 | deblockCU(cu, absPartIdx, depth + 1, dir, blockStrength); |
72b9787e JB |
88 | return; |
89 | } | |
90 | ||
b53f7c52 JB |
91 | const uint32_t numUnits = sps.numPartInCUSize >> depth; |
92 | setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits); | |
93 | setEdgefilterTU(cu, absPartIdx, depth, dir, blockStrength); | |
94 | setEdgefilterMultiple(cu, absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits); | |
72b9787e JB |
95 | |
96 | for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + curNumParts; partIdx++) | |
97 | { | |
98 | uint32_t bsCheck = !(partIdx & (1 << dir)); | |
99 | ||
b53f7c52 JB |
100 | if (bsCheck && blockStrength[partIdx]) |
101 | blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength); | |
72b9787e JB |
102 | } |
103 | ||
104 | const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE; | |
105 | uint32_t sizeInPU = sps.numPartInCUSize >> depth; | |
106 | uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift; | |
107 | uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1; | |
108 | uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE; | |
109 | ||
110 | for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr) | |
111 | { | |
b53f7c52 | 112 | edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength); |
72b9787e | 113 | if (!((e0 + e) & chromaMask)) |
b53f7c52 | 114 | edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength); |
72b9787e JB |
115 | } |
116 | } | |
117 | ||
b53f7c52 | 118 | static inline uint32_t calcBsIdx(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx) |
72b9787e | 119 | { |
b53f7c52 | 120 | uint32_t numPartInCUSize = cu->m_slice->m_sps->numPartInCUSize; |
72b9787e JB |
121 | |
122 | if (dir) | |
b53f7c52 | 123 | return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * numPartInCUSize + baseUnitIdx]; |
72b9787e | 124 | else |
b53f7c52 | 125 | return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * numPartInCUSize + edgeIdx]; |
72b9787e JB |
126 | } |
127 | ||
b53f7c52 | 128 | void Deblock::setEdgefilterMultiple(const CUData* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits) |
72b9787e | 129 | { |
b53f7c52 JB |
130 | X265_CHECK(numUnits > 0, "numUnits edge filter check\n"); |
131 | for (uint32_t i = 0; i < numUnits; i++) | |
72b9787e JB |
132 | { |
133 | const uint32_t bsidx = calcBsIdx(cu, scanIdx, dir, edgeIdx, i); | |
b53f7c52 | 134 | blockStrength[bsidx] = value; |
72b9787e JB |
135 | } |
136 | } | |
137 | ||
b53f7c52 | 138 | void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, uint8_t blockStrength[]) |
72b9787e JB |
139 | { |
140 | if ((uint32_t)cu->m_tuDepth[absPartIdx] + cu->m_cuDepth[absPartIdx] > depth) | |
141 | { | |
142 | const uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1); | |
143 | const uint32_t qNumParts = curNumParts >> 2; | |
144 | ||
145 | for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts) | |
b53f7c52 | 146 | setEdgefilterTU(cu, absPartIdx, depth + 1, dir, blockStrength); |
72b9787e JB |
147 | return; |
148 | } | |
149 | ||
b53f7c52 JB |
150 | uint32_t numUnits = 1 << (cu->m_log2CUSize[absPartIdx] - cu->m_tuDepth[absPartIdx] - LOG2_UNIT_SIZE); |
151 | setEdgefilterMultiple(cu, absPartIdx, dir, 0, 2, blockStrength, numUnits); | |
72b9787e JB |
152 | } |
153 | ||
b53f7c52 | 154 | void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits) |
72b9787e | 155 | { |
b53f7c52 JB |
156 | const uint32_t hNumUnits = numUnits >> 1; |
157 | const uint32_t qNumUnits = numUnits >> 2; | |
72b9787e JB |
158 | |
159 | switch (cu->m_partSize[absPartIdx]) | |
160 | { | |
161 | case SIZE_2NxN: | |
162 | if (EDGE_HOR == dir) | |
b53f7c52 | 163 | setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits); |
72b9787e JB |
164 | break; |
165 | case SIZE_Nx2N: | |
166 | if (EDGE_VER == dir) | |
b53f7c52 | 167 | setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits); |
72b9787e JB |
168 | break; |
169 | case SIZE_NxN: | |
b53f7c52 | 170 | setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits); |
72b9787e JB |
171 | break; |
172 | case SIZE_2NxnU: | |
173 | if (EDGE_HOR == dir) | |
b53f7c52 | 174 | setEdgefilterMultiple(cu, absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits); |
72b9787e JB |
175 | break; |
176 | case SIZE_nLx2N: | |
177 | if (EDGE_VER == dir) | |
b53f7c52 | 178 | setEdgefilterMultiple(cu, absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits); |
72b9787e JB |
179 | break; |
180 | case SIZE_2NxnD: | |
181 | if (EDGE_HOR == dir) | |
b53f7c52 | 182 | setEdgefilterMultiple(cu, absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits); |
72b9787e JB |
183 | break; |
184 | case SIZE_nRx2N: | |
185 | if (EDGE_VER == dir) | |
b53f7c52 | 186 | setEdgefilterMultiple(cu, absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits); |
72b9787e JB |
187 | break; |
188 | ||
189 | case SIZE_2Nx2N: | |
190 | default: | |
191 | break; | |
192 | } | |
193 | } | |
194 | ||
b53f7c52 | 195 | uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[]) |
72b9787e | 196 | { |
b53f7c52 JB |
197 | // Calculate block index |
198 | uint32_t partP; | |
199 | const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ)); | |
72b9787e | 200 | |
b53f7c52 JB |
201 | // Set BS for Intra MB : BS = 2 |
202 | if (cuP->isIntra(partP) || cuQ->isIntra(partQ)) | |
203 | return 2; | |
72b9787e | 204 | |
b53f7c52 JB |
205 | // Set BS for not Intra MB : BS = 1 or 0 |
206 | if (blockStrength[partQ] > 1 && | |
207 | (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) || | |
208 | cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP]))) | |
209 | return 1; | |
72b9787e | 210 | |
b53f7c52 JB |
211 | static const MV zeroMv(0, 0); |
212 | const Slice* const sliceQ = cuQ->m_slice; | |
213 | const Slice* const sliceP = cuP->m_slice; | |
72b9787e | 214 | |
b53f7c52 JB |
215 | const Frame* refP0 = sliceP->getRefPic(0, cuP->m_refIdx[0][partP]); |
216 | const Frame* refQ0 = sliceQ->getRefPic(0, cuQ->m_refIdx[0][partQ]); | |
217 | const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv; | |
218 | const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv; | |
72b9787e | 219 | |
b53f7c52 JB |
220 | if (sliceQ->isInterP() && sliceP->isInterP()) |
221 | { | |
222 | return ((refP0 != refQ0) || | |
223 | (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0; | |
224 | } | |
72b9787e | 225 | |
b53f7c52 JB |
226 | // (sliceQ->isInterB() || sliceP->isInterB()) |
227 | const Frame* refP1 = sliceP->getRefPic(1, cuP->m_refIdx[1][partP]); | |
228 | const Frame* refQ1 = sliceQ->getRefPic(1, cuQ->m_refIdx[1][partQ]); | |
229 | const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv; | |
230 | const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv; | |
72b9787e | 231 | |
b53f7c52 | 232 | if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0))) |
72b9787e | 233 | { |
b53f7c52 | 234 | if (refP0 != refP1) // Different L0 & L1 |
72b9787e | 235 | { |
b53f7c52 JB |
236 | if (refP0 == refQ0) |
237 | return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) || | |
238 | (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0; | |
239 | else | |
240 | return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) || | |
241 | (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0; | |
242 | } | |
243 | else // Same L0 & L1 | |
244 | { | |
245 | return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) || | |
246 | (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) && | |
247 | ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) || | |
248 | (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0; | |
72b9787e JB |
249 | } |
250 | } | |
b53f7c52 JB |
251 | |
252 | // for all different Ref_Idx | |
253 | return 1; | |
72b9787e JB |
254 | } |
255 | ||
256 | static inline int32_t calcDP(pixel* src, intptr_t offset) | |
257 | { | |
258 | return abs(static_cast<int32_t>(src[-offset * 3]) - 2 * src[-offset * 2] + src[-offset]); | |
259 | } | |
260 | ||
261 | static inline int32_t calcDQ(pixel* src, intptr_t offset) | |
262 | { | |
263 | return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] + src[offset * 2]); | |
264 | } | |
265 | ||
266 | static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src) | |
267 | { | |
268 | int16_t m4 = (int16_t)src[0]; | |
269 | int16_t m3 = (int16_t)src[-offset]; | |
270 | int16_t m7 = (int16_t)src[offset * 3]; | |
271 | int16_t m0 = (int16_t)src[-offset * 4]; | |
272 | int32_t strong = abs(m0 - m3) + abs(m7 - m4); | |
273 | ||
274 | return (strong < (beta >> 3)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1)); | |
275 | } | |
276 | ||
277 | /* Deblocking for the luminance component with strong or weak filter | |
b53f7c52 JB |
278 | * \param src pointer to picture data |
279 | * \param offset offset value for picture data | |
280 | * \param tc tc value | |
281 | * \param maskP indicator to enable filtering on partP | |
282 | * \param maskQ indicator to enable filtering on partQ | |
283 | * \param maskP1 decision weak filter/no filter for partP | |
284 | * \param maskQ1 decision weak filter/no filter for partQ */ | |
285 | static inline void pelFilterLumaStrong(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ) | |
72b9787e | 286 | { |
b53f7c52 JB |
287 | int32_t tc2 = 2 * tc; |
288 | int32_t tcP = (tc2 & maskP); | |
289 | int32_t tcQ = (tc2 & maskQ); | |
72b9787e JB |
290 | for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) |
291 | { | |
292 | int16_t m4 = (int16_t)src[0]; | |
293 | int16_t m3 = (int16_t)src[-offset]; | |
294 | int16_t m5 = (int16_t)src[offset]; | |
295 | int16_t m2 = (int16_t)src[-offset * 2]; | |
b53f7c52 JB |
296 | int16_t m6 = (int16_t)src[offset * 2]; |
297 | int16_t m1 = (int16_t)src[-offset * 3]; | |
298 | int16_t m7 = (int16_t)src[offset * 3]; | |
299 | int16_t m0 = (int16_t)src[-offset * 4]; | |
300 | src[-offset * 3] = (pixel)(Clip3(-tcP, tcP, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1); | |
301 | src[-offset * 2] = (pixel)(Clip3(-tcP, tcP, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2); | |
302 | src[-offset] = (pixel)(Clip3(-tcP, tcP, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3); | |
303 | src[0] = (pixel)(Clip3(-tcQ, tcQ, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4); | |
304 | src[offset] = (pixel)(Clip3(-tcQ, tcQ, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5); | |
305 | src[offset * 2] = (pixel)(Clip3(-tcQ, tcQ, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6); | |
72b9787e JB |
306 | } |
307 | } | |
308 | ||
309 | /* Weak filter */ | |
b53f7c52 JB |
310 | static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ, |
311 | int32_t maskP1, int32_t maskQ1) | |
72b9787e JB |
312 | { |
313 | int32_t thrCut = tc * 10; | |
b53f7c52 JB |
314 | int32_t tc2 = tc >> 1; |
315 | maskP1 &= maskP; | |
316 | maskQ1 &= maskQ; | |
72b9787e JB |
317 | |
318 | for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) | |
319 | { | |
320 | int16_t m4 = (int16_t)src[0]; | |
321 | int16_t m3 = (int16_t)src[-offset]; | |
322 | int16_t m5 = (int16_t)src[offset]; | |
323 | int16_t m2 = (int16_t)src[-offset * 2]; | |
324 | ||
325 | int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4; | |
326 | ||
327 | if (abs(delta) < thrCut) | |
328 | { | |
329 | delta = Clip3(-tc, tc, delta); | |
330 | ||
b53f7c52 JB |
331 | src[-offset] = Clip(m3 + (delta & maskP)); |
332 | src[0] = Clip(m4 - (delta & maskQ)); | |
333 | if (maskP1) | |
72b9787e | 334 | { |
b53f7c52 JB |
335 | int16_t m1 = (int16_t)src[-offset * 3]; |
336 | int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1)); | |
337 | src[-offset * 2] = Clip(m2 + delta1); | |
72b9787e | 338 | } |
b53f7c52 | 339 | if (maskQ1) |
72b9787e | 340 | { |
b53f7c52 JB |
341 | int16_t m6 = (int16_t)src[offset * 2]; |
342 | int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1)); | |
343 | src[offset] = Clip(m5 + delta2); | |
72b9787e JB |
344 | } |
345 | } | |
346 | } | |
347 | } | |
348 | ||
349 | /* Deblocking of one line/column for the chrominance component | |
b53f7c52 JB |
350 | * \param src pointer to picture data |
351 | * \param offset offset value for picture data | |
352 | * \param tc tc value | |
353 | * \param maskP indicator to disable filtering on partP | |
354 | * \param maskQ indicator to disable filtering on partQ */ | |
355 | static inline void pelFilterChroma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ) | |
72b9787e JB |
356 | { |
357 | for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) | |
358 | { | |
359 | int16_t m4 = (int16_t)src[0]; | |
360 | int16_t m3 = (int16_t)src[-offset]; | |
361 | int16_t m5 = (int16_t)src[offset]; | |
362 | int16_t m2 = (int16_t)src[-offset * 2]; | |
363 | ||
364 | int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3)); | |
b53f7c52 JB |
365 | src[-offset] = Clip(m3 + (delta & maskP)); |
366 | src[0] = Clip(m4 - (delta & maskQ)); | |
72b9787e JB |
367 | } |
368 | } | |
369 | ||
b53f7c52 | 370 | void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[]) |
72b9787e | 371 | { |
b53f7c52 JB |
372 | PicYuv* reconPic = cuQ->m_encData->m_reconPic; |
373 | pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx); | |
374 | intptr_t stride = reconPic->m_stride; | |
375 | const PPS* pps = cuQ->m_slice->m_pps; | |
72b9787e JB |
376 | |
377 | intptr_t offset, srcStep; | |
378 | ||
b53f7c52 JB |
379 | int32_t maskP = -1; |
380 | int32_t maskQ = -1; | |
381 | int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1; | |
382 | int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1; | |
383 | bool bCheckNoFilter = pps->bTransquantBypassEnabled; | |
72b9787e JB |
384 | |
385 | if (dir == EDGE_VER) | |
386 | { | |
387 | offset = 1; | |
388 | srcStep = stride; | |
389 | src += (edge << LOG2_UNIT_SIZE); | |
390 | } | |
391 | else // (dir == EDGE_HOR) | |
392 | { | |
393 | offset = stride; | |
394 | srcStep = 1; | |
395 | src += (edge << LOG2_UNIT_SIZE) * stride; | |
396 | } | |
397 | ||
b53f7c52 JB |
398 | uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth; |
399 | for (uint32_t idx = 0; idx < numUnits; idx++) | |
72b9787e | 400 | { |
b53f7c52 JB |
401 | uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx); |
402 | uint32_t bs = blockStrength[partQ]; | |
72b9787e | 403 | |
b53f7c52 JB |
404 | if (!bs) |
405 | continue; | |
72b9787e | 406 | |
b53f7c52 | 407 | int32_t qpQ = cuQ->m_qp[partQ]; |
72b9787e | 408 | |
b53f7c52 JB |
409 | // Derive neighboring PU index |
410 | uint32_t partP; | |
411 | const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ)); | |
72b9787e | 412 | |
b53f7c52 JB |
413 | int32_t qpP = cuP->m_qp[partP]; |
414 | int32_t qp = (qpP + qpQ + 1) >> 1; | |
72b9787e | 415 | |
b53f7c52 | 416 | int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset); |
72b9787e | 417 | |
b53f7c52 JB |
418 | const int32_t bitdepthShift = X265_DEPTH - 8; |
419 | int32_t beta = s_betaTable[indexB] << bitdepthShift; | |
72b9787e | 420 | |
b53f7c52 JB |
421 | intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE; |
422 | int32_t dp0 = calcDP(src + unitOffset , offset); | |
423 | int32_t dq0 = calcDQ(src + unitOffset , offset); | |
424 | int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset); | |
425 | int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset); | |
426 | int32_t d0 = dp0 + dq0; | |
427 | int32_t d3 = dp3 + dq3; | |
428 | ||
429 | int32_t d = d0 + d3; | |
430 | ||
431 | if (d >= beta) | |
432 | continue; | |
433 | ||
434 | if (bCheckNoFilter) | |
435 | { | |
436 | // check if each of PUs is lossless coded | |
437 | maskP = (cuP->m_tqBypass[partP] ? 0 : -1); | |
438 | maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1); | |
439 | } | |
440 | ||
441 | int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset)); | |
442 | int32_t tc = s_tcTable[indexTC] << bitdepthShift; | |
443 | ||
444 | bool sw = (2 * d0 < (beta >> 2) && | |
445 | 2 * d3 < (beta >> 2) && | |
446 | useStrongFiltering(offset, beta, tc, src + unitOffset ) && | |
447 | useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3)); | |
448 | ||
449 | if (sw) | |
450 | pelFilterLumaStrong(src + unitOffset, srcStep, offset, tc, maskP, maskQ); | |
451 | else | |
452 | { | |
453 | int32_t sideThreshold = (beta + (beta >> 1)) >> 3; | |
454 | int32_t dp = dp0 + dp3; | |
455 | int32_t dq = dq0 + dq3; | |
456 | int32_t maskP1 = (dp < sideThreshold ? -1 : 0); | |
457 | int32_t maskQ1 = (dq < sideThreshold ? -1 : 0); | |
458 | ||
459 | pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1); | |
72b9787e JB |
460 | } |
461 | } | |
462 | } | |
463 | ||
b53f7c52 | 464 | void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[]) |
72b9787e | 465 | { |
b53f7c52 | 466 | int32_t chFmt = cuQ->m_chromaFormat, chromaShift; |
72b9787e | 467 | intptr_t offset, srcStep; |
b53f7c52 | 468 | const PPS* pps = cuQ->m_slice->m_pps; |
72b9787e | 469 | |
b53f7c52 JB |
470 | int32_t maskP = -1; |
471 | int32_t maskQ = -1; | |
472 | int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1; | |
72b9787e JB |
473 | |
474 | X265_CHECK(((dir == EDGE_VER) | |
b53f7c52 JB |
475 | ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift) |
476 | : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0, | |
72b9787e JB |
477 | "invalid edge\n"); |
478 | ||
b53f7c52 | 479 | PicYuv* reconPic = cuQ->m_encData->m_reconPic; |
72b9787e | 480 | intptr_t stride = reconPic->m_strideC; |
b53f7c52 JB |
481 | intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx); |
482 | bool bCheckNoFilter = pps->bTransquantBypassEnabled; | |
72b9787e JB |
483 | |
484 | if (dir == EDGE_VER) | |
485 | { | |
b53f7c52 JB |
486 | chromaShift = cuQ->m_vChromaShift; |
487 | srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift)); | |
72b9787e JB |
488 | offset = 1; |
489 | srcStep = stride; | |
490 | } | |
491 | else // (dir == EDGE_HOR) | |
492 | { | |
b53f7c52 JB |
493 | chromaShift = cuQ->m_hChromaShift; |
494 | srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift); | |
72b9787e JB |
495 | offset = stride; |
496 | srcStep = 1; | |
497 | } | |
498 | ||
499 | pixel* srcChroma[2]; | |
500 | srcChroma[0] = reconPic->m_picOrg[1] + srcOffset; | |
501 | srcChroma[1] = reconPic->m_picOrg[2] + srcOffset; | |
502 | ||
b53f7c52 | 503 | uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift); |
72b9787e JB |
504 | |
505 | for (uint32_t idx = 0; idx < numUnits; idx++) | |
506 | { | |
b53f7c52 JB |
507 | uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx << chromaShift); |
508 | uint32_t bs = blockStrength[partQ]; | |
72b9787e | 509 | |
b53f7c52 JB |
510 | if (bs <= 1) |
511 | continue; | |
72b9787e | 512 | |
b53f7c52 | 513 | int32_t qpQ = cuQ->m_qp[partQ]; |
72b9787e | 514 | |
b53f7c52 JB |
515 | // Derive neighboring PU index |
516 | uint32_t partP; | |
517 | const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ)); | |
72b9787e | 518 | |
b53f7c52 | 519 | int32_t qpP = cuP->m_qp[partP]; |
72b9787e | 520 | |
b53f7c52 JB |
521 | if (bCheckNoFilter) |
522 | { | |
523 | // check if each of PUs is lossless coded | |
524 | maskP = (cuP->m_tqBypass[partP] ? 0 : -1); | |
525 | maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1); | |
526 | } | |
527 | ||
528 | intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE; | |
529 | for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++) | |
530 | { | |
531 | int32_t chromaQPOffset = pps->chromaQpOffset[chromaIdx]; | |
532 | int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset; | |
533 | if (qp >= 30) | |
72b9787e | 534 | { |
b53f7c52 JB |
535 | if (chFmt == X265_CSP_I420) |
536 | qp = g_chromaScale[qp]; | |
537 | else | |
538 | qp = X265_MIN(qp, 51); | |
72b9787e | 539 | } |
b53f7c52 JB |
540 | |
541 | int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset)); | |
542 | const int32_t bitdepthShift = X265_DEPTH - 8; | |
543 | int32_t tc = s_tcTable[indexTC] << bitdepthShift; | |
544 | pixel* srcC = srcChroma[chromaIdx]; | |
545 | ||
546 | pelFilterChroma(srcC + unitOffset, srcStep, offset, tc, maskP, maskQ); | |
72b9787e JB |
547 | } |
548 | } | |
549 | } | |
550 | ||
551 | const uint8_t Deblock::s_tcTable[54] = | |
552 | { | |
553 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, | |
554 | 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24 | |
555 | }; | |
556 | ||
557 | const uint8_t Deblock::s_betaTable[52] = | |
558 | { | |
559 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, | |
560 | 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64 | |
561 | }; | |
562 |