Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Author: Gopu Govindaswamy <gopu@multicorewareinc.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com. | |
22 | *****************************************************************************/ | |
23 | ||
24 | #include "common.h" | |
25 | #include "deblock.h" | |
26 | #include "framedata.h" | |
27 | #include "picyuv.h" | |
28 | #include "slice.h" | |
29 | #include "mv.h" | |
30 | ||
31 | using namespace x265; | |
32 | ||
33 | #define DEBLOCK_SMALLEST_BLOCK 8 | |
34 | #define DEFAULT_INTRA_TC_OFFSET 2 | |
35 | ||
36 | void Deblock::deblockCTU(CUData* cu, int32_t dir) | |
37 | { | |
38 | uint8_t blockingStrength[MAX_NUM_PARTITIONS]; | |
39 | ||
40 | memset(blockingStrength, 0, sizeof(uint8_t) * m_numPartitions); | |
41 | ||
42 | deblockCU(cu, 0, 0, dir, blockingStrength); | |
43 | } | |
44 | ||
45 | /* Deblocking filter process in CU-based (the same function as conventional's) | |
46 | * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */ | |
47 | void Deblock::deblockCU(CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockingStrength[]) | |
48 | { | |
49 | if (cu->m_partSize[absPartIdx] == SIZE_NONE) | |
50 | return; | |
51 | ||
52 | uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1); | |
53 | ||
54 | const SPS& sps = *cu->m_slice->m_sps; | |
55 | ||
56 | if (cu->m_cuDepth[absPartIdx] > depth) | |
57 | { | |
58 | uint32_t qNumParts = curNumParts >> 2; | |
59 | uint32_t xmax = sps.picWidthInLumaSamples - cu->m_cuPelX; | |
60 | uint32_t ymax = sps.picHeightInLumaSamples - cu->m_cuPelY; | |
61 | for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts) | |
62 | if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax) | |
63 | deblockCU(cu, absPartIdx, depth + 1, dir, blockingStrength); | |
64 | return; | |
65 | } | |
66 | ||
67 | const uint32_t widthInBaseUnits = sps.numPartInCUSize >> depth; | |
68 | Param params; | |
69 | setLoopfilterParam(cu, absPartIdx, ¶ms); | |
70 | setEdgefilterPU(cu, absPartIdx, dir, blockingStrength, widthInBaseUnits); | |
71 | setEdgefilterTU(cu, absPartIdx, depth, dir, blockingStrength); | |
72 | setEdgefilterMultiple(cu, absPartIdx, dir, 0, (dir == EDGE_VER ? params.leftEdge : params.topEdge), blockingStrength, widthInBaseUnits); | |
73 | ||
74 | for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + curNumParts; partIdx++) | |
75 | { | |
76 | uint32_t bsCheck = !(partIdx & (1 << dir)); | |
77 | ||
78 | if (bsCheck && blockingStrength[partIdx]) | |
79 | getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength); | |
80 | } | |
81 | ||
82 | const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE; | |
83 | uint32_t sizeInPU = sps.numPartInCUSize >> depth; | |
84 | uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift; | |
85 | uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1; | |
86 | uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE; | |
87 | ||
88 | for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr) | |
89 | { | |
90 | edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockingStrength); | |
91 | if (!((e0 + e) & chromaMask)) | |
92 | edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockingStrength); | |
93 | } | |
94 | } | |
95 | ||
96 | static inline uint32_t calcBsIdx(CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx) | |
97 | { | |
98 | uint32_t ctuWidthInBaseUnits = cu->m_slice->m_sps->numPartInCUSize; | |
99 | ||
100 | if (dir) | |
101 | return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * ctuWidthInBaseUnits + baseUnitIdx]; | |
102 | else | |
103 | return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * ctuWidthInBaseUnits + edgeIdx]; | |
104 | } | |
105 | ||
106 | void Deblock::setEdgefilterMultiple(CUData* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockingStrength[], uint32_t widthInBaseUnits) | |
107 | { | |
108 | const uint32_t numElem = widthInBaseUnits; | |
109 | X265_CHECK(numElem > 0, "numElem edge filter check\n"); | |
110 | for (uint32_t i = 0; i < numElem; i++) | |
111 | { | |
112 | const uint32_t bsidx = calcBsIdx(cu, scanIdx, dir, edgeIdx, i); | |
113 | blockingStrength[bsidx] = value; | |
114 | } | |
115 | } | |
116 | ||
117 | void Deblock::setEdgefilterTU(CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, uint8_t blockingStrength[]) | |
118 | { | |
119 | if ((uint32_t)cu->m_tuDepth[absPartIdx] + cu->m_cuDepth[absPartIdx] > depth) | |
120 | { | |
121 | const uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1); | |
122 | const uint32_t qNumParts = curNumParts >> 2; | |
123 | ||
124 | for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts) | |
125 | setEdgefilterTU(cu, absPartIdx, depth + 1, dir, blockingStrength); | |
126 | return; | |
127 | } | |
128 | ||
129 | uint32_t widthInBaseUnits = 1 << (cu->m_log2CUSize[absPartIdx] - cu->m_tuDepth[absPartIdx] - LOG2_UNIT_SIZE); | |
130 | setEdgefilterMultiple(cu, absPartIdx, dir, 0, 2, blockingStrength, widthInBaseUnits); | |
131 | } | |
132 | ||
133 | void Deblock::setEdgefilterPU(CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockingStrength[], uint32_t widthInBaseUnits) | |
134 | { | |
135 | const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1; | |
136 | const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2; | |
137 | ||
138 | switch (cu->m_partSize[absPartIdx]) | |
139 | { | |
140 | case SIZE_2NxN: | |
141 | if (EDGE_HOR == dir) | |
142 | setEdgefilterMultiple(cu, absPartIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); | |
143 | break; | |
144 | case SIZE_Nx2N: | |
145 | if (EDGE_VER == dir) | |
146 | setEdgefilterMultiple(cu, absPartIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); | |
147 | break; | |
148 | case SIZE_NxN: | |
149 | setEdgefilterMultiple(cu, absPartIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); | |
150 | break; | |
151 | case SIZE_2NxnU: | |
152 | if (EDGE_HOR == dir) | |
153 | setEdgefilterMultiple(cu, absPartIdx, dir, qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); | |
154 | break; | |
155 | case SIZE_nLx2N: | |
156 | if (EDGE_VER == dir) | |
157 | setEdgefilterMultiple(cu, absPartIdx, dir, qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); | |
158 | break; | |
159 | case SIZE_2NxnD: | |
160 | if (EDGE_HOR == dir) | |
161 | setEdgefilterMultiple(cu, absPartIdx, dir, widthInBaseUnits - qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); | |
162 | break; | |
163 | case SIZE_nRx2N: | |
164 | if (EDGE_VER == dir) | |
165 | setEdgefilterMultiple(cu, absPartIdx, dir, widthInBaseUnits - qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits); | |
166 | break; | |
167 | ||
168 | case SIZE_2Nx2N: | |
169 | default: | |
170 | break; | |
171 | } | |
172 | } | |
173 | ||
174 | void Deblock::setLoopfilterParam(CUData* cu, uint32_t absPartIdx, Param *params) | |
175 | { | |
176 | uint32_t x = cu->m_cuPelX + g_zscanToPelX[absPartIdx]; | |
177 | uint32_t y = cu->m_cuPelY + g_zscanToPelY[absPartIdx]; | |
178 | ||
179 | const CUData* tempCU; | |
180 | uint32_t tempPartIdx; | |
181 | ||
182 | if (!x) | |
183 | params->leftEdge = 0; | |
184 | else | |
185 | { | |
186 | tempCU = cu->getPULeft(tempPartIdx, absPartIdx); | |
187 | if (tempCU) | |
188 | params->leftEdge = 2; | |
189 | else | |
190 | params->leftEdge = 0; | |
191 | } | |
192 | ||
193 | if (!y) | |
194 | params->topEdge = 0; | |
195 | else | |
196 | { | |
197 | tempCU = cu->getPUAbove(tempPartIdx, absPartIdx); | |
198 | if (tempCU) | |
199 | params->topEdge = 2; | |
200 | else | |
201 | params->topEdge = 0; | |
202 | } | |
203 | } | |
204 | ||
205 | void Deblock::getBoundaryStrengthSingle(CUData* cu, int32_t dir, uint32_t absPartIdx, uint8_t blockingStrength[]) | |
206 | { | |
207 | const Slice* const slice = cu->m_slice; | |
208 | const uint32_t partQ = absPartIdx; | |
209 | CUData* const cuQ = cu; | |
210 | ||
211 | uint32_t partP; | |
212 | const CUData* cuP; | |
213 | uint8_t bs = 0; | |
214 | ||
215 | // Calculate block index | |
216 | if (dir == EDGE_VER) | |
217 | cuP = cuQ->getPULeft(partP, partQ); | |
218 | else // (dir == EDGE_HOR) | |
219 | cuP = cuQ->getPUAbove(partP, partQ); | |
220 | ||
221 | // Set BS for Intra MB : BS = 4 or 3 | |
222 | if (cuP->isIntra(partP) || cuQ->isIntra(partQ)) | |
223 | bs = 2; | |
224 | ||
225 | // Set BS for not Intra MB : BS = 2 or 1 or 0 | |
226 | if (!cuP->isIntra(partP) && !cuQ->isIntra(partQ)) | |
227 | { | |
228 | uint32_t nsPartQ = partQ; | |
229 | uint32_t nsPartP = partP; | |
230 | ||
231 | if (blockingStrength[absPartIdx] > 1 && | |
232 | (cuQ->getCbf(nsPartQ, TEXT_LUMA, cuQ->m_tuDepth[nsPartQ]) || | |
233 | cuP->getCbf(nsPartP, TEXT_LUMA, cuP->m_tuDepth[nsPartP]))) | |
234 | bs = 1; | |
235 | else | |
236 | { | |
237 | if (dir == EDGE_HOR) | |
238 | cuP = cuQ->getPUAbove(partP, partQ); | |
239 | ||
240 | if (slice->isInterB() || cuP->m_slice->isInterB()) | |
241 | { | |
242 | int32_t refIdx; | |
243 | Frame *refP0, *refP1, *refQ0, *refQ1; | |
244 | refIdx = cuP->m_refIdx[0][partP]; | |
245 | refP0 = (refIdx < 0) ? NULL : cuP->m_slice->m_refPicList[0][refIdx]; | |
246 | refIdx = cuP->m_refIdx[1][partP]; | |
247 | refP1 = (refIdx < 0) ? NULL : cuP->m_slice->m_refPicList[1][refIdx]; | |
248 | refIdx = cuQ->m_refIdx[0][partQ]; | |
249 | refQ0 = (refIdx < 0) ? NULL : slice->m_refPicList[0][refIdx]; | |
250 | refIdx = cuQ->m_refIdx[1][partQ]; | |
251 | refQ1 = (refIdx < 0) ? NULL : slice->m_refPicList[1][refIdx]; | |
252 | ||
253 | MV mvp0 = cuP->m_mv[0][partP]; | |
254 | MV mvp1 = cuP->m_mv[1][partP]; | |
255 | MV mvq0 = cuQ->m_mv[0][partQ]; | |
256 | MV mvq1 = cuQ->m_mv[1][partQ]; | |
257 | ||
258 | if (!refP0) mvp0 = 0; | |
259 | if (!refP1) mvp1 = 0; | |
260 | if (!refQ0) mvq0 = 0; | |
261 | if (!refQ1) mvq1 = 0; | |
262 | ||
263 | if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0))) | |
264 | { | |
265 | if (refP0 != refP1) // Different L0 & L1 | |
266 | { | |
267 | if (refP0 == refQ0) | |
268 | { | |
269 | bs = ((abs(mvq0.x - mvp0.x) >= 4) || | |
270 | (abs(mvq0.y - mvp0.y) >= 4) || | |
271 | (abs(mvq1.x - mvp1.x) >= 4) || | |
272 | (abs(mvq1.y - mvp1.y) >= 4)) ? 1 : 0; | |
273 | } | |
274 | else | |
275 | { | |
276 | bs = ((abs(mvq1.x - mvp0.x) >= 4) || | |
277 | (abs(mvq1.y - mvp0.y) >= 4) || | |
278 | (abs(mvq0.x - mvp1.x) >= 4) || | |
279 | (abs(mvq0.y - mvp1.y) >= 4)) ? 1 : 0; | |
280 | } | |
281 | } | |
282 | else // Same L0 & L1 | |
283 | { | |
284 | bs = ((abs(mvq0.x - mvp0.x) >= 4) || | |
285 | (abs(mvq0.y - mvp0.y) >= 4) || | |
286 | (abs(mvq1.x - mvp1.x) >= 4) || | |
287 | (abs(mvq1.y - mvp1.y) >= 4)) && | |
288 | ((abs(mvq1.x - mvp0.x) >= 4) || | |
289 | (abs(mvq1.y - mvp0.y) >= 4) || | |
290 | (abs(mvq0.x - mvp1.x) >= 4) || | |
291 | (abs(mvq0.y - mvp1.y) >= 4)) ? 1 : 0; | |
292 | } | |
293 | } | |
294 | else // for all different Ref_Idx | |
295 | bs = 1; | |
296 | } | |
297 | else // slice->isInterP() | |
298 | { | |
299 | int32_t refIdx; | |
300 | Frame *refp0, *refq0; | |
301 | refIdx = cuP->m_refIdx[0][partP]; | |
302 | refp0 = (refIdx < 0) ? NULL : cuP->m_slice->m_refPicList[0][refIdx]; | |
303 | refIdx = cuQ->m_refIdx[0][partQ]; | |
304 | refq0 = (refIdx < 0) ? NULL : slice->m_refPicList[0][refIdx]; | |
305 | MV mvp0 = cuP->m_mv[0][partP]; | |
306 | MV mvq0 = cuQ->m_mv[0][partQ]; | |
307 | ||
308 | if (!refp0) mvp0 = 0; | |
309 | if (!refq0) mvq0 = 0; | |
310 | ||
311 | bs = ((refp0 != refq0) || | |
312 | (abs(mvq0.x - mvp0.x) >= 4) || | |
313 | (abs(mvq0.y - mvp0.y) >= 4)) ? 1 : 0; | |
314 | } | |
315 | } | |
316 | } | |
317 | ||
318 | blockingStrength[absPartIdx] = bs; | |
319 | } | |
320 | ||
321 | static inline int32_t calcDP(pixel* src, intptr_t offset) | |
322 | { | |
323 | return abs(static_cast<int32_t>(src[-offset * 3]) - 2 * src[-offset * 2] + src[-offset]); | |
324 | } | |
325 | ||
326 | static inline int32_t calcDQ(pixel* src, intptr_t offset) | |
327 | { | |
328 | return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] + src[offset * 2]); | |
329 | } | |
330 | ||
331 | static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src) | |
332 | { | |
333 | int16_t m4 = (int16_t)src[0]; | |
334 | int16_t m3 = (int16_t)src[-offset]; | |
335 | int16_t m7 = (int16_t)src[offset * 3]; | |
336 | int16_t m0 = (int16_t)src[-offset * 4]; | |
337 | int32_t strong = abs(m0 - m3) + abs(m7 - m4); | |
338 | ||
339 | return (strong < (beta >> 3)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1)); | |
340 | } | |
341 | ||
342 | /* Deblocking for the luminance component with strong or weak filter | |
343 | * \param src pointer to picture data | |
344 | * \param offset offset value for picture data | |
345 | * \param tc tc value | |
346 | * \param partPNoFilter indicator to disable filtering on partP | |
347 | * \param partQNoFilter indicator to disable filtering on partQ | |
348 | * \param filterSecondP decision weak filter/no filter for partP | |
349 | * \param filterSecondQ decision weak filter/no filter for partQ */ | |
350 | static inline void pelFilterLumaStrong(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter) | |
351 | { | |
352 | for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) | |
353 | { | |
354 | int16_t m4 = (int16_t)src[0]; | |
355 | int16_t m3 = (int16_t)src[-offset]; | |
356 | int16_t m5 = (int16_t)src[offset]; | |
357 | int16_t m2 = (int16_t)src[-offset * 2]; | |
358 | int32_t tc2 = 2 * tc; | |
359 | if (!partPNoFilter) | |
360 | { | |
361 | int16_t m1 = (int16_t)src[-offset * 3]; | |
362 | int16_t m0 = (int16_t)src[-offset * 4]; | |
363 | src[-offset * 3] = (pixel)(Clip3(-tc2, tc2, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1); | |
364 | src[-offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2); | |
365 | src[-offset] = (pixel)(Clip3(-tc2, tc2, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3); | |
366 | } | |
367 | if (!partQNoFilter) | |
368 | { | |
369 | int16_t m6 = (int16_t)src[offset * 2]; | |
370 | int16_t m7 = (int16_t)src[offset * 3]; | |
371 | src[0] = (pixel)(Clip3(-tc2, tc2, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4); | |
372 | src[offset] = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5); | |
373 | src[offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6); | |
374 | } | |
375 | } | |
376 | } | |
377 | ||
378 | /* Weak filter */ | |
379 | static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter, | |
380 | bool filterSecondP, bool filterSecondQ) | |
381 | { | |
382 | int32_t thrCut = tc * 10; | |
383 | ||
384 | for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) | |
385 | { | |
386 | int16_t m4 = (int16_t)src[0]; | |
387 | int16_t m3 = (int16_t)src[-offset]; | |
388 | int16_t m5 = (int16_t)src[offset]; | |
389 | int16_t m2 = (int16_t)src[-offset * 2]; | |
390 | ||
391 | int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4; | |
392 | ||
393 | if (abs(delta) < thrCut) | |
394 | { | |
395 | delta = Clip3(-tc, tc, delta); | |
396 | ||
397 | int32_t tc2 = tc >> 1; | |
398 | if (!partPNoFilter) | |
399 | { | |
400 | src[-offset] = Clip(m3 + delta); | |
401 | if (filterSecondP) | |
402 | { | |
403 | int16_t m1 = (int16_t)src[-offset * 3]; | |
404 | int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1)); | |
405 | src[-offset * 2] = Clip(m2 + delta1); | |
406 | } | |
407 | } | |
408 | if (!partQNoFilter) | |
409 | { | |
410 | src[0] = Clip(m4 - delta); | |
411 | if (filterSecondQ) | |
412 | { | |
413 | int16_t m6 = (int16_t)src[offset * 2]; | |
414 | int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1)); | |
415 | src[offset] = Clip(m5 + delta2); | |
416 | } | |
417 | } | |
418 | } | |
419 | } | |
420 | } | |
421 | ||
422 | /* Deblocking of one line/column for the chrominance component | |
423 | * \param src pointer to picture data | |
424 | * \param offset offset value for picture data | |
425 | * \param tc tc value | |
426 | * \param partPNoFilter indicator to disable filtering on partP | |
427 | * \param partQNoFilter indicator to disable filtering on partQ */ | |
428 | static inline void pelFilterChroma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter) | |
429 | { | |
430 | for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) | |
431 | { | |
432 | int16_t m4 = (int16_t)src[0]; | |
433 | int16_t m3 = (int16_t)src[-offset]; | |
434 | int16_t m5 = (int16_t)src[offset]; | |
435 | int16_t m2 = (int16_t)src[-offset * 2]; | |
436 | ||
437 | int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3)); | |
438 | if (!partPNoFilter) | |
439 | src[-offset] = Clip(m3 + delta); | |
440 | if (!partQNoFilter) | |
441 | src[0] = Clip(m4 - delta); | |
442 | } | |
443 | } | |
444 | ||
445 | void Deblock::edgeFilterLuma(CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[]) | |
446 | { | |
447 | PicYuv* reconYuv = cu->m_encData->m_reconPicYuv; | |
448 | pixel* src = reconYuv->getLumaAddr(cu->m_cuAddr, absPartIdx); | |
449 | ||
450 | intptr_t stride = reconYuv->m_stride; | |
451 | uint32_t numParts = cu->m_slice->m_sps->numPartInCUSize >> depth; | |
452 | ||
453 | intptr_t offset, srcStep; | |
454 | ||
455 | bool partPNoFilter = false; | |
456 | bool partQNoFilter = false; | |
457 | uint32_t partP = 0; | |
458 | uint32_t partQ = 0; | |
459 | const CUData* cuP = cu; | |
460 | const CUData* cuQ = cu; | |
461 | int32_t betaOffset = cuQ->m_slice->m_pps->deblockingFilterBetaOffsetDiv2 << 1; | |
462 | int32_t tcOffset = cuQ->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1; | |
463 | ||
464 | if (dir == EDGE_VER) | |
465 | { | |
466 | offset = 1; | |
467 | srcStep = stride; | |
468 | src += (edge << LOG2_UNIT_SIZE); | |
469 | } | |
470 | else // (dir == EDGE_HOR) | |
471 | { | |
472 | offset = stride; | |
473 | srcStep = 1; | |
474 | src += (edge << LOG2_UNIT_SIZE) * stride; | |
475 | } | |
476 | ||
477 | for (uint32_t idx = 0; idx < numParts; idx++) | |
478 | { | |
479 | uint32_t unitOffset = idx << LOG2_UNIT_SIZE; | |
480 | uint32_t bsAbsIdx = calcBsIdx(cu, absPartIdx, dir, edge, idx); | |
481 | uint32_t bs = blockingStrength[bsAbsIdx]; | |
482 | if (bs) | |
483 | { | |
484 | int32_t qpQ = cu->m_qp[bsAbsIdx]; | |
485 | partQ = bsAbsIdx; | |
486 | ||
487 | // Derive neighboring PU index | |
488 | if (dir == EDGE_VER) | |
489 | cuP = cuQ->getPULeft(partP, partQ); | |
490 | else // (dir == EDGE_HOR) | |
491 | cuP = cuQ->getPUAbove(partP, partQ); | |
492 | ||
493 | int32_t qpP = cuP->m_qp[partP]; | |
494 | int32_t qp = (qpP + qpQ + 1) >> 1; | |
495 | ||
496 | int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset); | |
497 | ||
498 | const int32_t bitdepthShift = X265_DEPTH - 8; | |
499 | int32_t beta = s_betaTable[indexB] << bitdepthShift; | |
500 | ||
501 | int32_t dp0 = calcDP(src + srcStep * (unitOffset + 0), offset); | |
502 | int32_t dq0 = calcDQ(src + srcStep * (unitOffset + 0), offset); | |
503 | int32_t dp3 = calcDP(src + srcStep * (unitOffset + 3), offset); | |
504 | int32_t dq3 = calcDQ(src + srcStep * (unitOffset + 3), offset); | |
505 | int32_t d0 = dp0 + dq0; | |
506 | int32_t d3 = dp3 + dq3; | |
507 | ||
508 | int32_t d = d0 + d3; | |
509 | ||
510 | if (d < beta) | |
511 | { | |
512 | if (cu->m_slice->m_pps->bTransquantBypassEnabled) | |
513 | { | |
514 | // check if each of PUs is lossless coded | |
515 | partPNoFilter = !!cuP->m_tqBypass[partP]; | |
516 | partQNoFilter = !!cuQ->m_tqBypass[partQ]; | |
517 | } | |
518 | ||
519 | int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset)); | |
520 | int32_t tc = s_tcTable[indexTC] << bitdepthShift; | |
521 | ||
522 | bool sw = (2 * d0 < (beta >> 2) && | |
523 | 2 * d3 < (beta >> 2) && | |
524 | useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 0)) && | |
525 | useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 3))); | |
526 | ||
527 | if (sw) | |
528 | pelFilterLumaStrong(src + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter); | |
529 | else | |
530 | { | |
531 | int32_t sideThreshold = (beta + (beta >> 1)) >> 3; | |
532 | int32_t dp = dp0 + dp3; | |
533 | int32_t dq = dq0 + dq3; | |
534 | bool filterP = (dp < sideThreshold); | |
535 | bool filterQ = (dq < sideThreshold); | |
536 | ||
537 | pelFilterLuma(src + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter, filterP, filterQ); | |
538 | } | |
539 | } | |
540 | } | |
541 | } | |
542 | } | |
543 | ||
544 | void Deblock::edgeFilterChroma(CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[]) | |
545 | { | |
546 | int32_t chFmt = cu->m_chromaFormat, chromaShift; | |
547 | intptr_t offset, srcStep; | |
548 | ||
549 | bool partPNoFilter = false; | |
550 | bool partQNoFilter = false; | |
551 | uint32_t partP; | |
552 | uint32_t partQ; | |
553 | const CUData* cuP; | |
554 | const CUData* cuQ = cu; | |
555 | int32_t tcOffset = cu->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1; | |
556 | ||
557 | X265_CHECK(((dir == EDGE_VER) | |
558 | ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cu->m_hChromaShift) | |
559 | : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cu->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0, | |
560 | "invalid edge\n"); | |
561 | ||
562 | PicYuv* reconPic = cu->m_encData->m_reconPicYuv; | |
563 | intptr_t stride = reconPic->m_strideC; | |
564 | intptr_t srcOffset = reconPic->getChromaAddrOffset(cu->m_cuAddr, absPartIdx); | |
565 | ||
566 | if (dir == EDGE_VER) | |
567 | { | |
568 | chromaShift = cu->m_vChromaShift; | |
569 | srcOffset += (edge << (LOG2_UNIT_SIZE - cu->m_hChromaShift)); | |
570 | offset = 1; | |
571 | srcStep = stride; | |
572 | } | |
573 | else // (dir == EDGE_HOR) | |
574 | { | |
575 | chromaShift = cu->m_hChromaShift; | |
576 | srcOffset += edge * stride << (LOG2_UNIT_SIZE - cu->m_vChromaShift); | |
577 | offset = stride; | |
578 | srcStep = 1; | |
579 | } | |
580 | ||
581 | pixel* srcChroma[2]; | |
582 | srcChroma[0] = reconPic->m_picOrg[1] + srcOffset; | |
583 | srcChroma[1] = reconPic->m_picOrg[2] + srcOffset; | |
584 | ||
585 | uint32_t numUnits = cu->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift); | |
586 | ||
587 | for (uint32_t idx = 0; idx < numUnits; idx++) | |
588 | { | |
589 | uint32_t unitOffset = idx << LOG2_UNIT_SIZE; | |
590 | uint32_t bsAbsIdx = calcBsIdx(cu, absPartIdx, dir, edge, idx << chromaShift); | |
591 | uint32_t bs = blockingStrength[bsAbsIdx]; | |
592 | ||
593 | if (bs > 1) | |
594 | { | |
595 | int32_t qpQ = cu->m_qp[bsAbsIdx]; | |
596 | partQ = bsAbsIdx; | |
597 | ||
598 | // Derive neighboring PU index | |
599 | if (dir == EDGE_VER) | |
600 | cuP = cuQ->getPULeft(partP, partQ); | |
601 | else // (dir == EDGE_HOR) | |
602 | cuP = cuQ->getPUAbove(partP, partQ); | |
603 | ||
604 | int32_t qpP = cuP->m_qp[partP]; | |
605 | ||
606 | if (cu->m_slice->m_pps->bTransquantBypassEnabled) | |
607 | { | |
608 | // check if each of PUs is lossless coded | |
609 | partPNoFilter = !!cuP->m_tqBypass[partP]; | |
610 | partQNoFilter = !!cuQ->m_tqBypass[partQ]; | |
611 | } | |
612 | ||
613 | for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++) | |
614 | { | |
615 | int32_t chromaQPOffset = !chromaIdx ? cu->m_slice->m_pps->chromaCbQpOffset : cu->m_slice->m_pps->chromaCrQpOffset; | |
616 | int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset; | |
617 | if (qp >= 30) | |
618 | { | |
619 | if (chFmt == X265_CSP_I420) | |
620 | qp = g_chromaScale[qp]; | |
621 | else | |
622 | qp = X265_MIN(qp, 51); | |
623 | } | |
624 | ||
625 | int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset)); | |
626 | const int32_t bitdepthShift = X265_DEPTH - 8; | |
627 | int32_t tc = s_tcTable[indexTC] << bitdepthShift; | |
628 | pixel* srcC = srcChroma[chromaIdx]; | |
629 | ||
630 | pelFilterChroma(srcC + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter); | |
631 | } | |
632 | } | |
633 | } | |
634 | } | |
635 | ||
636 | const uint8_t Deblock::s_tcTable[54] = | |
637 | { | |
638 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, | |
639 | 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24 | |
640 | }; | |
641 | ||
642 | const uint8_t Deblock::s_betaTable[52] = | |
643 | { | |
644 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, | |
645 | 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64 | |
646 | }; | |
647 |