Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Authors: Chung Shin Yee <shinyee@multicorewareinc.com> | |
5 | * Min Chen <chenm003@163.com> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
20 | * | |
21 | * This program is also available under a commercial proprietary license. | |
22 | * For more information, contact us at license @ x265.com. | |
23 | *****************************************************************************/ | |
24 | ||
25 | #include "common.h" | |
26 | #include "frame.h" | |
27 | #include "framedata.h" | |
28 | #include "encoder.h" | |
29 | #include "framefilter.h" | |
30 | #include "frameencoder.h" | |
31 | #include "wavefront.h" | |
72b9787e JB |
32 | |
33 | using namespace x265; | |
34 | ||
35 | static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height); | |
36 | static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt); | |
37 | ||
38 | FrameFilter::FrameFilter() | |
39 | : m_param(NULL) | |
40 | , m_frame(NULL) | |
41 | , m_frameEncoder(NULL) | |
42 | , m_ssimBuf(NULL) | |
43 | { | |
44 | } | |
45 | ||
46 | void FrameFilter::destroy() | |
47 | { | |
48 | if (m_param->bEnableSAO) | |
49 | m_sao.destroy(); | |
50 | ||
51 | X265_FREE(m_ssimBuf); | |
52 | } | |
53 | ||
54 | void FrameFilter::init(Encoder *top, FrameEncoder *frame, int numRows) | |
55 | { | |
56 | m_param = top->m_param; | |
57 | m_frameEncoder = frame; | |
58 | m_numRows = numRows; | |
59 | m_hChromaShift = CHROMA_H_SHIFT(m_param->internalCsp); | |
60 | m_vChromaShift = CHROMA_V_SHIFT(m_param->internalCsp); | |
61 | m_pad[0] = top->m_sps.conformanceWindow.rightOffset; | |
62 | m_pad[1] = top->m_sps.conformanceWindow.bottomOffset; | |
63 | m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0; | |
64 | m_lastHeight = m_param->sourceHeight % g_maxCUSize ? m_param->sourceHeight % g_maxCUSize : g_maxCUSize; | |
65 | ||
66 | m_deblock.init(); | |
67 | ||
68 | if (m_param->bEnableSAO) | |
69 | if (!m_sao.create(m_param)) | |
70 | m_param->bEnableSAO = 0; | |
71 | ||
72 | if (m_param->bEnableSsim) | |
73 | m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3)); | |
74 | } | |
75 | ||
76 | void FrameFilter::start(Frame *frame, Entropy& initState, int qp) | |
77 | { | |
78 | m_frame = frame; | |
79 | ||
80 | if (m_param->bEnableSAO) | |
81 | m_sao.startSlice(frame, initState, qp); | |
82 | } | |
83 | ||
84 | void FrameFilter::processRow(int row) | |
85 | { | |
b53f7c52 | 86 | ProfileScopeEvent(filterCTURow); |
72b9787e JB |
87 | |
88 | if (!m_param->bEnableLoopFilter && !m_param->bEnableSAO) | |
89 | { | |
90 | processRowPost(row); | |
91 | return; | |
92 | } | |
93 | FrameData& encData = *m_frame->m_encData; | |
94 | const uint32_t numCols = encData.m_slice->m_sps->numCuInWidth; | |
95 | const uint32_t lineStartCUAddr = row * numCols; | |
96 | ||
97 | if (m_param->bEnableLoopFilter) | |
98 | { | |
99 | for (uint32_t col = 0; col < numCols; col++) | |
100 | { | |
101 | uint32_t cuAddr = lineStartCUAddr + col; | |
b53f7c52 | 102 | const CUData* ctu = encData.getPicCTU(cuAddr); |
72b9787e | 103 | |
b53f7c52 | 104 | m_deblock.deblockCTU(ctu, Deblock::EDGE_VER); |
72b9787e JB |
105 | |
106 | if (col > 0) | |
107 | { | |
b53f7c52 JB |
108 | const CUData* ctuPrev = encData.getPicCTU(cuAddr - 1); |
109 | m_deblock.deblockCTU(ctuPrev, Deblock::EDGE_HOR); | |
72b9787e JB |
110 | } |
111 | } | |
112 | ||
b53f7c52 JB |
113 | const CUData* ctuPrev = encData.getPicCTU(lineStartCUAddr + numCols - 1); |
114 | m_deblock.deblockCTU(ctuPrev, Deblock::EDGE_HOR); | |
72b9787e JB |
115 | } |
116 | ||
117 | // SAO | |
118 | SAOParam* saoParam = encData.m_saoParam; | |
119 | if (m_param->bEnableSAO) | |
120 | { | |
121 | m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext); | |
122 | m_sao.m_rdContexts.next.load(m_frameEncoder->m_initSliceContext); | |
123 | m_sao.m_rdContexts.cur.load(m_frameEncoder->m_initSliceContext); | |
124 | ||
125 | m_sao.rdoSaoUnitRow(saoParam, row); | |
126 | ||
127 | // NOTE: Delay a row because SAO decide need top row pixels at next row, is it HM's bug? | |
128 | if (row >= m_saoRowDelay) | |
129 | processSao(row - m_saoRowDelay); | |
130 | } | |
131 | ||
132 | // this row of CTUs has been encoded | |
133 | ||
134 | if (row > 0) | |
135 | processRowPost(row - 1); | |
136 | ||
137 | if (row == m_numRows - 1) | |
138 | { | |
139 | if (m_param->bEnableSAO) | |
140 | { | |
141 | m_sao.rdoSaoUnitRowEnd(saoParam, encData.m_slice->m_sps->numCUsInFrame); | |
142 | ||
143 | for (int i = m_numRows - m_saoRowDelay; i < m_numRows; i++) | |
144 | processSao(i); | |
145 | } | |
146 | ||
147 | processRowPost(row); | |
148 | } | |
149 | } | |
150 | ||
151 | uint32_t FrameFilter::getCUHeight(int rowNum) const | |
152 | { | |
153 | return rowNum == m_numRows - 1 ? m_lastHeight : g_maxCUSize; | |
154 | } | |
155 | ||
156 | void FrameFilter::processRowPost(int row) | |
157 | { | |
b53f7c52 | 158 | PicYuv *reconPic = m_frame->m_reconPic; |
72b9787e JB |
159 | const uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth; |
160 | const uint32_t lineStartCUAddr = row * numCols; | |
161 | const int realH = getCUHeight(row); | |
162 | ||
163 | // Border extend Left and Right | |
164 | primitives.extendRowBorder(reconPic->getLumaAddr(lineStartCUAddr), reconPic->m_stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX); | |
165 | primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX); | |
166 | primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX); | |
167 | ||
168 | // Border extend Top | |
169 | if (!row) | |
170 | { | |
171 | const intptr_t stride = reconPic->m_stride; | |
172 | const intptr_t strideC = reconPic->m_strideC; | |
173 | pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX; | |
174 | pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX; | |
175 | pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX; | |
176 | ||
177 | for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++) | |
178 | memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel)); | |
179 | ||
180 | for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++) | |
181 | { | |
182 | memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(pixel)); | |
183 | memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(pixel)); | |
184 | } | |
185 | } | |
186 | ||
187 | // Border extend Bottom | |
188 | if (row == m_numRows - 1) | |
189 | { | |
190 | const intptr_t stride = reconPic->m_stride; | |
191 | const intptr_t strideC = reconPic->m_strideC; | |
192 | pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX + (realH - 1) * stride; | |
193 | pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC; | |
194 | pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC; | |
195 | for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++) | |
196 | memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel)); | |
197 | ||
198 | for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++) | |
199 | { | |
200 | memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(pixel)); | |
201 | memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(pixel)); | |
202 | } | |
203 | } | |
204 | ||
205 | // Notify other FrameEncoders that this row of reconstructed pixels is available | |
206 | m_frame->m_reconRowCount.incr(); | |
207 | ||
208 | uint32_t cuAddr = lineStartCUAddr; | |
209 | if (m_param->bEnablePsnr) | |
210 | { | |
b53f7c52 | 211 | PicYuv* fencPic = m_frame->m_fencPic; |
72b9787e JB |
212 | |
213 | intptr_t stride = reconPic->m_stride; | |
214 | uint32_t width = reconPic->m_picWidth - m_pad[0]; | |
215 | uint32_t height = getCUHeight(row); | |
216 | ||
b53f7c52 | 217 | uint64_t ssdY = computeSSD(fencPic->getLumaAddr(cuAddr), reconPic->getLumaAddr(cuAddr), stride, width, height); |
72b9787e JB |
218 | height >>= m_vChromaShift; |
219 | width >>= m_hChromaShift; | |
220 | stride = reconPic->m_strideC; | |
221 | ||
b53f7c52 JB |
222 | uint64_t ssdU = computeSSD(fencPic->getCbAddr(cuAddr), reconPic->getCbAddr(cuAddr), stride, width, height); |
223 | uint64_t ssdV = computeSSD(fencPic->getCrAddr(cuAddr), reconPic->getCrAddr(cuAddr), stride, width, height); | |
72b9787e JB |
224 | |
225 | m_frameEncoder->m_SSDY += ssdY; | |
226 | m_frameEncoder->m_SSDU += ssdU; | |
227 | m_frameEncoder->m_SSDV += ssdV; | |
228 | } | |
229 | if (m_param->bEnableSsim && m_ssimBuf) | |
230 | { | |
b53f7c52 JB |
231 | pixel *rec = m_frame->m_reconPic->m_picOrg[0]; |
232 | pixel *fenc = m_frame->m_fencPic->m_picOrg[0]; | |
233 | intptr_t stride1 = m_frame->m_fencPic->m_stride; | |
234 | intptr_t stride2 = m_frame->m_reconPic->m_stride; | |
72b9787e JB |
235 | uint32_t bEnd = ((row + 1) == (this->m_numRows - 1)); |
236 | uint32_t bStart = (row == 0); | |
237 | uint32_t minPixY = row * g_maxCUSize - 4 * !bStart; | |
238 | uint32_t maxPixY = (row + 1) * g_maxCUSize - 4 * !bEnd; | |
239 | uint32_t ssim_cnt; | |
240 | x265_emms(); | |
241 | ||
242 | /* SSIM is done for each row in blocks of 4x4 . The First blocks are offset by 2 pixels to the right | |
243 | * to avoid alignment of ssim blocks with DCT blocks. */ | |
244 | minPixY += bStart ? 2 : -6; | |
b53f7c52 | 245 | m_frameEncoder->m_ssim += calculateSSIM(rec + 2 + minPixY * stride1, stride1, fenc + 2 + minPixY * stride2, stride2, |
72b9787e JB |
246 | m_param->sourceWidth - 2, maxPixY - minPixY, m_ssimBuf, ssim_cnt); |
247 | m_frameEncoder->m_ssimCnt += ssim_cnt; | |
248 | } | |
249 | if (m_param->decodedPictureHashSEI == 1) | |
250 | { | |
251 | uint32_t height = getCUHeight(row); | |
252 | uint32_t width = reconPic->m_picWidth; | |
253 | intptr_t stride = reconPic->m_stride; | |
254 | ||
255 | if (!row) | |
256 | { | |
257 | for (int i = 0; i < 3; i++) | |
258 | MD5Init(&m_frameEncoder->m_state[i]); | |
259 | } | |
260 | ||
261 | updateMD5Plane(m_frameEncoder->m_state[0], reconPic->getLumaAddr(cuAddr), width, height, stride); | |
262 | width >>= m_hChromaShift; | |
263 | height >>= m_vChromaShift; | |
264 | stride = reconPic->m_strideC; | |
265 | ||
266 | updateMD5Plane(m_frameEncoder->m_state[1], reconPic->getCbAddr(cuAddr), width, height, stride); | |
267 | updateMD5Plane(m_frameEncoder->m_state[2], reconPic->getCrAddr(cuAddr), width, height, stride); | |
268 | } | |
269 | else if (m_param->decodedPictureHashSEI == 2) | |
270 | { | |
271 | uint32_t height = getCUHeight(row); | |
272 | uint32_t width = reconPic->m_picWidth; | |
273 | intptr_t stride = reconPic->m_stride; | |
274 | if (!row) | |
275 | m_frameEncoder->m_crc[0] = m_frameEncoder->m_crc[1] = m_frameEncoder->m_crc[2] = 0xffff; | |
276 | updateCRC(reconPic->getLumaAddr(cuAddr), m_frameEncoder->m_crc[0], height, width, stride); | |
277 | width >>= m_hChromaShift; | |
278 | height >>= m_vChromaShift; | |
279 | stride = reconPic->m_strideC; | |
280 | ||
281 | updateCRC(reconPic->getCbAddr(cuAddr), m_frameEncoder->m_crc[1], height, width, stride); | |
282 | updateCRC(reconPic->getCrAddr(cuAddr), m_frameEncoder->m_crc[2], height, width, stride); | |
283 | } | |
284 | else if (m_param->decodedPictureHashSEI == 3) | |
285 | { | |
286 | uint32_t width = reconPic->m_picWidth; | |
287 | uint32_t height = getCUHeight(row); | |
288 | intptr_t stride = reconPic->m_stride; | |
289 | uint32_t cuHeight = g_maxCUSize; | |
290 | if (!row) | |
291 | m_frameEncoder->m_checksum[0] = m_frameEncoder->m_checksum[1] = m_frameEncoder->m_checksum[2] = 0; | |
292 | updateChecksum(reconPic->m_picOrg[0], m_frameEncoder->m_checksum[0], height, width, stride, row, cuHeight); | |
293 | width >>= m_hChromaShift; | |
294 | height >>= m_vChromaShift; | |
295 | stride = reconPic->m_strideC; | |
296 | cuHeight >>= m_vChromaShift; | |
297 | ||
298 | updateChecksum(reconPic->m_picOrg[1], m_frameEncoder->m_checksum[1], height, width, stride, row, cuHeight); | |
299 | updateChecksum(reconPic->m_picOrg[2], m_frameEncoder->m_checksum[2], height, width, stride, row, cuHeight); | |
300 | } | |
301 | } | |
302 | ||
303 | static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height) | |
304 | { | |
305 | uint64_t ssd = 0; | |
306 | ||
307 | if ((width | height) & 3) | |
308 | { | |
309 | /* Slow Path */ | |
310 | for (uint32_t y = 0; y < height; y++) | |
311 | { | |
312 | for (uint32_t x = 0; x < width; x++) | |
313 | { | |
314 | int diff = (int)(fenc[x] - rec[x]); | |
315 | ssd += diff * diff; | |
316 | } | |
317 | ||
318 | fenc += stride; | |
319 | rec += stride; | |
320 | } | |
321 | ||
322 | return ssd; | |
323 | } | |
324 | ||
325 | uint32_t y = 0; | |
326 | /* Consume Y in chunks of 64 */ | |
327 | for (; y + 64 <= height; y += 64) | |
328 | { | |
329 | uint32_t x = 0; | |
330 | ||
331 | if (!(stride & 31)) | |
332 | for (; x + 64 <= width; x += 64) | |
333 | ssd += primitives.sse_pp[LUMA_64x64](fenc + x, stride, rec + x, stride); | |
334 | ||
335 | if (!(stride & 15)) | |
336 | for (; x + 16 <= width; x += 16) | |
337 | ssd += primitives.sse_pp[LUMA_16x64](fenc + x, stride, rec + x, stride); | |
338 | ||
339 | for (; x + 4 <= width; x += 4) | |
340 | { | |
341 | ssd += primitives.sse_pp[LUMA_4x16](fenc + x, stride, rec + x, stride); | |
342 | ssd += primitives.sse_pp[LUMA_4x16](fenc + x + 16 * stride, stride, rec + x + 16 * stride, stride); | |
343 | ssd += primitives.sse_pp[LUMA_4x16](fenc + x + 32 * stride, stride, rec + x + 32 * stride, stride); | |
344 | ssd += primitives.sse_pp[LUMA_4x16](fenc + x + 48 * stride, stride, rec + x + 48 * stride, stride); | |
345 | } | |
346 | ||
347 | fenc += stride * 64; | |
348 | rec += stride * 64; | |
349 | } | |
350 | ||
351 | /* Consume Y in chunks of 16 */ | |
352 | for (; y + 16 <= height; y += 16) | |
353 | { | |
354 | uint32_t x = 0; | |
355 | ||
356 | if (!(stride & 31)) | |
357 | for (; x + 64 <= width; x += 64) | |
358 | ssd += primitives.sse_pp[LUMA_64x16](fenc + x, stride, rec + x, stride); | |
359 | ||
360 | if (!(stride & 15)) | |
361 | for (; x + 16 <= width; x += 16) | |
362 | ssd += primitives.sse_pp[LUMA_16x16](fenc + x, stride, rec + x, stride); | |
363 | ||
364 | for (; x + 4 <= width; x += 4) | |
365 | ssd += primitives.sse_pp[LUMA_4x16](fenc + x, stride, rec + x, stride); | |
366 | ||
367 | fenc += stride * 16; | |
368 | rec += stride * 16; | |
369 | } | |
370 | ||
371 | /* Consume Y in chunks of 4 */ | |
372 | for (; y + 4 <= height; y += 4) | |
373 | { | |
374 | uint32_t x = 0; | |
375 | ||
376 | if (!(stride & 15)) | |
377 | for (; x + 16 <= width; x += 16) | |
378 | ssd += primitives.sse_pp[LUMA_16x4](fenc + x, stride, rec + x, stride); | |
379 | ||
380 | for (; x + 4 <= width; x += 4) | |
381 | ssd += primitives.sse_pp[LUMA_4x4](fenc + x, stride, rec + x, stride); | |
382 | ||
383 | fenc += stride * 4; | |
384 | rec += stride * 4; | |
385 | } | |
386 | ||
387 | return ssd; | |
388 | } | |
389 | ||
390 | /* Function to calculate SSIM for each row */ | |
391 | static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt) | |
392 | { | |
393 | uint32_t z = 0; | |
394 | float ssim = 0.0; | |
395 | ||
396 | int(*sum0)[4] = (int(*)[4])buf; | |
397 | int(*sum1)[4] = sum0 + (width >> 2) + 3; | |
398 | width >>= 2; | |
399 | height >>= 2; | |
400 | ||
401 | for (uint32_t y = 1; y < height; y++) | |
402 | { | |
403 | for (; z <= y; z++) | |
404 | { | |
405 | std::swap(sum0, sum1); | |
406 | for (uint32_t x = 0; x < width; x += 2) | |
407 | primitives.ssim_4x4x2_core(&pix1[(4 * x + (z * stride1))], stride1, &pix2[(4 * x + (z * stride2))], stride2, &sum0[x]); | |
408 | } | |
409 | ||
410 | for (uint32_t x = 0; x < width - 1; x += 4) | |
411 | ssim += primitives.ssim_end_4(sum0 + x, sum1 + x, X265_MIN(4, width - x - 1)); | |
412 | } | |
413 | ||
414 | cnt = (height - 1) * (width - 1); | |
415 | return ssim; | |
416 | } | |
417 | ||
418 | /* restore original YUV samples to recon after SAO (if lossless) */ | |
419 | static void restoreOrigLosslessYuv(const CUData* cu, Frame& frame, uint32_t absPartIdx, uint32_t depth) | |
420 | { | |
421 | uint32_t size = g_maxCUSize >> depth; | |
422 | int part = partitionFromSizes(size, size); | |
423 | ||
b53f7c52 JB |
424 | PicYuv* reconPic = frame.m_reconPic; |
425 | PicYuv* fencPic = frame.m_fencPic; | |
72b9787e JB |
426 | |
427 | pixel* dst = reconPic->getLumaAddr(cu->m_cuAddr, absPartIdx); | |
428 | pixel* src = fencPic->getLumaAddr(cu->m_cuAddr, absPartIdx); | |
429 | ||
430 | primitives.luma_copy_pp[part](dst, reconPic->m_stride, src, fencPic->m_stride); | |
431 | ||
432 | pixel* dstCb = reconPic->getCbAddr(cu->m_cuAddr, absPartIdx); | |
433 | pixel* srcCb = fencPic->getCbAddr(cu->m_cuAddr, absPartIdx); | |
434 | ||
435 | pixel* dstCr = reconPic->getCrAddr(cu->m_cuAddr, absPartIdx); | |
436 | pixel* srcCr = fencPic->getCrAddr(cu->m_cuAddr, absPartIdx); | |
437 | ||
438 | int csp = fencPic->m_picCsp; | |
439 | primitives.chroma[csp].copy_pp[part](dstCb, reconPic->m_strideC, srcCb, fencPic->m_strideC); | |
440 | primitives.chroma[csp].copy_pp[part](dstCr, reconPic->m_strideC, srcCr, fencPic->m_strideC); | |
441 | } | |
442 | ||
443 | /* Original YUV restoration for CU in lossless coding */ | |
444 | static void origCUSampleRestoration(const CUData* cu, Frame& frame, uint32_t absPartIdx, uint32_t depth) | |
445 | { | |
446 | if (cu->m_cuDepth[absPartIdx] > depth) | |
447 | { | |
448 | /* TODO: this could use cuGeom.numPartition and flags */ | |
449 | uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1); | |
450 | uint32_t qNumParts = curNumParts >> 2; | |
451 | uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples - cu->m_cuPelX; | |
452 | uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->m_cuPelY; | |
453 | ||
454 | /* process four split sub-cu at next depth */ | |
455 | for (int subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts) | |
456 | { | |
457 | if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax) | |
458 | origCUSampleRestoration(cu, frame, absPartIdx, depth + 1); | |
459 | } | |
460 | ||
461 | return; | |
462 | } | |
463 | ||
464 | // restore original YUV samples | |
465 | if (cu->m_tqBypass[absPartIdx]) | |
466 | restoreOrigLosslessYuv(cu, frame, absPartIdx, depth); | |
467 | } | |
468 | ||
469 | void FrameFilter::processSao(int row) | |
470 | { | |
471 | SAOParam* saoParam = m_frame->m_encData->m_saoParam; | |
472 | ||
473 | if (saoParam->bSaoFlag[0]) | |
474 | m_sao.processSaoUnitRow(saoParam->ctuParam[0], row, 0); | |
475 | ||
476 | if (saoParam->bSaoFlag[1]) | |
477 | { | |
478 | m_sao.processSaoUnitRow(saoParam->ctuParam[1], row, 1); | |
479 | m_sao.processSaoUnitRow(saoParam->ctuParam[2], row, 2); | |
480 | } | |
481 | ||
482 | if (m_frame->m_encData->m_slice->m_pps->bTransquantBypassEnabled) | |
483 | { | |
484 | uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth; | |
485 | uint32_t lineStartCUAddr = row * numCols; | |
486 | ||
487 | for (uint32_t col = 0; col < numCols; col++) | |
488 | origCUSampleRestoration(m_frame->m_encData->getPicCTU(lineStartCUAddr + col), *m_frame, 0, 0); | |
489 | } | |
490 | } |