Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Authors: Chung Shin Yee <shinyee@multicorewareinc.com> | |
5 | * Min Chen <chenm003@163.com> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
20 | * | |
21 | * This program is also available under a commercial proprietary license. | |
22 | * For more information, contact us at license @ x265.com. | |
23 | *****************************************************************************/ | |
24 | ||
25 | #include "common.h" | |
26 | #include "frame.h" | |
27 | #include "framedata.h" | |
28 | #include "encoder.h" | |
29 | #include "framefilter.h" | |
30 | #include "frameencoder.h" | |
31 | #include "wavefront.h" | |
32 | #include "PPA/ppa.h" | |
33 | ||
34 | using namespace x265; | |
35 | ||
36 | static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height); | |
37 | static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt); | |
38 | ||
39 | FrameFilter::FrameFilter() | |
40 | : m_param(NULL) | |
41 | , m_frame(NULL) | |
42 | , m_frameEncoder(NULL) | |
43 | , m_ssimBuf(NULL) | |
44 | { | |
45 | } | |
46 | ||
47 | void FrameFilter::destroy() | |
48 | { | |
49 | if (m_param->bEnableSAO) | |
50 | m_sao.destroy(); | |
51 | ||
52 | X265_FREE(m_ssimBuf); | |
53 | } | |
54 | ||
55 | void FrameFilter::init(Encoder *top, FrameEncoder *frame, int numRows) | |
56 | { | |
57 | m_param = top->m_param; | |
58 | m_frameEncoder = frame; | |
59 | m_numRows = numRows; | |
60 | m_hChromaShift = CHROMA_H_SHIFT(m_param->internalCsp); | |
61 | m_vChromaShift = CHROMA_V_SHIFT(m_param->internalCsp); | |
62 | m_pad[0] = top->m_sps.conformanceWindow.rightOffset; | |
63 | m_pad[1] = top->m_sps.conformanceWindow.bottomOffset; | |
64 | m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0; | |
65 | m_lastHeight = m_param->sourceHeight % g_maxCUSize ? m_param->sourceHeight % g_maxCUSize : g_maxCUSize; | |
66 | ||
67 | m_deblock.init(); | |
68 | ||
69 | if (m_param->bEnableSAO) | |
70 | if (!m_sao.create(m_param)) | |
71 | m_param->bEnableSAO = 0; | |
72 | ||
73 | if (m_param->bEnableSsim) | |
74 | m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3)); | |
75 | } | |
76 | ||
77 | void FrameFilter::start(Frame *frame, Entropy& initState, int qp) | |
78 | { | |
79 | m_frame = frame; | |
80 | ||
81 | if (m_param->bEnableSAO) | |
82 | m_sao.startSlice(frame, initState, qp); | |
83 | } | |
84 | ||
85 | void FrameFilter::processRow(int row) | |
86 | { | |
87 | PPAScopeEvent(Thread_filterCU); | |
88 | ||
89 | if (!m_param->bEnableLoopFilter && !m_param->bEnableSAO) | |
90 | { | |
91 | processRowPost(row); | |
92 | return; | |
93 | } | |
94 | FrameData& encData = *m_frame->m_encData; | |
95 | const uint32_t numCols = encData.m_slice->m_sps->numCuInWidth; | |
96 | const uint32_t lineStartCUAddr = row * numCols; | |
97 | ||
98 | if (m_param->bEnableLoopFilter) | |
99 | { | |
100 | for (uint32_t col = 0; col < numCols; col++) | |
101 | { | |
102 | uint32_t cuAddr = lineStartCUAddr + col; | |
103 | CUData* cu = encData.getPicCTU(cuAddr); | |
104 | ||
105 | m_deblock.deblockCTU(cu, Deblock::EDGE_VER); | |
106 | ||
107 | if (col > 0) | |
108 | { | |
109 | CUData* cuPrev = encData.getPicCTU(cuAddr - 1); | |
110 | m_deblock.deblockCTU(cuPrev, Deblock::EDGE_HOR); | |
111 | } | |
112 | } | |
113 | ||
114 | CUData* cuPrev = encData.getPicCTU(lineStartCUAddr + numCols - 1); | |
115 | m_deblock.deblockCTU(cuPrev, Deblock::EDGE_HOR); | |
116 | } | |
117 | ||
118 | // SAO | |
119 | SAOParam* saoParam = encData.m_saoParam; | |
120 | if (m_param->bEnableSAO) | |
121 | { | |
122 | m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext); | |
123 | m_sao.m_rdContexts.next.load(m_frameEncoder->m_initSliceContext); | |
124 | m_sao.m_rdContexts.cur.load(m_frameEncoder->m_initSliceContext); | |
125 | ||
126 | m_sao.rdoSaoUnitRow(saoParam, row); | |
127 | ||
128 | // NOTE: Delay a row because SAO decide need top row pixels at next row, is it HM's bug? | |
129 | if (row >= m_saoRowDelay) | |
130 | processSao(row - m_saoRowDelay); | |
131 | } | |
132 | ||
133 | // this row of CTUs has been encoded | |
134 | ||
135 | if (row > 0) | |
136 | processRowPost(row - 1); | |
137 | ||
138 | if (row == m_numRows - 1) | |
139 | { | |
140 | if (m_param->bEnableSAO) | |
141 | { | |
142 | m_sao.rdoSaoUnitRowEnd(saoParam, encData.m_slice->m_sps->numCUsInFrame); | |
143 | ||
144 | for (int i = m_numRows - m_saoRowDelay; i < m_numRows; i++) | |
145 | processSao(i); | |
146 | } | |
147 | ||
148 | processRowPost(row); | |
149 | } | |
150 | } | |
151 | ||
152 | uint32_t FrameFilter::getCUHeight(int rowNum) const | |
153 | { | |
154 | return rowNum == m_numRows - 1 ? m_lastHeight : g_maxCUSize; | |
155 | } | |
156 | ||
157 | void FrameFilter::processRowPost(int row) | |
158 | { | |
159 | PicYuv *reconPic = m_frame->m_reconPicYuv; | |
160 | const uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth; | |
161 | const uint32_t lineStartCUAddr = row * numCols; | |
162 | const int realH = getCUHeight(row); | |
163 | ||
164 | // Border extend Left and Right | |
165 | primitives.extendRowBorder(reconPic->getLumaAddr(lineStartCUAddr), reconPic->m_stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX); | |
166 | primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX); | |
167 | primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX); | |
168 | ||
169 | // Border extend Top | |
170 | if (!row) | |
171 | { | |
172 | const intptr_t stride = reconPic->m_stride; | |
173 | const intptr_t strideC = reconPic->m_strideC; | |
174 | pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX; | |
175 | pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX; | |
176 | pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX; | |
177 | ||
178 | for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++) | |
179 | memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel)); | |
180 | ||
181 | for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++) | |
182 | { | |
183 | memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(pixel)); | |
184 | memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(pixel)); | |
185 | } | |
186 | } | |
187 | ||
188 | // Border extend Bottom | |
189 | if (row == m_numRows - 1) | |
190 | { | |
191 | const intptr_t stride = reconPic->m_stride; | |
192 | const intptr_t strideC = reconPic->m_strideC; | |
193 | pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX + (realH - 1) * stride; | |
194 | pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC; | |
195 | pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC; | |
196 | for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++) | |
197 | memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel)); | |
198 | ||
199 | for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++) | |
200 | { | |
201 | memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(pixel)); | |
202 | memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(pixel)); | |
203 | } | |
204 | } | |
205 | ||
206 | // Notify other FrameEncoders that this row of reconstructed pixels is available | |
207 | m_frame->m_reconRowCount.incr(); | |
208 | ||
209 | uint32_t cuAddr = lineStartCUAddr; | |
210 | if (m_param->bEnablePsnr) | |
211 | { | |
212 | PicYuv* origPic = m_frame->m_origPicYuv; | |
213 | ||
214 | intptr_t stride = reconPic->m_stride; | |
215 | uint32_t width = reconPic->m_picWidth - m_pad[0]; | |
216 | uint32_t height = getCUHeight(row); | |
217 | ||
218 | uint64_t ssdY = computeSSD(origPic->getLumaAddr(cuAddr), reconPic->getLumaAddr(cuAddr), stride, width, height); | |
219 | height >>= m_vChromaShift; | |
220 | width >>= m_hChromaShift; | |
221 | stride = reconPic->m_strideC; | |
222 | ||
223 | uint64_t ssdU = computeSSD(origPic->getCbAddr(cuAddr), reconPic->getCbAddr(cuAddr), stride, width, height); | |
224 | uint64_t ssdV = computeSSD(origPic->getCrAddr(cuAddr), reconPic->getCrAddr(cuAddr), stride, width, height); | |
225 | ||
226 | m_frameEncoder->m_SSDY += ssdY; | |
227 | m_frameEncoder->m_SSDU += ssdU; | |
228 | m_frameEncoder->m_SSDV += ssdV; | |
229 | } | |
230 | if (m_param->bEnableSsim && m_ssimBuf) | |
231 | { | |
232 | pixel *rec = m_frame->m_reconPicYuv->m_picOrg[0]; | |
233 | pixel *org = m_frame->m_origPicYuv->m_picOrg[0]; | |
234 | intptr_t stride1 = m_frame->m_origPicYuv->m_stride; | |
235 | intptr_t stride2 = m_frame->m_reconPicYuv->m_stride; | |
236 | uint32_t bEnd = ((row + 1) == (this->m_numRows - 1)); | |
237 | uint32_t bStart = (row == 0); | |
238 | uint32_t minPixY = row * g_maxCUSize - 4 * !bStart; | |
239 | uint32_t maxPixY = (row + 1) * g_maxCUSize - 4 * !bEnd; | |
240 | uint32_t ssim_cnt; | |
241 | x265_emms(); | |
242 | ||
243 | /* SSIM is done for each row in blocks of 4x4 . The First blocks are offset by 2 pixels to the right | |
244 | * to avoid alignment of ssim blocks with DCT blocks. */ | |
245 | minPixY += bStart ? 2 : -6; | |
246 | m_frameEncoder->m_ssim += calculateSSIM(rec + 2 + minPixY * stride1, stride1, org + 2 + minPixY * stride2, stride2, | |
247 | m_param->sourceWidth - 2, maxPixY - minPixY, m_ssimBuf, ssim_cnt); | |
248 | m_frameEncoder->m_ssimCnt += ssim_cnt; | |
249 | } | |
250 | if (m_param->decodedPictureHashSEI == 1) | |
251 | { | |
252 | uint32_t height = getCUHeight(row); | |
253 | uint32_t width = reconPic->m_picWidth; | |
254 | intptr_t stride = reconPic->m_stride; | |
255 | ||
256 | if (!row) | |
257 | { | |
258 | for (int i = 0; i < 3; i++) | |
259 | MD5Init(&m_frameEncoder->m_state[i]); | |
260 | } | |
261 | ||
262 | updateMD5Plane(m_frameEncoder->m_state[0], reconPic->getLumaAddr(cuAddr), width, height, stride); | |
263 | width >>= m_hChromaShift; | |
264 | height >>= m_vChromaShift; | |
265 | stride = reconPic->m_strideC; | |
266 | ||
267 | updateMD5Plane(m_frameEncoder->m_state[1], reconPic->getCbAddr(cuAddr), width, height, stride); | |
268 | updateMD5Plane(m_frameEncoder->m_state[2], reconPic->getCrAddr(cuAddr), width, height, stride); | |
269 | } | |
270 | else if (m_param->decodedPictureHashSEI == 2) | |
271 | { | |
272 | uint32_t height = getCUHeight(row); | |
273 | uint32_t width = reconPic->m_picWidth; | |
274 | intptr_t stride = reconPic->m_stride; | |
275 | if (!row) | |
276 | m_frameEncoder->m_crc[0] = m_frameEncoder->m_crc[1] = m_frameEncoder->m_crc[2] = 0xffff; | |
277 | updateCRC(reconPic->getLumaAddr(cuAddr), m_frameEncoder->m_crc[0], height, width, stride); | |
278 | width >>= m_hChromaShift; | |
279 | height >>= m_vChromaShift; | |
280 | stride = reconPic->m_strideC; | |
281 | ||
282 | updateCRC(reconPic->getCbAddr(cuAddr), m_frameEncoder->m_crc[1], height, width, stride); | |
283 | updateCRC(reconPic->getCrAddr(cuAddr), m_frameEncoder->m_crc[2], height, width, stride); | |
284 | } | |
285 | else if (m_param->decodedPictureHashSEI == 3) | |
286 | { | |
287 | uint32_t width = reconPic->m_picWidth; | |
288 | uint32_t height = getCUHeight(row); | |
289 | intptr_t stride = reconPic->m_stride; | |
290 | uint32_t cuHeight = g_maxCUSize; | |
291 | if (!row) | |
292 | m_frameEncoder->m_checksum[0] = m_frameEncoder->m_checksum[1] = m_frameEncoder->m_checksum[2] = 0; | |
293 | updateChecksum(reconPic->m_picOrg[0], m_frameEncoder->m_checksum[0], height, width, stride, row, cuHeight); | |
294 | width >>= m_hChromaShift; | |
295 | height >>= m_vChromaShift; | |
296 | stride = reconPic->m_strideC; | |
297 | cuHeight >>= m_vChromaShift; | |
298 | ||
299 | updateChecksum(reconPic->m_picOrg[1], m_frameEncoder->m_checksum[1], height, width, stride, row, cuHeight); | |
300 | updateChecksum(reconPic->m_picOrg[2], m_frameEncoder->m_checksum[2], height, width, stride, row, cuHeight); | |
301 | } | |
302 | } | |
303 | ||
304 | static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height) | |
305 | { | |
306 | uint64_t ssd = 0; | |
307 | ||
308 | if ((width | height) & 3) | |
309 | { | |
310 | /* Slow Path */ | |
311 | for (uint32_t y = 0; y < height; y++) | |
312 | { | |
313 | for (uint32_t x = 0; x < width; x++) | |
314 | { | |
315 | int diff = (int)(fenc[x] - rec[x]); | |
316 | ssd += diff * diff; | |
317 | } | |
318 | ||
319 | fenc += stride; | |
320 | rec += stride; | |
321 | } | |
322 | ||
323 | return ssd; | |
324 | } | |
325 | ||
326 | uint32_t y = 0; | |
327 | /* Consume Y in chunks of 64 */ | |
328 | for (; y + 64 <= height; y += 64) | |
329 | { | |
330 | uint32_t x = 0; | |
331 | ||
332 | if (!(stride & 31)) | |
333 | for (; x + 64 <= width; x += 64) | |
334 | ssd += primitives.sse_pp[LUMA_64x64](fenc + x, stride, rec + x, stride); | |
335 | ||
336 | if (!(stride & 15)) | |
337 | for (; x + 16 <= width; x += 16) | |
338 | ssd += primitives.sse_pp[LUMA_16x64](fenc + x, stride, rec + x, stride); | |
339 | ||
340 | for (; x + 4 <= width; x += 4) | |
341 | { | |
342 | ssd += primitives.sse_pp[LUMA_4x16](fenc + x, stride, rec + x, stride); | |
343 | ssd += primitives.sse_pp[LUMA_4x16](fenc + x + 16 * stride, stride, rec + x + 16 * stride, stride); | |
344 | ssd += primitives.sse_pp[LUMA_4x16](fenc + x + 32 * stride, stride, rec + x + 32 * stride, stride); | |
345 | ssd += primitives.sse_pp[LUMA_4x16](fenc + x + 48 * stride, stride, rec + x + 48 * stride, stride); | |
346 | } | |
347 | ||
348 | fenc += stride * 64; | |
349 | rec += stride * 64; | |
350 | } | |
351 | ||
352 | /* Consume Y in chunks of 16 */ | |
353 | for (; y + 16 <= height; y += 16) | |
354 | { | |
355 | uint32_t x = 0; | |
356 | ||
357 | if (!(stride & 31)) | |
358 | for (; x + 64 <= width; x += 64) | |
359 | ssd += primitives.sse_pp[LUMA_64x16](fenc + x, stride, rec + x, stride); | |
360 | ||
361 | if (!(stride & 15)) | |
362 | for (; x + 16 <= width; x += 16) | |
363 | ssd += primitives.sse_pp[LUMA_16x16](fenc + x, stride, rec + x, stride); | |
364 | ||
365 | for (; x + 4 <= width; x += 4) | |
366 | ssd += primitives.sse_pp[LUMA_4x16](fenc + x, stride, rec + x, stride); | |
367 | ||
368 | fenc += stride * 16; | |
369 | rec += stride * 16; | |
370 | } | |
371 | ||
372 | /* Consume Y in chunks of 4 */ | |
373 | for (; y + 4 <= height; y += 4) | |
374 | { | |
375 | uint32_t x = 0; | |
376 | ||
377 | if (!(stride & 15)) | |
378 | for (; x + 16 <= width; x += 16) | |
379 | ssd += primitives.sse_pp[LUMA_16x4](fenc + x, stride, rec + x, stride); | |
380 | ||
381 | for (; x + 4 <= width; x += 4) | |
382 | ssd += primitives.sse_pp[LUMA_4x4](fenc + x, stride, rec + x, stride); | |
383 | ||
384 | fenc += stride * 4; | |
385 | rec += stride * 4; | |
386 | } | |
387 | ||
388 | return ssd; | |
389 | } | |
390 | ||
391 | /* Function to calculate SSIM for each row */ | |
392 | static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt) | |
393 | { | |
394 | uint32_t z = 0; | |
395 | float ssim = 0.0; | |
396 | ||
397 | int(*sum0)[4] = (int(*)[4])buf; | |
398 | int(*sum1)[4] = sum0 + (width >> 2) + 3; | |
399 | width >>= 2; | |
400 | height >>= 2; | |
401 | ||
402 | for (uint32_t y = 1; y < height; y++) | |
403 | { | |
404 | for (; z <= y; z++) | |
405 | { | |
406 | std::swap(sum0, sum1); | |
407 | for (uint32_t x = 0; x < width; x += 2) | |
408 | primitives.ssim_4x4x2_core(&pix1[(4 * x + (z * stride1))], stride1, &pix2[(4 * x + (z * stride2))], stride2, &sum0[x]); | |
409 | } | |
410 | ||
411 | for (uint32_t x = 0; x < width - 1; x += 4) | |
412 | ssim += primitives.ssim_end_4(sum0 + x, sum1 + x, X265_MIN(4, width - x - 1)); | |
413 | } | |
414 | ||
415 | cnt = (height - 1) * (width - 1); | |
416 | return ssim; | |
417 | } | |
418 | ||
419 | /* restore original YUV samples to recon after SAO (if lossless) */ | |
420 | static void restoreOrigLosslessYuv(const CUData* cu, Frame& frame, uint32_t absPartIdx, uint32_t depth) | |
421 | { | |
422 | uint32_t size = g_maxCUSize >> depth; | |
423 | int part = partitionFromSizes(size, size); | |
424 | ||
425 | PicYuv* reconPic = frame.m_reconPicYuv; | |
426 | PicYuv* fencPic = frame.m_origPicYuv; | |
427 | ||
428 | pixel* dst = reconPic->getLumaAddr(cu->m_cuAddr, absPartIdx); | |
429 | pixel* src = fencPic->getLumaAddr(cu->m_cuAddr, absPartIdx); | |
430 | ||
431 | primitives.luma_copy_pp[part](dst, reconPic->m_stride, src, fencPic->m_stride); | |
432 | ||
433 | pixel* dstCb = reconPic->getCbAddr(cu->m_cuAddr, absPartIdx); | |
434 | pixel* srcCb = fencPic->getCbAddr(cu->m_cuAddr, absPartIdx); | |
435 | ||
436 | pixel* dstCr = reconPic->getCrAddr(cu->m_cuAddr, absPartIdx); | |
437 | pixel* srcCr = fencPic->getCrAddr(cu->m_cuAddr, absPartIdx); | |
438 | ||
439 | int csp = fencPic->m_picCsp; | |
440 | primitives.chroma[csp].copy_pp[part](dstCb, reconPic->m_strideC, srcCb, fencPic->m_strideC); | |
441 | primitives.chroma[csp].copy_pp[part](dstCr, reconPic->m_strideC, srcCr, fencPic->m_strideC); | |
442 | } | |
443 | ||
444 | /* Original YUV restoration for CU in lossless coding */ | |
445 | static void origCUSampleRestoration(const CUData* cu, Frame& frame, uint32_t absPartIdx, uint32_t depth) | |
446 | { | |
447 | if (cu->m_cuDepth[absPartIdx] > depth) | |
448 | { | |
449 | /* TODO: this could use cuGeom.numPartition and flags */ | |
450 | uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1); | |
451 | uint32_t qNumParts = curNumParts >> 2; | |
452 | uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples - cu->m_cuPelX; | |
453 | uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->m_cuPelY; | |
454 | ||
455 | /* process four split sub-cu at next depth */ | |
456 | for (int subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts) | |
457 | { | |
458 | if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax) | |
459 | origCUSampleRestoration(cu, frame, absPartIdx, depth + 1); | |
460 | } | |
461 | ||
462 | return; | |
463 | } | |
464 | ||
465 | // restore original YUV samples | |
466 | if (cu->m_tqBypass[absPartIdx]) | |
467 | restoreOrigLosslessYuv(cu, frame, absPartIdx, depth); | |
468 | } | |
469 | ||
470 | void FrameFilter::processSao(int row) | |
471 | { | |
472 | SAOParam* saoParam = m_frame->m_encData->m_saoParam; | |
473 | ||
474 | if (saoParam->bSaoFlag[0]) | |
475 | m_sao.processSaoUnitRow(saoParam->ctuParam[0], row, 0); | |
476 | ||
477 | if (saoParam->bSaoFlag[1]) | |
478 | { | |
479 | m_sao.processSaoUnitRow(saoParam->ctuParam[1], row, 1); | |
480 | m_sao.processSaoUnitRow(saoParam->ctuParam[2], row, 2); | |
481 | } | |
482 | ||
483 | if (m_frame->m_encData->m_slice->m_pps->bTransquantBypassEnabled) | |
484 | { | |
485 | uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth; | |
486 | uint32_t lineStartCUAddr = row * numCols; | |
487 | ||
488 | for (uint32_t col = 0; col < numCols; col++) | |
489 | origCUSampleRestoration(m_frame->m_encData->getPicCTU(lineStartCUAddr + col), *m_frame, 0, 0); | |
490 | } | |
491 | } |