Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Authors: Steve Borho <steve@borho.org> | |
5 | * Min Chen <chenm003@163.com> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
20 | * | |
21 | * This program is also available under a commercial proprietary license. | |
22 | * For more information, contact us at license @ x265.com. | |
23 | *****************************************************************************/ | |
24 | ||
25 | #include "common.h" | |
26 | #include "frame.h" | |
27 | #include "framedata.h" | |
28 | #include "picyuv.h" | |
29 | #include "sao.h" | |
30 | ||
31 | namespace { | |
32 | ||
/* Divide num by den and round to the nearest integer.  For a positive den an
 * exact half rounds away from zero: a negative num is handled by rounding its
 * magnitude and negating the result. */
inline int32_t roundIBDI(int32_t num, int32_t den)
{
    const int32_t twiceDen = den * 2;

    if (num < 0)
        return -((-num * 2 + den) / twiceDen);

    return (num * 2 + den) / twiceDen;
}
37 | ||
/* get the sign of input variable: -1 for negative, 0 for zero, +1 for positive
 * (TODO: this is a dup, make common)
 *
 * Written with comparisons rather than negate-and-shift so that it is well
 * defined for every int (negating INT_MIN overflows) and does not depend on
 * the width of int or on how the compiler shifts negative values. */
inline int signOf(int x)
{
    return (x > 0) - (x < 0);
}
43 | ||
/* Evaluate (count * offset - 2 * offsetOrg) * offset, i.e.
 * count * offset^2 - 2 * offset * offsetOrg.
 *
 * count     - number of samples in the class
 * offset    - candidate offset
 * offsetOrg - accumulated difference for the class (the statistics functions
 *             in this file sum original minus reconstructed samples)
 *
 * The operands are widened first so that the whole expression is evaluated in
 * 64 bits; previously the products were formed in 32-bit int and could
 * overflow before being converted to the int64_t return type. */
inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg)
{
    const int64_t wideOffset = offset;
    const int64_t wideOrg = offsetOrg;

    return (count * wideOffset - wideOrg * 2) * wideOffset;
}
48 | ||
49 | } // end anonymous namespace | |
50 | ||
51 | ||
52 | namespace x265 { | |
53 | ||
54 | const uint32_t SAO::s_eoTable[NUM_EDGETYPE] = | |
55 | { | |
56 | 1, // 0 | |
57 | 2, // 1 | |
58 | 0, // 2 | |
59 | 3, // 3 | |
60 | 4 // 4 | |
61 | }; | |
62 | ||
63 | SAO::SAO() | |
64 | { | |
65 | m_count = NULL; | |
66 | m_offset = NULL; | |
67 | m_offsetOrg = NULL; | |
68 | m_countPreDblk = NULL; | |
69 | m_offsetOrgPreDblk = NULL; | |
70 | m_refDepth = 0; | |
71 | m_lumaLambda = 0; | |
72 | m_chromaLambda = 0; | |
73 | m_param = NULL; | |
74 | m_clipTable = NULL; | |
75 | m_clipTableBase = NULL; | |
76 | m_offsetBo = NULL; | |
77 | m_tmpU1[0] = NULL; | |
78 | m_tmpU1[1] = NULL; | |
79 | m_tmpU1[2] = NULL; | |
80 | m_tmpU2[0] = NULL; | |
81 | m_tmpU2[1] = NULL; | |
82 | m_tmpU2[2] = NULL; | |
83 | m_tmpL1 = NULL; | |
84 | m_tmpL2 = NULL; | |
85 | ||
86 | m_depthSaoRate[0][0] = 0; | |
87 | m_depthSaoRate[0][1] = 0; | |
88 | m_depthSaoRate[0][2] = 0; | |
89 | m_depthSaoRate[0][3] = 0; | |
90 | m_depthSaoRate[1][0] = 0; | |
91 | m_depthSaoRate[1][1] = 0; | |
92 | m_depthSaoRate[1][2] = 0; | |
93 | m_depthSaoRate[1][3] = 0; | |
94 | } | |
95 | ||
96 | bool SAO::create(x265_param* param) | |
97 | { | |
98 | m_param = param; | |
99 | m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp); | |
100 | m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp); | |
101 | ||
102 | m_numCuInWidth = (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize; | |
103 | m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize; | |
104 | ||
105 | const pixel maxY = (1 << X265_DEPTH) - 1; | |
106 | const pixel rangeExt = maxY >> 1; | |
107 | int numCtu = m_numCuInWidth * m_numCuInHeight; | |
108 | ||
109 | CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt); | |
110 | CHECKED_MALLOC(m_offsetBo, pixel, maxY + 2 * rangeExt); | |
111 | ||
112 | CHECKED_MALLOC(m_tmpL1, pixel, g_maxCUSize + 1); | |
113 | CHECKED_MALLOC(m_tmpL2, pixel, g_maxCUSize + 1); | |
114 | ||
115 | for (int i = 0; i < 3; i++) | |
116 | { | |
117 | CHECKED_MALLOC(m_tmpU1[i], pixel, m_param->sourceWidth); | |
118 | CHECKED_MALLOC(m_tmpU2[i], pixel, m_param->sourceWidth); | |
119 | } | |
120 | ||
121 | CHECKED_MALLOC(m_count, PerClass, NUM_PLANE); | |
122 | CHECKED_MALLOC(m_offset, PerClass, NUM_PLANE); | |
123 | CHECKED_MALLOC(m_offsetOrg, PerClass, NUM_PLANE); | |
124 | ||
125 | CHECKED_MALLOC(m_countPreDblk, PerPlane, numCtu); | |
126 | CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numCtu); | |
127 | ||
128 | m_clipTable = &(m_clipTableBase[rangeExt]); | |
129 | ||
130 | for (int i = 0; i < rangeExt; i++) | |
131 | m_clipTableBase[i] = 0; | |
132 | ||
133 | for (int i = 0; i < maxY; i++) | |
134 | m_clipTable[i] = (pixel)i; | |
135 | ||
136 | for (int i = maxY; i < maxY + rangeExt; i++) | |
137 | m_clipTable[i] = maxY; | |
138 | ||
139 | return true; | |
140 | ||
141 | fail: | |
142 | return false; | |
143 | } | |
144 | ||
145 | void SAO::destroy() | |
146 | { | |
147 | X265_FREE(m_clipTableBase); | |
148 | X265_FREE(m_offsetBo); | |
149 | ||
150 | X265_FREE(m_tmpL1); | |
151 | X265_FREE(m_tmpL2); | |
152 | ||
153 | for (int i = 0; i < 3; i++) | |
154 | { | |
155 | X265_FREE(m_tmpU1[i]); | |
156 | X265_FREE(m_tmpU2[i]); | |
157 | } | |
158 | ||
159 | X265_FREE(m_count); | |
160 | X265_FREE(m_offset); | |
161 | X265_FREE(m_offsetOrg); | |
162 | X265_FREE(m_countPreDblk); | |
163 | X265_FREE(m_offsetOrgPreDblk); | |
164 | } | |
165 | ||
166 | /* allocate memory for SAO parameters */ | |
167 | void SAO::allocSaoParam(SAOParam* saoParam) const | |
168 | { | |
169 | saoParam->numCuInWidth = m_numCuInWidth; | |
170 | ||
171 | saoParam->ctuParam[0] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth]; | |
172 | saoParam->ctuParam[1] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth]; | |
173 | saoParam->ctuParam[2] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth]; | |
174 | } | |
175 | ||
176 | void SAO::startSlice(Frame* frame, Entropy& initState, int qp) | |
177 | { | |
178 | Slice* slice = frame->m_encData->m_slice; | |
179 | ||
180 | int qpCb = Clip3(0, QP_MAX_MAX, qp + slice->m_pps->chromaCbQpOffset); | |
181 | m_lumaLambda = x265_lambda2_tab[qp]; | |
182 | m_chromaLambda = x265_lambda2_tab[qpCb]; // Use Cb QP for SAO chroma | |
183 | m_frame = frame; | |
184 | ||
185 | switch (slice->m_sliceType) | |
186 | { | |
187 | case I_SLICE: | |
188 | m_refDepth = 0; | |
189 | break; | |
190 | case P_SLICE: | |
191 | m_refDepth = 1; | |
192 | break; | |
193 | case B_SLICE: | |
194 | m_refDepth = 2 + !IS_REFERENCED(frame); | |
195 | break; | |
196 | } | |
197 | ||
198 | resetStats(); | |
199 | ||
200 | m_entropyCoder.load(initState); | |
201 | m_rdContexts.next.load(initState); | |
202 | m_rdContexts.cur.load(initState); | |
203 | ||
204 | SAOParam* saoParam = frame->m_encData->m_saoParam; | |
205 | if (!saoParam) | |
206 | { | |
207 | saoParam = new SAOParam; | |
208 | allocSaoParam(saoParam); | |
209 | frame->m_encData->m_saoParam = saoParam; | |
210 | } | |
211 | ||
212 | rdoSaoUnitRowInit(saoParam); | |
213 | ||
214 | // NOTE: Disable SAO automatic turn-off when frame parallelism is | |
215 | // enabled for output exact independent of frame thread count | |
216 | if (m_param->frameNumThreads > 1) | |
217 | { | |
218 | saoParam->bSaoFlag[0] = true; | |
219 | saoParam->bSaoFlag[1] = true; | |
220 | } | |
221 | } | |
222 | ||
// CTU-based SAO process without slice granularity
//
// Applies SAO of type typeIdx, in place, to the reconstructed samples of CTU
// addr in the given plane (0 selects the luma stride, any other value the
// chroma stride and chroma-shifted geometry).
//
// The offsets are read from m_offsetEo (edge types) or m_offsetBo (band
// offset); processSaoUnitRow fills those tables before calling this function.
// Unfiltered neighbour samples are read from m_tmpL1 (column to the left of
// the CTU) and m_tmpU1[plane] (row above the CTU).  Before filtering, the
// CTU's own rightmost column is saved to m_tmpL2, and the two column buffers
// are swapped on exit so that it becomes the left column of the next CTU.
void SAO::processSaoCu(int addr, int typeIdx, int plane)
{
    int x, y;
    const CUData* cu = m_frame->m_encData->getPicCTU(addr);
    pixel* rec = m_frame->m_reconPicYuv->getPlaneAddr(plane, addr);
    intptr_t stride = plane ? m_frame->m_reconPicYuv->m_strideC : m_frame->m_reconPicYuv->m_stride;
    uint32_t picWidth = m_param->sourceWidth;
    uint32_t picHeight = m_param->sourceHeight;
    int ctuWidth = g_maxCUSize;
    int ctuHeight = g_maxCUSize;
    uint32_t lpelx = cu->m_cuPelX;
    uint32_t tpely = cu->m_cuPelY;
    if (plane)
    {
        // convert all geometry to chroma sample units
        picWidth >>= m_hChromaShift;
        picHeight >>= m_vChromaShift;
        ctuWidth >>= m_hChromaShift;
        ctuHeight >>= m_vChromaShift;
        lpelx >>= m_hChromaShift;
        tpely >>= m_vChromaShift;
    }
    // clip the CTU to the picture; ctuWidth/ctuHeight become the visible size
    uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
    uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
    ctuWidth = rpelx - lpelx;
    ctuHeight = bpely - tpely;

    int startX;
    int startY;
    int endX;
    int endY;
    pixel* tmpL;
    pixel* tmpU;

    // sign buffers; the pointers are offset by one so that index -1 is
    // addressable (the 45-degree case writes upBuff1[x - 1] with x == 0)
    int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
    int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;

    {
        // save this CTU's rightmost column (ctuHeight + 1 rows) before it is modified
        const pixel* recR = &rec[ctuWidth - 1];
        for (int i = 0; i < ctuHeight + 1; i++)
        {
            m_tmpL2[i] = *recR;
            recR += stride;
        }

        tmpL = m_tmpL1;
        tmpU = &(m_tmpU1[plane][lpelx]);
    }

    switch (typeIdx)
    {
    case SAO_EO_0: // dir: -
    {
        pixel firstPxl = 0, lastPxl = 0;
        // the first/last picture column has no horizontal neighbour and is skipped
        startX = !lpelx;
        endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
        if (ctuWidth & 15)
        {
            // width is not a multiple of 16: scalar path
            for (y = 0; y < ctuHeight; y++)
            {
                int signLeft = signOf(rec[startX] - tmpL[y]);
                for (x = startX; x < endX; x++)
                {
                    int signRight = signOf(rec[x] - rec[x + 1]);
                    int edgeType = signRight + signLeft + 2;
                    // the sign seen from the next sample is the negation of this one
                    signLeft = -signRight;

                    rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
                }

                rec += stride;
            }
        }
        else
        {
            // width is a multiple of 16: the primitive is given the whole row,
            // so the picture-edge samples are saved and restored around it
            for (y = 0; y < ctuHeight; y++)
            {
                int signLeft = signOf(rec[startX] - tmpL[y]);

                if (!lpelx)
                    firstPxl = rec[0];

                if (rpelx == picWidth)
                    lastPxl = rec[ctuWidth - 1];

                primitives.saoCuOrgE0(rec, m_offsetEo, ctuWidth, (int8_t)signLeft);

                if (!lpelx)
                    rec[0] = firstPxl;

                if (rpelx == picWidth)
                    rec[ctuWidth - 1] = lastPxl;

                rec += stride;
            }
        }
        break;
    }
    case SAO_EO_1: // dir: |
    {
        // the first/last picture row has no vertical neighbour and is skipped
        startY = !tpely;
        endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
        if (!tpely)
            rec += stride;

        // signs against the saved row above the first processed row
        for (x = 0; x < ctuWidth; x++)
            upBuff1[x] = signOf(rec[x] - tmpU[x]);

        for (y = startY; y < endY; y++)
        {
            for (x = 0; x < ctuWidth; x++)
            {
                int signDown = signOf(rec[x] - rec[x + stride]);
                int edgeType = signDown + upBuff1[x] + 2;
                upBuff1[x] = -signDown;

                rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
            }

            rec += stride;
        }

        break;
    }
    case SAO_EO_2: // dir: 135
    {
        startX = !lpelx;
        endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;

        startY = !tpely;
        endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;

        if (!tpely)
            rec += stride;

        // signs against the upper-left neighbour, taken from the saved row above
        for (x = startX; x < endX; x++)
            upBuff1[x] = signOf(rec[x] - tmpU[x - 1]);

        for (y = startY; y < endY; y++)
        {
            // first entry for the next row uses the saved left column
            upBufft[startX] = signOf(rec[stride + startX] - tmpL[y]);
            for (x = startX; x < endX; x++)
            {
                int signDown = signOf(rec[x] - rec[x + stride + 1]);
                int edgeType = signDown + upBuff1[x] + 2;
                // the result belongs to the sample below-right, i.e. x + 1 on the next row
                upBufft[x + 1] = -signDown;
                rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
            }

            // upBufft now holds the signs for the next row
            std::swap(upBuff1, upBufft);

            rec += stride;
        }

        break;
    }
    case SAO_EO_3: // dir: 45
    {
        startX = !lpelx;
        endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;

        startY = !tpely;
        endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;

        if (!tpely)
            rec += stride;

        // signs against the upper-right neighbour, taken from the saved row above
        for (x = startX - 1; x < endX; x++)
            upBuff1[x] = signOf(rec[x] - tmpU[x + 1]);

        for (y = startY; y < endY; y++)
        {
            // first column: the lower-left neighbour comes from the saved left column
            x = startX;
            int signDown = signOf(rec[x] - tmpL[y + 1]);
            int edgeType = signDown + upBuff1[x] + 2;
            upBuff1[x - 1] = -signDown;
            rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
            for (x = startX + 1; x < endX; x++)
            {
                signDown = signOf(rec[x] - rec[x + stride - 1]);
                edgeType = signDown + upBuff1[x] + 2;
                // the result belongs to the sample below-left, i.e. x - 1 on the next row
                upBuff1[x - 1] = -signDown;
                rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
            }

            // last entry for the next row, computed directly from the samples
            upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);

            rec += stride;
        }

        break;
    }
    case SAO_BO:
    {
        // band offset: m_offsetBo maps a sample value directly to its filtered value
        const pixel* offsetBo = m_offsetBo;

        for (y = 0; y < ctuHeight; y++)
        {
            for (x = 0; x < ctuWidth; x++)
                rec[x] = offsetBo[rec[x]];

            rec += stride;
        }

        break;
    }
    default: break;
    }

    // if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1)
    // the saved right column becomes the left column for the next CTU
    std::swap(m_tmpL1, m_tmpL2);
}
435 | ||
436 | /* Process SAO all units */ | |
437 | void SAO::processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane) | |
438 | { | |
439 | intptr_t stride = plane ? m_frame->m_reconPicYuv->m_strideC : m_frame->m_reconPicYuv->m_stride; | |
440 | uint32_t picWidth = m_param->sourceWidth; | |
441 | int ctuWidth = g_maxCUSize; | |
442 | int ctuHeight = g_maxCUSize; | |
443 | if (plane) | |
444 | { | |
445 | picWidth >>= m_hChromaShift; | |
446 | ctuWidth >>= m_hChromaShift; | |
447 | ctuHeight >>= m_vChromaShift; | |
448 | } | |
449 | ||
450 | if (!idxY) | |
451 | { | |
452 | pixel* rec = m_frame->m_reconPicYuv->m_picOrg[plane]; | |
453 | memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth); | |
454 | } | |
455 | ||
456 | int addr = idxY * m_numCuInWidth; | |
457 | pixel* rec = plane ? m_frame->m_reconPicYuv->getChromaAddr(plane, addr) : m_frame->m_reconPicYuv->getLumaAddr(addr); | |
458 | ||
459 | for (int i = 0; i < ctuHeight + 1; i++) | |
460 | { | |
461 | m_tmpL1[i] = rec[0]; | |
462 | rec += stride; | |
463 | } | |
464 | ||
465 | rec -= (stride << 1); | |
466 | ||
467 | memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidth); | |
468 | ||
469 | const int boShift = X265_DEPTH - SAO_BO_BITS; | |
470 | ||
471 | for (int idxX = 0; idxX < m_numCuInWidth; idxX++) | |
472 | { | |
473 | addr = idxY * m_numCuInWidth + idxX; | |
474 | ||
475 | bool mergeLeftFlag = ctuParam[addr].mergeMode == SAO_MERGE_LEFT; | |
476 | int typeIdx = ctuParam[addr].typeIdx; | |
477 | ||
478 | if (typeIdx >= 0) | |
479 | { | |
480 | if (!mergeLeftFlag) | |
481 | { | |
482 | if (typeIdx == SAO_BO) | |
483 | { | |
484 | pixel* offsetBo = m_offsetBo; | |
485 | int offset[SAO_NUM_BO_CLASSES]; | |
486 | memset(offset, 0, sizeof(offset)); | |
487 | ||
488 | for (int i = 0; i < SAO_NUM_OFFSET; i++) | |
489 | offset[((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = ctuParam[addr].offset[i] << SAO_BIT_INC; | |
490 | ||
491 | for (int i = 0; i < (1 << X265_DEPTH); i++) | |
492 | offsetBo[i] = m_clipTable[i + offset[i >> boShift]]; | |
493 | } | |
494 | else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3) | |
495 | { | |
496 | int offset[NUM_EDGETYPE]; | |
497 | offset[0] = 0; | |
498 | for (int i = 0; i < SAO_NUM_OFFSET; i++) | |
499 | offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC; | |
500 | ||
501 | for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++) | |
502 | m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]]; | |
503 | } | |
504 | } | |
505 | processSaoCu(addr, typeIdx, plane); | |
506 | } | |
507 | else if (idxX != (m_numCuInWidth - 1)) | |
508 | { | |
509 | rec = plane ? m_frame->m_reconPicYuv->getChromaAddr(plane, addr) : m_frame->m_reconPicYuv->getLumaAddr(addr); | |
510 | ||
511 | for (int i = 0; i < ctuHeight + 1; i++) | |
512 | { | |
513 | m_tmpL1[i] = rec[ctuWidth - 1]; | |
514 | rec += stride; | |
515 | } | |
516 | } | |
517 | } | |
518 | ||
519 | std::swap(m_tmpU1[plane], m_tmpU2[plane]); | |
520 | } | |
521 | ||
522 | void SAO::resetSaoUnit(SaoCtuParam* saoUnit) | |
523 | { | |
524 | saoUnit->mergeMode = SAO_MERGE_NONE; | |
525 | saoUnit->typeIdx = -1; | |
526 | saoUnit->bandPos = 0; | |
527 | ||
528 | for (int i = 0; i < SAO_NUM_OFFSET; i++) | |
529 | saoUnit->offset[i] = 0; | |
530 | } | |
531 | ||
532 | void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc) | |
533 | { | |
534 | saoUnitDst->mergeMode = saoUnitSrc->mergeMode; | |
535 | saoUnitDst->typeIdx = saoUnitSrc->typeIdx; | |
536 | saoUnitDst->bandPos = saoUnitSrc->bandPos; | |
537 | ||
538 | for (int i = 0; i < SAO_NUM_OFFSET; i++) | |
539 | saoUnitDst->offset[i] = saoUnitSrc->offset[i]; | |
540 | } | |
541 | ||
/* Calculate SAO statistics for current CTU without non-crossing slice
 *
 * For CTU addr of the given plane, and for the band-offset type and each of
 * the four edge-offset types, this adds to m_offsetOrg[plane][type][class] the
 * sum of (original - reconstructed) sample differences and to
 * m_count[plane][type][class] the number of samples in each class.  The
 * accumulators are not cleared here.
 *
 * skipB / skipR are the numbers of bottom rows / right columns of the CTU that
 * are left out unless the CTU touches the corresponding picture edge
 * (presumably because those samples are not final yet - confirm against the
 * deblocking order).  When m_param->bSaoNonDeblocked is set they are reduced
 * per SAO type. */
void SAO::calcSaoStatsCu(int addr, int plane)
{
    int x, y;
    CUData* cu = m_frame->m_encData->getPicCTU(addr);
    const pixel* fenc0 = m_frame->m_origPicYuv->getPlaneAddr(plane, addr);
    const pixel* rec0 = m_frame->m_reconPicYuv->getPlaneAddr(plane, addr);
    const pixel* fenc;
    const pixel* rec;
    // NOTE(review): the reconstructed picture's stride is also used to step
    // through the original picture; assumes both have the same stride
    intptr_t stride = plane ? m_frame->m_reconPicYuv->m_strideC : m_frame->m_reconPicYuv->m_stride;
    uint32_t picWidth = m_param->sourceWidth;
    uint32_t picHeight = m_param->sourceHeight;
    int ctuWidth = g_maxCUSize;
    int ctuHeight = g_maxCUSize;
    uint32_t lpelx = cu->m_cuPelX;
    uint32_t tpely = cu->m_cuPelY;
    if (plane)
    {
        // convert all geometry to chroma sample units
        picWidth >>= m_hChromaShift;
        picHeight >>= m_vChromaShift;
        ctuWidth >>= m_hChromaShift;
        ctuHeight >>= m_vChromaShift;
        lpelx >>= m_hChromaShift;
        tpely >>= m_vChromaShift;
    }
    // clip the CTU to the picture; ctuWidth/ctuHeight become the visible size
    uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
    uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
    ctuWidth = rpelx - lpelx;
    ctuHeight = bpely - tpely;

    int startX;
    int startY;
    int endX;
    int endY;
    int32_t* stats;
    int32_t* count;

    // defaults, used for every SAO type when bSaoNonDeblocked is off
    int skipB = plane ? 2 : 4;
    int skipR = plane ? 3 : 5;

    // sign buffers; the pointers are offset by one so that index -1 is addressable
    int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
    int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;

    // SAO_BO:
    {
        const int boShift = X265_DEPTH - SAO_BO_BITS;

        if (m_param->bSaoNonDeblocked)
        {
            skipB = plane ? 1 : 3;
            skipR = plane ? 2 : 4;
        }
        stats = m_offsetOrg[plane][SAO_BO];
        count = m_count[plane][SAO_BO];

        fenc = fenc0;
        rec = rec0;

        endX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
        endY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;

        for (y = 0; y < endY; y++)
        {
            for (x = 0; x < endX; x++)
            {
                // band classes are stored starting at index 1
                int classIdx = 1 + (rec[x] >> boShift);
                stats[classIdx] += (fenc[x] - rec[x]);
                count[classIdx]++;
            }

            fenc += stride;
            rec += stride;
        }
    }

    {
        // SAO_EO_0: // dir: -
        {
            if (m_param->bSaoNonDeblocked)
            {
                skipB = plane ? 1 : 3;
                skipR = plane ? 3 : 5;
            }
            stats = m_offsetOrg[plane][SAO_EO_0];
            count = m_count[plane][SAO_EO_0];

            fenc = fenc0;
            rec = rec0;

            // the first/last picture column has no horizontal neighbour
            startX = !lpelx;
            endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
            // NOTE(review): unlike the other types, the row bound here does not
            // depend on whether the CTU touches the bottom picture edge
            for (y = 0; y < ctuHeight - skipB; y++)
            {
                int signLeft = signOf(rec[startX] - rec[startX - 1]);
                for (x = startX; x < endX; x++)
                {
                    int signRight = signOf(rec[x] - rec[x + 1]);
                    int edgeType = signRight + signLeft + 2;
                    // the sign seen from the next sample is the negation of this one
                    signLeft = -signRight;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                fenc += stride;
                rec += stride;
            }
        }

        // SAO_EO_1: // dir: |
        {
            if (m_param->bSaoNonDeblocked)
            {
                skipB = plane ? 2 : 4;
                skipR = plane ? 2 : 4;
            }
            stats = m_offsetOrg[plane][SAO_EO_1];
            count = m_count[plane][SAO_EO_1];

            fenc = fenc0;
            rec = rec0;

            // the first/last picture row has no vertical neighbour
            startY = !tpely;
            endX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
            endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            // signs against the row above the first processed row
            for (x = 0; x < ctuWidth; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride]);

            for (y = startY; y < endY; y++)
            {
                for (x = 0; x < endX; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    upBuff1[x] = -signDown;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                fenc += stride;
                rec += stride;
            }
        }

        // SAO_EO_2: // dir: 135
        {
            if (m_param->bSaoNonDeblocked)
            {
                skipB = plane ? 2 : 4;
                skipR = plane ? 3 : 5;
            }
            stats = m_offsetOrg[plane][SAO_EO_2];
            count = m_count[plane][SAO_EO_2];

            fenc = fenc0;
            rec = rec0;

            startX = !lpelx;
            endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;

            startY = !tpely;
            endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            // signs against the upper-left neighbour
            for (x = startX; x < endX; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride - 1]);

            for (y = startY; y < endY; y++)
            {
                // first entry for the next row, computed directly from the samples
                upBufft[startX] = signOf(rec[startX + stride] - rec[startX - 1]);
                for (x = startX; x < endX; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride + 1]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    // the result belongs to the sample below-right, i.e. x + 1 on the next row
                    upBufft[x + 1] = -signDown;
                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                // upBufft now holds the signs for the next row
                std::swap(upBuff1, upBufft);

                rec += stride;
                fenc += stride;
            }
        }

        // SAO_EO_3: // dir: 45
        {
            if (m_param->bSaoNonDeblocked)
            {
                skipB = plane ? 2 : 4;
                skipR = plane ? 3 : 5;
            }
            stats = m_offsetOrg[plane][SAO_EO_3];
            count = m_count[plane][SAO_EO_3];

            fenc = fenc0;
            rec = rec0;

            startX = !lpelx;
            endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;

            startY = !tpely;
            endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;

            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            // signs against the upper-right neighbour
            for (x = startX - 1; x < endX; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride + 1]);

            for (y = startY; y < endY; y++)
            {
                for (x = startX; x < endX; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride - 1]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    // the result belongs to the sample below-left, i.e. x - 1 on the next row
                    upBuff1[x - 1] = -signDown;
                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                // last entry for the next row, computed directly from the samples
                upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);

                rec += stride;
                fenc += stride;
            }
        }
    }
}
786 | ||
/* Collect SAO statistics for the CTU at column idxX, row idxY, for all planes,
 * into m_offsetOrgPreDblk[addr] and m_countPreDblk[addr] (both are cleared
 * first).
 *
 * For every SAO type only the samples in the right skipR columns and the
 * bottom skipB rows of the CTU are visited; the skip values used here are the
 * same ones calcSaoStatsCu() uses when bSaoNonDeblocked is set, so this
 * covers the region that function leaves out.
 *
 * NOTE(review): the CTU and the chroma stride are taken from the frame
 * argument, while the luma stride and the plane addresses are taken from
 * m_frame - confirm that both always refer to the same frame. */
void SAO::calcSaoStatsCu_BeforeDblk(Frame* frame, int idxX, int idxY)
{
    int addr = idxX + m_numCuInWidth * idxY;

    int x, y;
    CUData* cu = frame->m_encData->getPicCTU(addr);
    const pixel* fenc;
    const pixel* rec;
    intptr_t stride = m_frame->m_reconPicYuv->m_stride;
    uint32_t picWidth = m_param->sourceWidth;
    uint32_t picHeight = m_param->sourceHeight;
    int ctuWidth = g_maxCUSize;
    int ctuHeight = g_maxCUSize;
    uint32_t lpelx = cu->m_cuPelX;
    uint32_t tpely = cu->m_cuPelY;
    // clip the CTU to the picture; ctuWidth/ctuHeight become the visible size
    uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
    uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
    ctuWidth = rpelx - lpelx;
    ctuHeight = bpely - tpely;

    // startX/startY: first column/row of the region handled here
    // firstX/firstY: first column/row that has a left/upper neighbour in the picture
    int startX;
    int startY;
    int endX;
    int endY;
    int firstX, firstY;
    int32_t* stats;
    int32_t* count;

    int skipB, skipR;

    // sign buffers; the pointers are offset by one so that index -1 is addressable
    int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
    int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;

    const int boShift = X265_DEPTH - SAO_BO_BITS;

    memset(m_countPreDblk[addr], 0, sizeof(PerPlane));
    memset(m_offsetOrgPreDblk[addr], 0, sizeof(PerPlane));

    for (int plane = 0; plane < NUM_PLANE; plane++)
    {
        if (plane == 1)
        {
            // switch to chroma geometry once; the values stay in effect for
            // the remaining planes
            stride = frame->m_reconPicYuv->m_strideC;
            picWidth >>= m_hChromaShift;
            picHeight >>= m_vChromaShift;
            ctuWidth >>= m_hChromaShift;
            ctuHeight >>= m_vChromaShift;
            lpelx >>= m_hChromaShift;
            tpely >>= m_vChromaShift;
            rpelx >>= m_hChromaShift;
            bpely >>= m_vChromaShift;
        }

        // SAO_BO:

        skipB = plane ? 1 : 3;
        skipR = plane ? 2 : 4;

        stats = m_offsetOrgPreDblk[addr][plane][SAO_BO];
        count = m_countPreDblk[addr][plane][SAO_BO];

        const pixel* fenc0 = m_frame->m_origPicYuv->getPlaneAddr(plane, addr);
        const pixel* rec0 = m_frame->m_reconPicYuv->getPlaneAddr(plane, addr);
        fenc = fenc0;
        rec = rec0;

        startX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
        startY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;

        for (y = 0; y < ctuHeight; y++)
        {
            // rows above startY contribute only their right-hand columns;
            // rows from startY on contribute fully
            for (x = (y < startY ? startX : 0); x < ctuWidth; x++)
            {
                // band classes are stored starting at index 1
                int classIdx = 1 + (rec[x] >> boShift);
                stats[classIdx] += (fenc[x] - rec[x]);
                count[classIdx]++;
            }

            fenc += stride;
            rec += stride;
        }

        // SAO_EO_0: // dir: -
        {
            skipB = plane ? 1 : 3;
            skipR = plane ? 3 : 5;

            stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_0];
            count = m_countPreDblk[addr][plane][SAO_EO_0];

            fenc = fenc0;
            rec = rec0;

            startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
            startY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;
            firstX = !lpelx;
            // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
            endX = ctuWidth - 1; // not refer right CTU

            for (y = 0; y < ctuHeight; y++)
            {
                x = (y < startY ? startX : firstX);
                int signLeft = signOf(rec[x] - rec[x - 1]);
                for (; x < endX; x++)
                {
                    int signRight = signOf(rec[x] - rec[x + 1]);
                    int edgeType = signRight + signLeft + 2;
                    // the sign seen from the next sample is the negation of this one
                    signLeft = -signRight;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                fenc += stride;
                rec += stride;
            }
        }

        // SAO_EO_1: // dir: |
        {
            skipB = plane ? 2 : 4;
            skipR = plane ? 2 : 4;

            stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_1];
            count = m_countPreDblk[addr][plane][SAO_EO_1];

            fenc = fenc0;
            rec = rec0;

            startX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
            startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            firstY = !tpely;
            // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
            endY = ctuHeight - 1; // not refer below CTU
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            // signs against the row above, for the right-hand columns only
            for (x = startX; x < ctuWidth; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride]);

            for (y = firstY; y < endY; y++)
            {
                // the full row is scanned from one row before startY so that
                // upBuff1 is valid when row startY is reached
                for (x = (y < startY - 1 ? startX : 0); x < ctuWidth; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    upBuff1[x] = -signDown;

                    // outside the region handled here: only the sign was needed
                    if (x < startX && y < startY)
                        continue;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                fenc += stride;
                rec += stride;
            }
        }

        // SAO_EO_2: // dir: 135
        {
            skipB = plane ? 2 : 4;
            skipR = plane ? 3 : 5;

            stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_2];
            count = m_countPreDblk[addr][plane][SAO_EO_2];

            fenc = fenc0;
            rec = rec0;

            startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
            startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            firstX = !lpelx;
            firstY = !tpely;
            // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
            // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
            endX = ctuWidth - 1; // not refer right CTU
            endY = ctuHeight - 1; // not refer below CTU
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            // signs against the upper-left neighbour, for the right-hand columns only
            for (x = startX; x < endX; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride - 1]);

            for (y = firstY; y < endY; y++)
            {
                x = (y < startY - 1 ? startX : firstX);
                // first entry for the next row, computed directly from the samples
                upBufft[x] = signOf(rec[x + stride] - rec[x - 1]);
                for (; x < endX; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride + 1]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    // the result belongs to the sample below-right, i.e. x + 1 on the next row
                    upBufft[x + 1] = -signDown;

                    // outside the region handled here: only the sign was needed
                    if (x < startX && y < startY)
                        continue;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                // upBufft now holds the signs for the next row
                std::swap(upBuff1, upBufft);

                rec += stride;
                fenc += stride;
            }
        }

        // SAO_EO_3: // dir: 45
        {
            skipB = plane ? 2 : 4;
            skipR = plane ? 3 : 5;

            stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_3];
            count = m_countPreDblk[addr][plane][SAO_EO_3];

            fenc = fenc0;
            rec = rec0;

            startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
            startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            firstX = !lpelx;
            firstY = !tpely;
            // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
            // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
            endX = ctuWidth - 1; // not refer right CTU
            endY = ctuHeight - 1; // not refer below CTU
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            // signs against the upper-right neighbour, for the right-hand columns only
            for (x = startX - 1; x < endX; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride + 1]);

            for (y = firstY; y < endY; y++)
            {
                for (x = (y < startY - 1 ? startX : firstX); x < endX; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride - 1]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    // the result belongs to the sample below-left, i.e. x - 1 on the next row
                    upBuff1[x - 1] = -signDown;

                    // outside the region handled here: only the sign was needed
                    if (x < startX && y < startY)
                        continue;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                // last entry for the next row, computed directly from the samples
                upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);

                rec += stride;
                fenc += stride;
            }
        }
    }
}
1053 | ||
1054 | /* reset offset statistics */ | |
1055 | void SAO::resetStats() | |
1056 | { | |
1057 | memset(m_count, 0, sizeof(PerClass) * NUM_PLANE); | |
1058 | memset(m_offset, 0, sizeof(PerClass) * NUM_PLANE); | |
1059 | memset(m_offsetOrg, 0, sizeof(PerClass) * NUM_PLANE); | |
1060 | } | |
1061 | ||
1062 | void SAO::rdoSaoUnitRowInit(SAOParam* saoParam) | |
1063 | { | |
1064 | saoParam->bSaoFlag[0] = true; | |
1065 | saoParam->bSaoFlag[1] = true; | |
1066 | ||
1067 | m_numNoSao[0] = 0; // Luma | |
1068 | m_numNoSao[1] = 0; // Chroma | |
1069 | if (m_refDepth > 0 && m_depthSaoRate[0][m_refDepth - 1] > SAO_ENCODING_RATE) | |
1070 | saoParam->bSaoFlag[0] = false; | |
1071 | if (m_refDepth > 0 && m_depthSaoRate[1][m_refDepth - 1] > SAO_ENCODING_RATE_CHROMA) | |
1072 | saoParam->bSaoFlag[1] = false; | |
1073 | } | |
1074 | ||
1075 | void SAO::rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus) | |
1076 | { | |
1077 | if (!saoParam->bSaoFlag[0]) | |
1078 | m_depthSaoRate[0][m_refDepth] = 1.0; | |
1079 | else | |
1080 | m_depthSaoRate[0][m_refDepth] = m_numNoSao[0] / ((double)numctus); | |
1081 | ||
1082 | if (!saoParam->bSaoFlag[1]) | |
1083 | m_depthSaoRate[1][m_refDepth] = 1.0; | |
1084 | else | |
1085 | m_depthSaoRate[1][m_refDepth] = m_numNoSao[1] / ((double)numctus); | |
1086 | } | |
1087 | ||
1088 | void SAO::rdoSaoUnitRow(SAOParam* saoParam, int idxY) | |
1089 | { | |
1090 | SaoCtuParam mergeSaoParam[NUM_MERGE_MODE][2]; | |
1091 | double mergeDist[NUM_MERGE_MODE]; | |
1092 | bool allowMerge[2]; // left, up | |
1093 | allowMerge[1] = (idxY > 0); | |
1094 | ||
1095 | for (int idxX = 0; idxX < m_numCuInWidth; idxX++) | |
1096 | { | |
1097 | int addr = idxX + idxY * m_numCuInWidth; | |
1098 | int addrUp = idxY ? addr - m_numCuInWidth : -1; | |
1099 | int addrLeft = idxX ? addr - 1 : -1; | |
1100 | allowMerge[0] = (idxX > 0); | |
1101 | ||
1102 | m_entropyCoder.load(m_rdContexts.cur); | |
1103 | if (allowMerge[0]) | |
1104 | m_entropyCoder.codeSaoMerge(0); | |
1105 | if (allowMerge[1]) | |
1106 | m_entropyCoder.codeSaoMerge(0); | |
1107 | m_entropyCoder.store(m_rdContexts.temp); | |
1108 | // reset stats Y, Cb, Cr | |
1109 | for (int plane = 0; plane < 3; plane++) | |
1110 | { | |
1111 | for (int j = 0; j < MAX_NUM_SAO_TYPE; j++) | |
1112 | { | |
1113 | for (int k = 0; k < MAX_NUM_SAO_CLASS; k++) | |
1114 | { | |
1115 | m_offset[plane][j][k] = 0; | |
1116 | if (m_param->bSaoNonDeblocked) | |
1117 | { | |
1118 | m_count[plane][j][k] = m_countPreDblk[addr][plane][j][k]; | |
1119 | m_offsetOrg[plane][j][k] = m_offsetOrgPreDblk[addr][plane][j][k]; | |
1120 | } | |
1121 | else | |
1122 | { | |
1123 | m_count[plane][j][k] = 0; | |
1124 | m_offsetOrg[plane][j][k] = 0; | |
1125 | } | |
1126 | } | |
1127 | } | |
1128 | ||
1129 | saoParam->ctuParam[plane][addr].mergeMode = SAO_MERGE_NONE; | |
1130 | saoParam->ctuParam[plane][addr].typeIdx = -1; | |
1131 | saoParam->ctuParam[plane][addr].bandPos = 0; | |
1132 | if (saoParam->bSaoFlag[plane > 0]) | |
1133 | calcSaoStatsCu(addr, plane); | |
1134 | } | |
1135 | ||
1136 | saoComponentParamDist(saoParam, addr, addrUp, addrLeft, &mergeSaoParam[0][0], mergeDist); | |
1137 | ||
1138 | sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft, mergeSaoParam, mergeDist); | |
1139 | ||
1140 | if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1]) | |
1141 | { | |
1142 | // Cost of new SAO_params | |
1143 | m_entropyCoder.load(m_rdContexts.cur); | |
1144 | m_entropyCoder.resetBits(); | |
1145 | if (allowMerge[0]) | |
1146 | m_entropyCoder.codeSaoMerge(0); | |
1147 | if (allowMerge[1]) | |
1148 | m_entropyCoder.codeSaoMerge(0); | |
1149 | for (int plane = 0; plane < 3; plane++) | |
1150 | { | |
1151 | if (saoParam->bSaoFlag[plane > 0]) | |
1152 | m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane); | |
1153 | } | |
1154 | ||
1155 | uint32_t rate = m_entropyCoder.getNumberOfWrittenBits(); | |
1156 | double bestCost = mergeDist[0] + (double)rate; | |
1157 | m_entropyCoder.store(m_rdContexts.temp); | |
1158 | ||
1159 | // Cost of Merge | |
1160 | for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx) | |
1161 | { | |
1162 | if (!allowMerge[mergeIdx]) | |
1163 | continue; | |
1164 | ||
1165 | m_entropyCoder.load(m_rdContexts.cur); | |
1166 | m_entropyCoder.resetBits(); | |
1167 | if (allowMerge[0]) | |
1168 | m_entropyCoder.codeSaoMerge(1 - mergeIdx); | |
1169 | if (allowMerge[1] && (mergeIdx == 1)) | |
1170 | m_entropyCoder.codeSaoMerge(1); | |
1171 | ||
1172 | rate = m_entropyCoder.getNumberOfWrittenBits(); | |
1173 | double mergeCost = mergeDist[mergeIdx + 1] + (double)rate; | |
1174 | if (mergeCost < bestCost) | |
1175 | { | |
1176 | SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT; | |
1177 | bestCost = mergeCost; | |
1178 | m_entropyCoder.store(m_rdContexts.temp); | |
1179 | for (int plane = 0; plane < 3; plane++) | |
1180 | { | |
1181 | mergeSaoParam[plane][mergeIdx].mergeMode = mergeMode; | |
1182 | if (saoParam->bSaoFlag[plane > 0]) | |
1183 | copySaoUnit(&saoParam->ctuParam[plane][addr], &mergeSaoParam[plane][mergeIdx]); | |
1184 | } | |
1185 | } | |
1186 | } | |
1187 | ||
1188 | if (saoParam->ctuParam[0][addr].typeIdx < 0) | |
1189 | m_numNoSao[0]++; | |
1190 | if (saoParam->ctuParam[1][addr].typeIdx < 0) | |
1191 | m_numNoSao[1]++; | |
1192 | m_entropyCoder.load(m_rdContexts.temp); | |
1193 | m_entropyCoder.store(m_rdContexts.cur); | |
1194 | } | |
1195 | } | |
1196 | } | |
1197 | ||
1198 | /** rate distortion optimization of SAO unit */ | |
1199 | inline int64_t SAO::estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t* currentDistortionTableBo, double* currentRdCostTableBo) | |
1200 | { | |
1201 | int64_t estDist = 0; | |
1202 | ||
1203 | for (int classIdx = 1; classIdx < ((typeIdx < SAO_BO) ? SAO_EO_LEN + 1 : SAO_NUM_BO_CLASSES + 1); classIdx++) | |
1204 | { | |
1205 | int32_t count = m_count[plane][typeIdx][classIdx]; | |
1206 | int32_t& offsetOrg = m_offsetOrg[plane][typeIdx][classIdx]; | |
1207 | int32_t& offsetOut = m_offset[plane][typeIdx][classIdx]; | |
1208 | ||
1209 | if (typeIdx == SAO_BO) | |
1210 | { | |
1211 | currentDistortionTableBo[classIdx - 1] = 0; | |
1212 | currentRdCostTableBo[classIdx - 1] = lambda; | |
1213 | } | |
1214 | if (count) | |
1215 | { | |
1216 | int offset = roundIBDI(offsetOrg, count << SAO_BIT_INC); | |
1217 | offset = Clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, offset); | |
1218 | if (typeIdx < SAO_BO) | |
1219 | { | |
1220 | if (classIdx < 3) | |
1221 | offset = X265_MAX(offset, 0); | |
1222 | else | |
1223 | offset = X265_MIN(offset, 0); | |
1224 | } | |
1225 | offsetOut = estIterOffset(typeIdx, classIdx, lambda, offset, count, offsetOrg, currentDistortionTableBo, currentRdCostTableBo); | |
1226 | } | |
1227 | else | |
1228 | { | |
1229 | offsetOrg = 0; | |
1230 | offsetOut = 0; | |
1231 | } | |
1232 | if (typeIdx != SAO_BO) | |
1233 | estDist += estSaoDist(count, (int)offsetOut << SAO_BIT_INC, offsetOrg); | |
1234 | } | |
1235 | ||
1236 | return estDist; | |
1237 | } | |
1238 | ||
1239 | inline int SAO::estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg, int32_t* currentDistortionTableBo, double* currentRdCostTableBo) | |
1240 | { | |
1241 | int offsetOut = 0; | |
1242 | ||
1243 | // Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here. | |
1244 | double tempMinCost = lambda; | |
1245 | while (offset != 0) | |
1246 | { | |
1247 | // Calculate the bits required for signalling the offset | |
1248 | int tempRate = (typeIdx == SAO_BO) ? (abs(offset) + 2) : (abs(offset) + 1); | |
1249 | if (abs(offset) == OFFSET_THRESH - 1) | |
1250 | tempRate--; | |
1251 | ||
1252 | // Do the dequntization before distorion calculation | |
1253 | int tempOffset = offset << SAO_BIT_INC; | |
1254 | int64_t tempDist = estSaoDist(count, tempOffset, offsetOrg); | |
1255 | double tempCost = ((double)tempDist + lambda * (double)tempRate); | |
1256 | if (tempCost < tempMinCost) | |
1257 | { | |
1258 | tempMinCost = tempCost; | |
1259 | offsetOut = offset; | |
1260 | if (typeIdx == SAO_BO) | |
1261 | { | |
1262 | currentDistortionTableBo[classIdx - 1] = (int)tempDist; | |
1263 | currentRdCostTableBo[classIdx - 1] = tempCost; | |
1264 | } | |
1265 | } | |
1266 | offset = (offset > 0) ? (offset - 1) : (offset + 1); | |
1267 | } | |
1268 | ||
1269 | return offsetOut; | |
1270 | } | |
1271 | ||
1272 | void SAO::saoComponentParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam* mergeSaoParam, double* mergeDist) | |
1273 | { | |
1274 | int64_t bestDist = 0; | |
1275 | ||
1276 | SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr]; | |
1277 | ||
1278 | double bestRDCostTableBo = MAX_DOUBLE; | |
1279 | int bestClassTableBo = 0; | |
1280 | int currentDistortionTableBo[MAX_NUM_SAO_CLASS]; | |
1281 | double currentRdCostTableBo[MAX_NUM_SAO_CLASS]; | |
1282 | ||
1283 | resetSaoUnit(lclCtuParam); | |
1284 | m_entropyCoder.load(m_rdContexts.temp); | |
1285 | m_entropyCoder.resetBits(); | |
1286 | m_entropyCoder.codeSaoOffset(*lclCtuParam, 0); | |
1287 | double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda; | |
1288 | ||
1289 | for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++) | |
1290 | { | |
1291 | int64_t estDist = estSaoTypeDist(0, typeIdx, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo); | |
1292 | ||
1293 | if (typeIdx == SAO_BO) | |
1294 | { | |
1295 | // Estimate Best Position | |
1296 | for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++) | |
1297 | { | |
1298 | double currentRDCost = 0.0; | |
1299 | for (int j = i; j < i + SAO_BO_LEN; j++) | |
1300 | currentRDCost += currentRdCostTableBo[j]; | |
1301 | ||
1302 | if (currentRDCost < bestRDCostTableBo) | |
1303 | { | |
1304 | bestRDCostTableBo = currentRDCost; | |
1305 | bestClassTableBo = i; | |
1306 | } | |
1307 | } | |
1308 | ||
1309 | // Re code all Offsets | |
1310 | // Code Center | |
1311 | estDist = 0; | |
1312 | for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++) | |
1313 | estDist += currentDistortionTableBo[classIdx]; | |
1314 | } | |
1315 | SaoCtuParam ctuParamRdo; | |
1316 | ctuParamRdo.mergeMode = SAO_MERGE_NONE; | |
1317 | ctuParamRdo.typeIdx = typeIdx; | |
1318 | ctuParamRdo.bandPos = (typeIdx == SAO_BO) ? bestClassTableBo : 0; | |
1319 | for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) | |
1320 | ctuParamRdo.offset[classIdx] = (int)m_offset[0][typeIdx][classIdx + ctuParamRdo.bandPos + 1]; | |
1321 | ||
1322 | m_entropyCoder.load(m_rdContexts.temp); | |
1323 | m_entropyCoder.resetBits(); | |
1324 | m_entropyCoder.codeSaoOffset(ctuParamRdo, 0); | |
1325 | ||
1326 | uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); | |
1327 | double cost = (double)estDist + m_lumaLambda * (double)estRate; | |
1328 | ||
1329 | if (cost < dCostPartBest) | |
1330 | { | |
1331 | dCostPartBest = cost; | |
1332 | copySaoUnit(lclCtuParam, &ctuParamRdo); | |
1333 | bestDist = estDist; | |
1334 | } | |
1335 | } | |
1336 | ||
1337 | mergeDist[0] = ((double)bestDist / m_lumaLambda); | |
1338 | m_entropyCoder.load(m_rdContexts.temp); | |
1339 | m_entropyCoder.codeSaoOffset(*lclCtuParam, 0); | |
1340 | m_entropyCoder.store(m_rdContexts.temp); | |
1341 | ||
1342 | // merge left or merge up | |
1343 | ||
1344 | for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++) | |
1345 | { | |
1346 | SaoCtuParam* mergeSrcParam = NULL; | |
1347 | if (addrLeft >= 0 && mergeIdx == 0) | |
1348 | mergeSrcParam = &(saoParam->ctuParam[0][addrLeft]); | |
1349 | else if (addrUp >= 0 && mergeIdx == 1) | |
1350 | mergeSrcParam = &(saoParam->ctuParam[0][addrUp]); | |
1351 | if (mergeSrcParam) | |
1352 | { | |
1353 | int64_t estDist = 0; | |
1354 | int typeIdx = mergeSrcParam->typeIdx; | |
1355 | if (typeIdx >= 0) | |
1356 | { | |
1357 | int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 0; | |
1358 | for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) | |
1359 | { | |
1360 | int mergeOffset = mergeSrcParam->offset[classIdx]; | |
1361 | estDist += estSaoDist(m_count[0][typeIdx][classIdx + bandPos + 1], mergeOffset, m_offsetOrg[0][typeIdx][classIdx + bandPos + 1]); | |
1362 | } | |
1363 | } | |
1364 | ||
1365 | copySaoUnit(&mergeSaoParam[mergeIdx], mergeSrcParam); | |
1366 | mergeSaoParam[mergeIdx].mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT; | |
1367 | ||
1368 | mergeDist[mergeIdx + 1] = ((double)estDist / m_lumaLambda); | |
1369 | } | |
1370 | else | |
1371 | resetSaoUnit(&mergeSaoParam[mergeIdx]); | |
1372 | } | |
1373 | } | |
1374 | ||
1375 | void SAO::sao2ChromaParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[][2], double* mergeDist) | |
1376 | { | |
1377 | int64_t bestDist = 0; | |
1378 | ||
1379 | SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr], &saoParam->ctuParam[2][addr] }; | |
1380 | ||
1381 | double currentRdCostTableBo[MAX_NUM_SAO_CLASS]; | |
1382 | int bestClassTableBo[2] = { 0, 0 }; | |
1383 | int currentDistortionTableBo[MAX_NUM_SAO_CLASS]; | |
1384 | ||
1385 | resetSaoUnit(lclCtuParam[0]); | |
1386 | resetSaoUnit(lclCtuParam[1]); | |
1387 | m_entropyCoder.load(m_rdContexts.temp); | |
1388 | m_entropyCoder.resetBits(); | |
1389 | m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1); | |
1390 | m_entropyCoder.codeSaoOffset(*lclCtuParam[1], 2); | |
1391 | ||
1392 | double costPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_chromaLambda; | |
1393 | ||
1394 | for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++) | |
1395 | { | |
1396 | int64_t estDist[2]; | |
1397 | if (typeIdx == SAO_BO) | |
1398 | { | |
1399 | // Estimate Best Position | |
1400 | for (int compIdx = 0; compIdx < 2; compIdx++) | |
1401 | { | |
1402 | double bestRDCostTableBo = MAX_DOUBLE; | |
1403 | estDist[compIdx] = estSaoTypeDist(compIdx + 1, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo); | |
1404 | for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++) | |
1405 | { | |
1406 | double currentRDCost = 0.0; | |
1407 | for (int j = i; j < i + SAO_BO_LEN; j++) | |
1408 | currentRDCost += currentRdCostTableBo[j]; | |
1409 | ||
1410 | if (currentRDCost < bestRDCostTableBo) | |
1411 | { | |
1412 | bestRDCostTableBo = currentRDCost; | |
1413 | bestClassTableBo[compIdx] = i; | |
1414 | } | |
1415 | } | |
1416 | ||
1417 | // Re code all Offsets | |
1418 | // Code Center | |
1419 | estDist[compIdx] = 0; | |
1420 | for (int classIdx = bestClassTableBo[compIdx]; classIdx < bestClassTableBo[compIdx] + SAO_BO_LEN; classIdx++) | |
1421 | estDist[compIdx] += currentDistortionTableBo[classIdx]; | |
1422 | } | |
1423 | } | |
1424 | else | |
1425 | { | |
1426 | estDist[0] = estSaoTypeDist(1, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo); | |
1427 | estDist[1] = estSaoTypeDist(2, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo); | |
1428 | } | |
1429 | ||
1430 | m_entropyCoder.load(m_rdContexts.temp); | |
1431 | m_entropyCoder.resetBits(); | |
1432 | ||
1433 | SaoCtuParam ctuParamRdo[2]; | |
1434 | for (int compIdx = 0; compIdx < 2; compIdx++) | |
1435 | { | |
1436 | ctuParamRdo[compIdx].mergeMode = SAO_MERGE_NONE; | |
1437 | ctuParamRdo[compIdx].typeIdx = typeIdx; | |
1438 | ctuParamRdo[compIdx].bandPos = (typeIdx == SAO_BO) ? bestClassTableBo[compIdx] : 0; | |
1439 | for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) | |
1440 | ctuParamRdo[compIdx].offset[classIdx] = (int)m_offset[compIdx + 1][typeIdx][classIdx + ctuParamRdo[compIdx].bandPos + 1]; | |
1441 | ||
1442 | m_entropyCoder.codeSaoOffset(ctuParamRdo[compIdx], compIdx + 1); | |
1443 | } | |
1444 | ||
1445 | uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); | |
1446 | double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate; | |
1447 | ||
1448 | if (cost < costPartBest) | |
1449 | { | |
1450 | costPartBest = cost; | |
1451 | copySaoUnit(lclCtuParam[0], &ctuParamRdo[0]); | |
1452 | copySaoUnit(lclCtuParam[1], &ctuParamRdo[1]); | |
1453 | bestDist = (estDist[0] + estDist[1]); | |
1454 | } | |
1455 | } | |
1456 | ||
1457 | mergeDist[0] += ((double)bestDist / m_chromaLambda); | |
1458 | m_entropyCoder.load(m_rdContexts.temp); | |
1459 | m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1); | |
1460 | m_entropyCoder.codeSaoOffset(*lclCtuParam[1], 2); | |
1461 | m_entropyCoder.store(m_rdContexts.temp); | |
1462 | ||
1463 | // merge left or merge up | |
1464 | ||
1465 | for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++) | |
1466 | { | |
1467 | for (int compIdx = 0; compIdx < 2; compIdx++) | |
1468 | { | |
1469 | int plane = compIdx + 1; | |
1470 | SaoCtuParam* mergeSrcParam = NULL; | |
1471 | if (addrLeft >= 0 && mergeIdx == 0) | |
1472 | mergeSrcParam = &(saoParam->ctuParam[plane][addrLeft]); | |
1473 | else if (addrUp >= 0 && mergeIdx == 1) | |
1474 | mergeSrcParam = &(saoParam->ctuParam[plane][addrUp]); | |
1475 | if (mergeSrcParam) | |
1476 | { | |
1477 | int64_t estDist = 0; | |
1478 | int typeIdx = mergeSrcParam->typeIdx; | |
1479 | if (typeIdx >= 0) | |
1480 | { | |
1481 | int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 0; | |
1482 | for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) | |
1483 | { | |
1484 | int mergeOffset = mergeSrcParam->offset[classIdx]; | |
1485 | estDist += estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]); | |
1486 | } | |
1487 | } | |
1488 | ||
1489 | copySaoUnit(&mergeSaoParam[plane][mergeIdx], mergeSrcParam); | |
1490 | mergeSaoParam[plane][mergeIdx].mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT; | |
1491 | mergeDist[mergeIdx + 1] += ((double)estDist / m_chromaLambda); | |
1492 | } | |
1493 | else | |
1494 | resetSaoUnit(&mergeSaoParam[plane][mergeIdx]); | |
1495 | } | |
1496 | } | |
1497 | } | |
1498 | } |