Imported Upstream version 1.4+222+hg5f9f7194267b
[deb_x265.git] / source / encoder / sao.cpp
... / ...
CommitLineData
1/*****************************************************************************
2 * Copyright (C) 2013 x265 project
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 * Min Chen <chenm003@163.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 *
21 * This program is also available under a commercial proprietary license.
22 * For more information, contact us at license @ x265.com.
23 *****************************************************************************/
24
25#include "common.h"
26#include "frame.h"
27#include "framedata.h"
28#include "picyuv.h"
29#include "sao.h"
30
31namespace {
32
/* Divide num by den and round the quotient to the nearest integer, with
 * halves rounded away from zero (for a positive den).
 *
 * The doubling of num and den is done in 64-bit arithmetic so that large
 * accumulated sums cannot overflow, and so that negating INT32_MIN is well
 * defined.  den must be non-zero; the caller in this file only divides by a
 * non-zero sample count. */
inline int32_t roundIBDI(int32_t num, int32_t den)
{
    const int64_t wideNum = num;
    const int64_t wideDen = den;
    const int64_t twiceDen = wideDen * 2;

    if (wideNum >= 0)
        return (int32_t)((wideNum * 2 + wideDen) / twiceDen);

    // mirror the positive case so that rounding is symmetric around zero
    return (int32_t)(-((-wideNum * 2 + wideDen) / twiceDen));
}
37
/* get the sign of input variable: -1, 0 or +1
 * (TODO: this is a dup, make common)
 *
 * Expressed with comparisons so the result does not depend on how the
 * compiler shifts negative values and stays defined for INT_MIN. */
inline int signOf(int x)
{
    return (x > 0) - (x < 0);
}
43
/* Evaluate (count * offset - 2 * offsetOrg) * offset, i.e.
 * count * offset^2 - 2 * offsetOrg * offset.
 *
 * count     - number of samples in the class
 * offset    - candidate offset applied to every sample of the class
 * offsetOrg - accumulated (original - reconstructed) difference of the class
 *
 * All arithmetic is carried out in 64 bits; the return type is int64_t and
 * the product must not wrap in 32 bits before being widened. */
inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg)
{
    const int64_t wideOffset = offset;
    return ((int64_t)count * wideOffset - (int64_t)offsetOrg * 2) * wideOffset;
}
48
49} // end anonymous namespace
50
51
52namespace x265 {
53
54const uint32_t SAO::s_eoTable[NUM_EDGETYPE] =
55{
56 1, // 0
57 2, // 1
58 0, // 2
59 3, // 3
60 4 // 4
61};
62
63SAO::SAO()
64{
65 m_count = NULL;
66 m_offset = NULL;
67 m_offsetOrg = NULL;
68 m_countPreDblk = NULL;
69 m_offsetOrgPreDblk = NULL;
70 m_refDepth = 0;
71 m_lumaLambda = 0;
72 m_chromaLambda = 0;
73 m_param = NULL;
74 m_clipTable = NULL;
75 m_clipTableBase = NULL;
76 m_offsetBo = NULL;
77 m_tmpU1[0] = NULL;
78 m_tmpU1[1] = NULL;
79 m_tmpU1[2] = NULL;
80 m_tmpU2[0] = NULL;
81 m_tmpU2[1] = NULL;
82 m_tmpU2[2] = NULL;
83 m_tmpL1 = NULL;
84 m_tmpL2 = NULL;
85
86 m_depthSaoRate[0][0] = 0;
87 m_depthSaoRate[0][1] = 0;
88 m_depthSaoRate[0][2] = 0;
89 m_depthSaoRate[0][3] = 0;
90 m_depthSaoRate[1][0] = 0;
91 m_depthSaoRate[1][1] = 0;
92 m_depthSaoRate[1][2] = 0;
93 m_depthSaoRate[1][3] = 0;
94}
95
96bool SAO::create(x265_param* param)
97{
98 m_param = param;
99 m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
100 m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
101
102 m_numCuInWidth = (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
103 m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
104
105 const pixel maxY = (1 << X265_DEPTH) - 1;
106 const pixel rangeExt = maxY >> 1;
107 int numCtu = m_numCuInWidth * m_numCuInHeight;
108
109 CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt);
110 CHECKED_MALLOC(m_offsetBo, pixel, maxY + 2 * rangeExt);
111
112 CHECKED_MALLOC(m_tmpL1, pixel, g_maxCUSize + 1);
113 CHECKED_MALLOC(m_tmpL2, pixel, g_maxCUSize + 1);
114
115 for (int i = 0; i < 3; i++)
116 {
117 CHECKED_MALLOC(m_tmpU1[i], pixel, m_param->sourceWidth);
118 CHECKED_MALLOC(m_tmpU2[i], pixel, m_param->sourceWidth);
119 }
120
121 CHECKED_MALLOC(m_count, PerClass, NUM_PLANE);
122 CHECKED_MALLOC(m_offset, PerClass, NUM_PLANE);
123 CHECKED_MALLOC(m_offsetOrg, PerClass, NUM_PLANE);
124
125 CHECKED_MALLOC(m_countPreDblk, PerPlane, numCtu);
126 CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numCtu);
127
128 m_clipTable = &(m_clipTableBase[rangeExt]);
129
130 for (int i = 0; i < rangeExt; i++)
131 m_clipTableBase[i] = 0;
132
133 for (int i = 0; i < maxY; i++)
134 m_clipTable[i] = (pixel)i;
135
136 for (int i = maxY; i < maxY + rangeExt; i++)
137 m_clipTable[i] = maxY;
138
139 return true;
140
141fail:
142 return false;
143}
144
145void SAO::destroy()
146{
147 X265_FREE(m_clipTableBase);
148 X265_FREE(m_offsetBo);
149
150 X265_FREE(m_tmpL1);
151 X265_FREE(m_tmpL2);
152
153 for (int i = 0; i < 3; i++)
154 {
155 X265_FREE(m_tmpU1[i]);
156 X265_FREE(m_tmpU2[i]);
157 }
158
159 X265_FREE(m_count);
160 X265_FREE(m_offset);
161 X265_FREE(m_offsetOrg);
162 X265_FREE(m_countPreDblk);
163 X265_FREE(m_offsetOrgPreDblk);
164}
165
166/* allocate memory for SAO parameters */
167void SAO::allocSaoParam(SAOParam* saoParam) const
168{
169 saoParam->numCuInWidth = m_numCuInWidth;
170
171 saoParam->ctuParam[0] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
172 saoParam->ctuParam[1] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
173 saoParam->ctuParam[2] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
174}
175
176void SAO::startSlice(Frame* frame, Entropy& initState, int qp)
177{
178 Slice* slice = frame->m_encData->m_slice;
179 int qpCb = qp;
180 if (m_param->internalCsp == X265_CSP_I420)
181 qpCb = Clip3(QP_MIN, QP_MAX_MAX, (int)g_chromaScale[qp + slice->m_pps->chromaQpOffset[0]]);
182 else
183 qpCb = X265_MIN(qp + slice->m_pps->chromaQpOffset[0], QP_MAX_SPEC);
184 m_lumaLambda = x265_lambda2_tab[qp];
185 m_chromaLambda = x265_lambda2_tab[qpCb]; // Use Cb QP for SAO chroma
186 m_frame = frame;
187
188 switch (slice->m_sliceType)
189 {
190 case I_SLICE:
191 m_refDepth = 0;
192 break;
193 case P_SLICE:
194 m_refDepth = 1;
195 break;
196 case B_SLICE:
197 m_refDepth = 2 + !IS_REFERENCED(frame);
198 break;
199 }
200
201 resetStats();
202
203 m_entropyCoder.load(initState);
204 m_rdContexts.next.load(initState);
205 m_rdContexts.cur.load(initState);
206
207 SAOParam* saoParam = frame->m_encData->m_saoParam;
208 if (!saoParam)
209 {
210 saoParam = new SAOParam;
211 allocSaoParam(saoParam);
212 frame->m_encData->m_saoParam = saoParam;
213 }
214
215 rdoSaoUnitRowInit(saoParam);
216
217 // NOTE: Disable SAO automatic turn-off when frame parallelism is
218 // enabled for output exact independent of frame thread count
219 if (m_param->frameNumThreads > 1)
220 {
221 saoParam->bSaoFlag[0] = true;
222 saoParam->bSaoFlag[1] = true;
223 }
224}
225
/* Apply SAO to one CTU, in place, on the reconstructed picture.
 *
 * addr    - CTU address passed to getPicCTU() / getPlaneAddr()
 * typeIdx - SAO_EO_0..SAO_EO_3 (edge offset) or SAO_BO (band offset); any
 *           other value leaves the pixels untouched
 * plane   - 0 for luma, non-zero for chroma (geometry is scaled by
 *           m_hChromaShift / m_vChromaShift)
 *
 * Edge offsets are read from m_offsetEo[] and the band-offset lookup from
 * m_offsetBo[]; both are filled by processSaoUnitRow() before this call.
 * For every typeIdx the CTU's right-most column is copied to m_tmpL2 before
 * any pixel is changed, and m_tmpL1 / m_tmpL2 are swapped on exit, so the
 * next CTU to the right reads unfiltered left neighbours from m_tmpL1. */
226// CTU-based SAO process without slice granularity
227void SAO::processSaoCu(int addr, int typeIdx, int plane)
228{
229 int x, y;
230 const CUData* cu = m_frame->m_encData->getPicCTU(addr);
231 pixel* rec = m_frame->m_reconPic->getPlaneAddr(plane, addr);
232 intptr_t stride = plane ? m_frame->m_reconPic->m_strideC : m_frame->m_reconPic->m_stride;
233 uint32_t picWidth = m_param->sourceWidth;
234 uint32_t picHeight = m_param->sourceHeight;
235 int ctuWidth = g_maxCUSize;
236 int ctuHeight = g_maxCUSize;
237 uint32_t lpelx = cu->m_cuPelX;
238 uint32_t tpely = cu->m_cuPelY;
 // chroma planes: convert every dimension and position to chroma samples
239 if (plane)
240 {
241 picWidth >>= m_hChromaShift;
242 picHeight >>= m_vChromaShift;
243 ctuWidth >>= m_hChromaShift;
244 ctuHeight >>= m_vChromaShift;
245 lpelx >>= m_hChromaShift;
246 tpely >>= m_vChromaShift;
247 }
 // clip the CTU to the picture so partial CTUs at the right/bottom are handled
248 uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
249 uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
250 ctuWidth = rpelx - lpelx;
251 ctuHeight = bpely - tpely;
252
253 int startX;
254 int startY;
255 int endX;
256 int endY;
257 pixel* tmpL;
258 pixel* tmpU;
259
 // sign buffers; the pointers start one element in so that index -1 is valid
260 int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
261 int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
262
263 {
 // save this CTU's right-most column (ctuHeight + 1 rows) before filtering
264 const pixel* recR = &rec[ctuWidth - 1];
265 for (int i = 0; i < ctuHeight + 1; i++)
266 {
267 m_tmpL2[i] = *recR;
268 recR += stride;
269 }
270
 // tmpL: saved left-neighbour column; tmpU: saved row above, at this CTU's x
271 tmpL = m_tmpL1;
272 tmpU = &(m_tmpU1[plane][lpelx]);
273 }
274
275 switch (typeIdx)
276 {
277 case SAO_EO_0: // dir: -
278 {
279 pixel firstPxl = 0, lastPxl = 0;
 // the first/last picture column has no horizontal neighbour and is skipped
280 startX = !lpelx;
281 endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
 // scalar path when the width is not a multiple of 16
282 if (ctuWidth & 15)
283 {
284 for (y = 0; y < ctuHeight; y++)
285 {
286 int signLeft = signOf(rec[startX] - tmpL[y]);
287 for (x = startX; x < endX; x++)
288 {
289 int signRight = signOf(rec[x] - rec[x + 1]);
290 int edgeType = signRight + signLeft + 2;
 // the sign towards the right neighbour, negated, is the next pixel's left sign
291 signLeft = -signRight;
292
293 rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
294 }
295
296 rec += stride;
297 }
298 }
299 else
300 {
 // the primitive is given the full row width, so picture-edge pixels are
 // saved beforehand and written back afterwards
301 for (y = 0; y < ctuHeight; y++)
302 {
303 int signLeft = signOf(rec[startX] - tmpL[y]);
304
305 if (!lpelx)
306 firstPxl = rec[0];
307
308 if (rpelx == picWidth)
309 lastPxl = rec[ctuWidth - 1];
310
311 primitives.saoCuOrgE0(rec, m_offsetEo, ctuWidth, (int8_t)signLeft);
312
313 if (!lpelx)
314 rec[0] = firstPxl;
315
316 if (rpelx == picWidth)
317 rec[ctuWidth - 1] = lastPxl;
318
319 rec += stride;
320 }
321 }
322 break;
323 }
324 case SAO_EO_1: // dir: |
325 {
 // the first/last picture row has no vertical neighbour and is skipped
326 startY = !tpely;
327 endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
328 if (!tpely)
329 rec += stride;
330
 // signs against the saved row above the first processed row
331 for (x = 0; x < ctuWidth; x++)
332 upBuff1[x] = signOf(rec[x] - tmpU[x]);
333
334 for (y = startY; y < endY; y++)
335 {
336 for (x = 0; x < ctuWidth; x++)
337 {
338 int signDown = signOf(rec[x] - rec[x + stride]);
339 int edgeType = signDown + upBuff1[x] + 2;
 // reused as the "up" sign of the pixel directly below
340 upBuff1[x] = -signDown;
341
342 rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
343 }
344
345 rec += stride;
346 }
347
348 break;
349 }
350 case SAO_EO_2: // dir: 135
351 {
352 startX = !lpelx;
353 endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
354
355 startY = !tpely;
356 endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
357
358 if (!tpely)
359 rec += stride;
360
 // signs against the upper-left neighbour, taken from the saved row above
361 for (x = startX; x < endX; x++)
362 upBuff1[x] = signOf(rec[x] - tmpU[x - 1]);
363
364 for (y = startY; y < endY; y++)
365 {
 // first entry of the next row's buffer uses the saved left column
366 upBufft[startX] = signOf(rec[stride + startX] - tmpL[y]);
367 for (x = startX; x < endX; x++)
368 {
369 int signDown = signOf(rec[x] - rec[x + stride + 1]);
370 int edgeType = signDown + upBuff1[x] + 2;
 // stored one column to the right, for the pixel below-right on the next row
371 upBufft[x + 1] = -signDown;
372 rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
373 }
374
 // the buffer filled for the next row becomes the current one
375 std::swap(upBuff1, upBufft);
376
377 rec += stride;
378 }
379
380 break;
381 }
382 case SAO_EO_3: // dir: 45
383 {
384 startX = !lpelx;
385 endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
386
387 startY = !tpely;
388 endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
389
390 if (!tpely)
391 rec += stride;
392
 // signs against the upper-right neighbour; starts at startX - 1, which is
 // why the buffer has a leading guard element
393 for (x = startX - 1; x < endX; x++)
394 upBuff1[x] = signOf(rec[x] - tmpU[x + 1]);
395
396 for (y = startY; y < endY; y++)
397 {
 // first column: the lower-left neighbour comes from the saved left column
398 x = startX;
399 int signDown = signOf(rec[x] - tmpL[y + 1]);
400 int edgeType = signDown + upBuff1[x] + 2;
401 upBuff1[x - 1] = -signDown;
402 rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
403 for (x = startX + 1; x < endX; x++)
404 {
405 signDown = signOf(rec[x] - rec[x + stride - 1]);
406 edgeType = signDown + upBuff1[x] + 2;
 // stored one column to the left, for the pixel below-left on the next row
407 upBuff1[x - 1] = -signDown;
408 rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
409 }
410
 // last entry for the next row is computed directly from the pixels
411 upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
412
413 rec += stride;
414 }
415
416 break;
417 }
418 case SAO_BO:
419 {
 // band offset is a pure per-pixel lookup; no neighbours are involved
420 const pixel* offsetBo = m_offsetBo;
421
422 for (y = 0; y < ctuHeight; y++)
423 {
424 for (x = 0; x < ctuWidth; x++)
425 rec[x] = offsetBo[rec[x]];
426
427 rec += stride;
428 }
429
430 break;
431 }
432 default: break;
433 }
434
 // the column saved at the top of this function becomes the left-neighbour
 // column for the next CTU
435// if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1)
436 std::swap(m_tmpL1, m_tmpL2);
437}
438
/* Apply SAO to every CTU of one CTU row of one plane.
 *
 * ctuParam - per-CTU SAO parameters of this plane, indexed by CTU address
 * idxY     - CTU row index
 * plane    - 0 for luma, non-zero for chroma
 *
 * Before filtering, the row's first pixel column is copied to m_tmpL1 and the
 * row's last pixel line to m_tmpU2[plane]; m_tmpU1[plane] / m_tmpU2[plane]
 * are swapped on exit so the saved line serves as the "row above" for the
 * next CTU row.  For the first CTU row, m_tmpU1[plane] is seeded with the
 * first line of the picture. */
439/* Process SAO all units */
440void SAO::processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane)
441{
442 intptr_t stride = plane ? m_frame->m_reconPic->m_strideC : m_frame->m_reconPic->m_stride;
443 uint32_t picWidth = m_param->sourceWidth;
444 int ctuWidth = g_maxCUSize;
445 int ctuHeight = g_maxCUSize;
446 if (plane)
447 {
448 picWidth >>= m_hChromaShift;
449 ctuWidth >>= m_hChromaShift;
450 ctuHeight >>= m_vChromaShift;
451 }
452
453 if (!idxY)
454 {
455 pixel* rec = m_frame->m_reconPic->m_picOrg[plane];
456 memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth);
457 }
458
459 int addr = idxY * m_numCuInWidth;
460 pixel* rec = plane ? m_frame->m_reconPic->getChromaAddr(plane, addr) : m_frame->m_reconPic->getLumaAddr(addr);
461
 // save the first column of the row (ctuHeight + 1 lines) as the initial left column
462 for (int i = 0; i < ctuHeight + 1; i++)
463 {
464 m_tmpL1[i] = rec[0];
465 rec += stride;
466 }
467
 // the loop above left rec ctuHeight + 1 lines down; step back two lines to
 // land on the last line of this CTU row
468 rec -= (stride << 1);
469
470 memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidth);
471
472 const int boShift = X265_DEPTH - SAO_BO_BITS;
473
474 for (int idxX = 0; idxX < m_numCuInWidth; idxX++)
475 {
476 addr = idxY * m_numCuInWidth + idxX;
477
478 bool mergeLeftFlag = ctuParam[addr].mergeMode == SAO_MERGE_LEFT;
479 int typeIdx = ctuParam[addr].typeIdx;
480
 // a negative typeIdx means this CTU is not filtered
481 if (typeIdx >= 0)
482 {
 // with merge-left the tables built for the previous CTU are reused as is
483 if (!mergeLeftFlag)
484 {
485 if (typeIdx == SAO_BO)
486 {
487 pixel* offsetBo = m_offsetBo;
488 int offset[SAO_NUM_BO_CLASSES];
489 memset(offset, 0, sizeof(offset));
490
 // the signalled offsets apply to consecutive bands starting at bandPos,
 // wrapping around; every other band keeps a zero offset
491 for (int i = 0; i < SAO_NUM_OFFSET; i++)
492 offset[((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = ctuParam[addr].offset[i] << SAO_BIT_INC;
493
 // precompute the offset-and-clip result for every possible pixel value
494 for (int i = 0; i < (1 << X265_DEPTH); i++)
495 offsetBo[i] = m_clipTable[i + offset[i >> boShift]];
496 }
497 else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
498 {
 // slot 0 carries a zero offset; slots 1.. hold the signalled offsets
499 int offset[NUM_EDGETYPE];
500 offset[0] = 0;
501 for (int i = 0; i < SAO_NUM_OFFSET; i++)
502 offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC;
503
 // reorder so that m_offsetEo can be indexed directly by the raw edge index
504 for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
505 m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]];
506 }
507 }
508 processSaoCu(addr, typeIdx, plane);
509 }
510 else if (idxX != (m_numCuInWidth - 1))
511 {
 // processSaoCu() did not run for this CTU, so refresh the left-column
 // buffer here with this CTU's right-most column for the next CTU
512 rec = plane ? m_frame->m_reconPic->getChromaAddr(plane, addr) : m_frame->m_reconPic->getLumaAddr(addr);
513
514 for (int i = 0; i < ctuHeight + 1; i++)
515 {
516 m_tmpL1[i] = rec[ctuWidth - 1];
517 rec += stride;
518 }
519 }
520 }
521
522 std::swap(m_tmpU1[plane], m_tmpU2[plane]);
523}
524
525void SAO::resetSaoUnit(SaoCtuParam* saoUnit)
526{
527 saoUnit->mergeMode = SAO_MERGE_NONE;
528 saoUnit->typeIdx = -1;
529 saoUnit->bandPos = 0;
530
531 for (int i = 0; i < SAO_NUM_OFFSET; i++)
532 saoUnit->offset[i] = 0;
533}
534
535void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc)
536{
537 saoUnitDst->mergeMode = saoUnitSrc->mergeMode;
538 saoUnitDst->typeIdx = saoUnitSrc->typeIdx;
539 saoUnitDst->bandPos = saoUnitSrc->bandPos;
540
541 for (int i = 0; i < SAO_NUM_OFFSET; i++)
542 saoUnitDst->offset[i] = saoUnitSrc->offset[i];
543}
544
/* Accumulate SAO statistics for one CTU of one plane.
 *
 * addr  - CTU address
 * plane - 0 for luma, non-zero for chroma
 *
 * For the band-offset type and each of the four edge-offset types, every
 * visited pixel adds (fenc - rec) to m_offsetOrg[plane][type][class] and 1 to
 * m_count[plane][type][class].  The tables are added to, not cleared.
 *
 * skipR right-hand columns and skipB bottom rows of the CTU are left out
 * unless the CTU touches the corresponding picture edge (presumably because
 * those pixels are not final yet when this runs - confirm).  With
 * m_param->bSaoNonDeblocked the skip widths are chosen per SAO type. */
545/* Calculate SAO statistics for current CTU without non-crossing slice */
546void SAO::calcSaoStatsCu(int addr, int plane)
547{
548 int x, y;
549 const CUData* cu = m_frame->m_encData->getPicCTU(addr);
550 const pixel* fenc0 = m_frame->m_fencPic->getPlaneAddr(plane, addr);
551 const pixel* rec0 = m_frame->m_reconPic->getPlaneAddr(plane, addr);
552 const pixel* fenc;
553 const pixel* rec;
 // the same stride is used to step both fenc and rec
554 intptr_t stride = plane ? m_frame->m_reconPic->m_strideC : m_frame->m_reconPic->m_stride;
555 uint32_t picWidth = m_param->sourceWidth;
556 uint32_t picHeight = m_param->sourceHeight;
557 int ctuWidth = g_maxCUSize;
558 int ctuHeight = g_maxCUSize;
559 uint32_t lpelx = cu->m_cuPelX;
560 uint32_t tpely = cu->m_cuPelY;
561 if (plane)
562 {
563 picWidth >>= m_hChromaShift;
564 picHeight >>= m_vChromaShift;
565 ctuWidth >>= m_hChromaShift;
566 ctuHeight >>= m_vChromaShift;
567 lpelx >>= m_hChromaShift;
568 tpely >>= m_vChromaShift;
569 }
 // clip the CTU to the picture
570 uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
571 uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
572 ctuWidth = rpelx - lpelx;
573 ctuHeight = bpely - tpely;
574
575 int startX;
576 int startY;
577 int endX;
578 int endY;
579 int32_t* stats;
580 int32_t* count;
581
 // default skip widths; overridden per type below when bSaoNonDeblocked is set
582 int skipB = plane ? 2 : 4;
583 int skipR = plane ? 3 : 5;
584
 // sign buffers; the pointers start one element in so that index -1 is valid
585 int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
586 int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
587
588 // SAO_BO:
589 {
590 const int boShift = X265_DEPTH - SAO_BO_BITS;
591
592 if (m_param->bSaoNonDeblocked)
593 {
594 skipB = plane ? 1 : 3;
595 skipR = plane ? 2 : 4;
596 }
597 stats = m_offsetOrg[plane][SAO_BO];
598 count = m_count[plane][SAO_BO];
599
600 fenc = fenc0;
601 rec = rec0;
602
603 endX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
604 endY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;
605
606 for (y = 0; y < endY; y++)
607 {
608 for (x = 0; x < endX; x++)
609 {
 // band classes are stored starting at slot 1
610 int classIdx = 1 + (rec[x] >> boShift);
611 stats[classIdx] += (fenc[x] - rec[x]);
612 count[classIdx]++;
613 }
614
615 fenc += stride;
616 rec += stride;
617 }
618 }
619
620 {
621 // SAO_EO_0: // dir: -
622 {
623 if (m_param->bSaoNonDeblocked)
624 {
625 skipB = plane ? 1 : 3;
626 skipR = plane ? 3 : 5;
627 }
628 stats = m_offsetOrg[plane][SAO_EO_0];
629 count = m_count[plane][SAO_EO_0];
630
631 fenc = fenc0;
632 rec = rec0;
633
634 startX = !lpelx;
635 endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
 // NOTE(review): unlike the other types, this row bound drops skipB rows
 // even when the CTU touches the bottom of the picture - confirm intended
636 for (y = 0; y < ctuHeight - skipB; y++)
637 {
638 int signLeft = signOf(rec[startX] - rec[startX - 1]);
639 for (x = startX; x < endX; x++)
640 {
641 int signRight = signOf(rec[x] - rec[x + 1]);
642 int edgeType = signRight + signLeft + 2;
643 signLeft = -signRight;
644
645 stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
646 count[s_eoTable[edgeType]]++;
647 }
648
649 fenc += stride;
650 rec += stride;
651 }
652 }
653
654 // SAO_EO_1: // dir: |
655 {
656 if (m_param->bSaoNonDeblocked)
657 {
658 skipB = plane ? 2 : 4;
659 skipR = plane ? 2 : 4;
660 }
661 stats = m_offsetOrg[plane][SAO_EO_1];
662 count = m_count[plane][SAO_EO_1];
663
664 fenc = fenc0;
665 rec = rec0;
666
667 startY = !tpely;
668 endX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
669 endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
 // the first picture row has no neighbour above; start one row down
670 if (!tpely)
671 {
672 fenc += stride;
673 rec += stride;
674 }
675
 // signs against the row above, read directly from the reconstruction
676 for (x = 0; x < ctuWidth; x++)
677 upBuff1[x] = signOf(rec[x] - rec[x - stride]);
678
679 for (y = startY; y < endY; y++)
680 {
681 for (x = 0; x < endX; x++)
682 {
683 int signDown = signOf(rec[x] - rec[x + stride]);
684 int edgeType = signDown + upBuff1[x] + 2;
685 upBuff1[x] = -signDown;
686
687 stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
688 count[s_eoTable[edgeType]]++;
689 }
690
691 fenc += stride;
692 rec += stride;
693 }
694 }
695
696 // SAO_EO_2: // dir: 135
697 {
698 if (m_param->bSaoNonDeblocked)
699 {
700 skipB = plane ? 2 : 4;
701 skipR = plane ? 3 : 5;
702 }
703 stats = m_offsetOrg[plane][SAO_EO_2];
704 count = m_count[plane][SAO_EO_2];
705
706 fenc = fenc0;
707 rec = rec0;
708
709 startX = !lpelx;
710 endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
711
712 startY = !tpely;
713 endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
714 if (!tpely)
715 {
716 fenc += stride;
717 rec += stride;
718 }
719
 // signs against the upper-left neighbour
720 for (x = startX; x < endX; x++)
721 upBuff1[x] = signOf(rec[x] - rec[x - stride - 1]);
722
723 for (y = startY; y < endY; y++)
724 {
 // first entry of the next row's buffer: next-row pixel against its upper-left
725 upBufft[startX] = signOf(rec[startX + stride] - rec[startX - 1]);
726 for (x = startX; x < endX; x++)
727 {
728 int signDown = signOf(rec[x] - rec[x + stride + 1]);
729 int edgeType = signDown + upBuff1[x] + 2;
730 upBufft[x + 1] = -signDown;
731 stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
732 count[s_eoTable[edgeType]]++;
733 }
734
735 std::swap(upBuff1, upBufft);
736
737 rec += stride;
738 fenc += stride;
739 }
740 }
741
742 // SAO_EO_3: // dir: 45
743 {
744 if (m_param->bSaoNonDeblocked)
745 {
746 skipB = plane ? 2 : 4;
747 skipR = plane ? 3 : 5;
748 }
749 stats = m_offsetOrg[plane][SAO_EO_3];
750 count = m_count[plane][SAO_EO_3];
751
752 fenc = fenc0;
753 rec = rec0;
754
755 startX = !lpelx;
756 endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
757
758 startY = !tpely;
759 endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
760
761 if (!tpely)
762 {
763 fenc += stride;
764 rec += stride;
765 }
766
 // signs against the upper-right neighbour; starts at startX - 1, which may
 // be the buffer's guard element
767 for (x = startX - 1; x < endX; x++)
768 upBuff1[x] = signOf(rec[x] - rec[x - stride + 1]);
769
770 for (y = startY; y < endY; y++)
771 {
772 for (x = startX; x < endX; x++)
773 {
774 int signDown = signOf(rec[x] - rec[x + stride - 1]);
775 int edgeType = signDown + upBuff1[x] + 2;
 // stored one column to the left, for the pixel below-left on the next row
776 upBuff1[x - 1] = -signDown;
777 stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
778 count[s_eoTable[edgeType]]++;
779 }
780
 // last entry for the next row is computed directly from the pixels
781 upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
782
783 rec += stride;
784 fenc += stride;
785 }
786 }
787 }
788}
789
/* Collect SAO statistics for the right-hand columns and bottom rows of one
 * CTU, for all planes, into m_countPreDblk[addr] / m_offsetOrgPreDblk[addr].
 *
 * frame - frame the CTU belongs to
 * idxX  - CTU column index
 * idxY  - CTU row index
 *
 * Both tables for this CTU are cleared first.  Each pass visits columns from
 * startX onwards on the upper rows and whole rows lower down (the exact row
 * at which that switch happens differs between passes).  Judging by the
 * name, this is meant to run on pixels that have not been deblocked yet
 * (TODO confirm against the caller).
 *
 * NOTE(review): the CTU and chroma stride come from the frame argument while
 * the luma stride and both pixel planes come from m_frame; confirm the two
 * always refer to the same frame. */
790void SAO::calcSaoStatsCu_BeforeDblk(Frame* frame, int idxX, int idxY)
791{
792 int addr = idxX + m_numCuInWidth * idxY;
793
794 int x, y;
795 const CUData* cu = frame->m_encData->getPicCTU(addr);
796 const pixel* fenc;
797 const pixel* rec;
798 intptr_t stride = m_frame->m_reconPic->m_stride;
799 uint32_t picWidth = m_param->sourceWidth;
800 uint32_t picHeight = m_param->sourceHeight;
801 int ctuWidth = g_maxCUSize;
802 int ctuHeight = g_maxCUSize;
803 uint32_t lpelx = cu->m_cuPelX;
804 uint32_t tpely = cu->m_cuPelY;
 // clip the CTU to the picture
805 uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
806 uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
807 ctuWidth = rpelx - lpelx;
808 ctuHeight = bpely - tpely;
809
810 int startX;
811 int startY;
812 int endX;
813 int endY;
814 int firstX, firstY;
815 int32_t* stats;
816 int32_t* count;
817
818 int skipB, skipR;
819
 // sign buffers; the pointers start one element in so that index -1 is valid
820 int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
821 int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
822
823 const int boShift = X265_DEPTH - SAO_BO_BITS;
824
825 memset(m_countPreDblk[addr], 0, sizeof(PerPlane));
826 memset(m_offsetOrgPreDblk[addr], 0, sizeof(PerPlane));
827
828 for (int plane = 0; plane < NUM_PLANE; plane++)
829 {
 // switch to chroma geometry once, on entering plane 1; the values carry
 // over unchanged to later planes
830 if (plane == 1)
831 {
832 stride = frame->m_reconPic->m_strideC;
833 picWidth >>= m_hChromaShift;
834 picHeight >>= m_vChromaShift;
835 ctuWidth >>= m_hChromaShift;
836 ctuHeight >>= m_vChromaShift;
837 lpelx >>= m_hChromaShift;
838 tpely >>= m_vChromaShift;
839 rpelx >>= m_hChromaShift;
840 bpely >>= m_vChromaShift;
841 }
842
843 // SAO_BO:
844
845 skipB = plane ? 1 : 3;
846 skipR = plane ? 2 : 4;
847
848 stats = m_offsetOrgPreDblk[addr][plane][SAO_BO];
849 count = m_countPreDblk[addr][plane][SAO_BO];
850
851 const pixel* fenc0 = m_frame->m_fencPic->getPlaneAddr(plane, addr);
852 const pixel* rec0 = m_frame->m_reconPic->getPlaneAddr(plane, addr);
853 fenc = fenc0;
854 rec = rec0;
855
 // startX / startY mark where the right/bottom border region begins; at a
 // picture edge that border is empty
856 startX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
857 startY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;
858
859 for (y = 0; y < ctuHeight; y++)
860 {
 // rows above startY: right border only; rows from startY on: whole row
861 for (x = (y < startY ? startX : 0); x < ctuWidth; x++)
862 {
863 int classIdx = 1 + (rec[x] >> boShift);
864 stats[classIdx] += (fenc[x] - rec[x]);
865 count[classIdx]++;
866 }
867
868 fenc += stride;
869 rec += stride;
870 }
871
872 // SAO_EO_0: // dir: -
873 {
874 skipB = plane ? 1 : 3;
875 skipR = plane ? 3 : 5;
876
877 stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_0];
878 count = m_countPreDblk[addr][plane][SAO_EO_0];
879
880 fenc = fenc0;
881 rec = rec0;
882
883 startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
884 startY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;
885 firstX = !lpelx;
886 // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
887 endX = ctuWidth - 1; // not refer right CTU
888
889 for (y = 0; y < ctuHeight; y++)
890 {
891 x = (y < startY ? startX : firstX);
892 int signLeft = signOf(rec[x] - rec[x - 1]);
893 for (; x < endX; x++)
894 {
895 int signRight = signOf(rec[x] - rec[x + 1]);
896 int edgeType = signRight + signLeft + 2;
897 signLeft = -signRight;
898
899 stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
900 count[s_eoTable[edgeType]]++;
901 }
902
903 fenc += stride;
904 rec += stride;
905 }
906 }
907
908 // SAO_EO_1: // dir: |
909 {
910 skipB = plane ? 2 : 4;
911 skipR = plane ? 2 : 4;
912
913 stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_1];
914 count = m_countPreDblk[addr][plane][SAO_EO_1];
915
916 fenc = fenc0;
917 rec = rec0;
918
919 startX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
920 startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
921 firstY = !tpely;
922 // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
923 endY = ctuHeight - 1; // not refer below CTU
924 if (!tpely)
925 {
926 fenc += stride;
927 rec += stride;
928 }
929
 // only the right-border columns get an initial "up" sign here
930 for (x = startX; x < ctuWidth; x++)
931 upBuff1[x] = signOf(rec[x] - rec[x - stride]);
932
933 for (y = firstY; y < endY; y++)
934 {
 // the full row is walked starting at row startY - 1 so that upBuff1 holds
 // valid signs for every column when row startY is reached
935 for (x = (y < startY - 1 ? startX : 0); x < ctuWidth; x++)
936 {
937 int signDown = signOf(rec[x] - rec[x + stride]);
938 int edgeType = signDown + upBuff1[x] + 2;
939 upBuff1[x] = -signDown;
940
 // pixels outside the border region only update the sign buffer
941 if (x < startX && y < startY)
942 continue;
943
944 stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
945 count[s_eoTable[edgeType]]++;
946 }
947
948 fenc += stride;
949 rec += stride;
950 }
951 }
952
953 // SAO_EO_2: // dir: 135
954 {
955 skipB = plane ? 2 : 4;
956 skipR = plane ? 3 : 5;
957
958 stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_2];
959 count = m_countPreDblk[addr][plane][SAO_EO_2];
960
961 fenc = fenc0;
962 rec = rec0;
963
964 startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
965 startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
966 firstX = !lpelx;
967 firstY = !tpely;
968 // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
969 // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
970 endX = ctuWidth - 1; // not refer right CTU
971 endY = ctuHeight - 1; // not refer below CTU
972 if (!tpely)
973 {
974 fenc += stride;
975 rec += stride;
976 }
977
 // signs against the upper-left neighbour, right-border columns only
978 for (x = startX; x < endX; x++)
979 upBuff1[x] = signOf(rec[x] - rec[x - stride - 1]);
980
981 for (y = firstY; y < endY; y++)
982 {
983 x = (y < startY - 1 ? startX : firstX);
 // first entry of the next row's buffer: next-row pixel against its upper-left
984 upBufft[x] = signOf(rec[x + stride] - rec[x - 1]);
985 for (; x < endX; x++)
986 {
987 int signDown = signOf(rec[x] - rec[x + stride + 1]);
988 int edgeType = signDown + upBuff1[x] + 2;
989 upBufft[x + 1] = -signDown;
990
 // pixels outside the border region only update the sign buffer
991 if (x < startX && y < startY)
992 continue;
993
994 stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
995 count[s_eoTable[edgeType]]++;
996 }
997
998 std::swap(upBuff1, upBufft);
999
1000 rec += stride;
1001 fenc += stride;
1002 }
1003 }
1004
1005 // SAO_EO_3: // dir: 45
1006 {
1007 skipB = plane ? 2 : 4;
1008 skipR = plane ? 3 : 5;
1009
1010 stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_3];
1011 count = m_countPreDblk[addr][plane][SAO_EO_3];
1012
1013 fenc = fenc0;
1014 rec = rec0;
1015
1016 startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
1017 startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
1018 firstX = !lpelx;
1019 firstY = !tpely;
1020 // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
1021 // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
1022 endX = ctuWidth - 1; // not refer right CTU
1023 endY = ctuHeight - 1; // not refer below CTU
1024 if (!tpely)
1025 {
1026 fenc += stride;
1027 rec += stride;
1028 }
1029
 // signs against the upper-right neighbour; starts one column before startX
1030 for (x = startX - 1; x < endX; x++)
1031 upBuff1[x] = signOf(rec[x] - rec[x - stride + 1]);
1032
1033 for (y = firstY; y < endY; y++)
1034 {
1035 for (x = (y < startY - 1 ? startX : firstX); x < endX; x++)
1036 {
1037 int signDown = signOf(rec[x] - rec[x + stride - 1]);
1038 int edgeType = signDown + upBuff1[x] + 2;
1039 upBuff1[x - 1] = -signDown;
1040
 // pixels outside the border region only update the sign buffer
1041 if (x < startX && y < startY)
1042 continue;
1043
1044 stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
1045 count[s_eoTable[edgeType]]++;
1046 }
1047
 // last entry for the next row is computed directly from the pixels
1048 upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
1049
1050 rec += stride;
1051 fenc += stride;
1052 }
1053 }
1054 }
1055}
1056
1057/* reset offset statistics */
1058void SAO::resetStats()
1059{
1060 memset(m_count, 0, sizeof(PerClass) * NUM_PLANE);
1061 memset(m_offset, 0, sizeof(PerClass) * NUM_PLANE);
1062 memset(m_offsetOrg, 0, sizeof(PerClass) * NUM_PLANE);
1063}
1064
1065void SAO::rdoSaoUnitRowInit(SAOParam* saoParam)
1066{
1067 saoParam->bSaoFlag[0] = true;
1068 saoParam->bSaoFlag[1] = true;
1069
1070 m_numNoSao[0] = 0; // Luma
1071 m_numNoSao[1] = 0; // Chroma
1072 if (m_refDepth > 0 && m_depthSaoRate[0][m_refDepth - 1] > SAO_ENCODING_RATE)
1073 saoParam->bSaoFlag[0] = false;
1074 if (m_refDepth > 0 && m_depthSaoRate[1][m_refDepth - 1] > SAO_ENCODING_RATE_CHROMA)
1075 saoParam->bSaoFlag[1] = false;
1076}
1077
1078void SAO::rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus)
1079{
1080 if (!saoParam->bSaoFlag[0])
1081 m_depthSaoRate[0][m_refDepth] = 1.0;
1082 else
1083 m_depthSaoRate[0][m_refDepth] = m_numNoSao[0] / ((double)numctus);
1084
1085 if (!saoParam->bSaoFlag[1])
1086 m_depthSaoRate[1][m_refDepth] = 1.0;
1087 else
1088 m_depthSaoRate[1][m_refDepth] = m_numNoSao[1] / ((double)numctus);
1089}
1090
/* Choose the SAO parameters for every CTU of CTU row idxY.
 *
 * saoParam - per-picture SAO parameters; ctuParam[plane][addr] is written
 * idxY     - CTU row index
 *
 * For each CTU: the statistics tables are reset (or seeded from the
 * pre-deblock tables when bSaoNonDeblocked is set), statistics are gathered
 * for each plane whose SAO flag is on, and saoComponentParamDist() /
 * sao2ChromaParamDist() are called with the merge candidates and mergeDist[].
 * The cost of coding new parameters (mergeDist[0] + bits) is then compared
 * with merging from the left or upper CTU (mergeDist[1] / mergeDist[2] +
 * bits); the cheapest option is kept and the entropy context is advanced. */
1091void SAO::rdoSaoUnitRow(SAOParam* saoParam, int idxY)
1092{
 // NOTE(review): declared [NUM_MERGE_MODE][2] but indexed below as
 // [plane][mergeIdx] with plane up to 2; confirm NUM_MERGE_MODE is at least 3
1093 SaoCtuParam mergeSaoParam[NUM_MERGE_MODE][2];
1094 double mergeDist[NUM_MERGE_MODE];
1095 bool allowMerge[2]; // left, up
1096 allowMerge[1] = (idxY > 0);
1097
1098 for (int idxX = 0; idxX < m_numCuInWidth; idxX++)
1099 {
1100 int addr = idxX + idxY * m_numCuInWidth;
 // -1 marks a neighbour that does not exist
1101 int addrUp = idxY ? addr - m_numCuInWidth : -1;
1102 int addrLeft = idxX ? addr - 1 : -1;
1103 allowMerge[0] = (idxX > 0);
1104
 // code "no merge" for each available candidate and keep that state in temp
1105 m_entropyCoder.load(m_rdContexts.cur);
1106 if (allowMerge[0])
1107 m_entropyCoder.codeSaoMerge(0);
1108 if (allowMerge[1])
1109 m_entropyCoder.codeSaoMerge(0);
1110 m_entropyCoder.store(m_rdContexts.temp);
1111 // reset stats Y, Cb, Cr
1112 for (int plane = 0; plane < 3; plane++)
1113 {
1114 for (int j = 0; j < MAX_NUM_SAO_TYPE; j++)
1115 {
1116 for (int k = 0; k < MAX_NUM_SAO_CLASS; k++)
1117 {
1118 m_offset[plane][j][k] = 0;
1119 if (m_param->bSaoNonDeblocked)
1120 {
 // start from the statistics collected by calcSaoStatsCu_BeforeDblk()
1121 m_count[plane][j][k] = m_countPreDblk[addr][plane][j][k];
1122 m_offsetOrg[plane][j][k] = m_offsetOrgPreDblk[addr][plane][j][k];
1123 }
1124 else
1125 {
1126 m_count[plane][j][k] = 0;
1127 m_offsetOrg[plane][j][k] = 0;
1128 }
1129 }
1130 }
1131
 // default to "no SAO" for this CTU (the offsets are not touched here)
1132 saoParam->ctuParam[plane][addr].mergeMode = SAO_MERGE_NONE;
1133 saoParam->ctuParam[plane][addr].typeIdx = -1;
1134 saoParam->ctuParam[plane][addr].bandPos = 0;
 // bSaoFlag[0] governs luma, bSaoFlag[1] governs both chroma planes
1135 if (saoParam->bSaoFlag[plane > 0])
1136 calcSaoStatsCu(addr, plane);
1137 }
1138
1139 saoComponentParamDist(saoParam, addr, addrUp, addrLeft, &mergeSaoParam[0][0], mergeDist);
1140
1141 sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft, mergeSaoParam, mergeDist);
1142
1143 if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
1144 {
1145 // Cost of new SAO_params
1146 m_entropyCoder.load(m_rdContexts.cur);
1147 m_entropyCoder.resetBits();
1148 if (allowMerge[0])
1149 m_entropyCoder.codeSaoMerge(0);
1150 if (allowMerge[1])
1151 m_entropyCoder.codeSaoMerge(0);
1152 for (int plane = 0; plane < 3; plane++)
1153 {
1154 if (saoParam->bSaoFlag[plane > 0])
1155 m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
1156 }
1157
1158 uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
 // the bit count is added to mergeDist[] unscaled
1159 double bestCost = mergeDist[0] + (double)rate;
1160 m_entropyCoder.store(m_rdContexts.temp);
1161
1162 // Cost of Merge
1163 for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
1164 {
1165 if (!allowMerge[mergeIdx])
1166 continue;
1167
1168 m_entropyCoder.load(m_rdContexts.cur);
1169 m_entropyCoder.resetBits();
 // merge-left codes a single 1; merge-up codes 0 for left (when a left
 // candidate exists) followed by 1 for up
1170 if (allowMerge[0])
1171 m_entropyCoder.codeSaoMerge(1 - mergeIdx);
1172 if (allowMerge[1] && (mergeIdx == 1))
1173 m_entropyCoder.codeSaoMerge(1);
1174
1175 rate = m_entropyCoder.getNumberOfWrittenBits();
1176 double mergeCost = mergeDist[mergeIdx + 1] + (double)rate;
1177 if (mergeCost < bestCost)
1178 {
1179 SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
1180 bestCost = mergeCost;
1181 m_entropyCoder.store(m_rdContexts.temp);
1182 for (int plane = 0; plane < 3; plane++)
1183 {
1184 mergeSaoParam[plane][mergeIdx].mergeMode = mergeMode;
1185 if (saoParam->bSaoFlag[plane > 0])
1186 copySaoUnit(&saoParam->ctuParam[plane][addr], &mergeSaoParam[plane][mergeIdx]);
1187 }
1188 }
1189 }
1190
 // count CTUs left without SAO (the chroma count looks at plane 1 only)
1191 if (saoParam->ctuParam[0][addr].typeIdx < 0)
1192 m_numNoSao[0]++;
1193 if (saoParam->ctuParam[1][addr].typeIdx < 0)
1194 m_numNoSao[1]++;
 // commit the entropy state of the winning choice as the new current context
1195 m_entropyCoder.load(m_rdContexts.temp);
1196 m_entropyCoder.store(m_rdContexts.cur);
1197 }
1198 }
1199}
1200
1201/** rate distortion optimization of SAO unit */
1202inline int64_t SAO::estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t* currentDistortionTableBo, double* currentRdCostTableBo)
1203{
1204 int64_t estDist = 0;
1205
1206 for (int classIdx = 1; classIdx < ((typeIdx < SAO_BO) ? SAO_EO_LEN + 1 : SAO_NUM_BO_CLASSES + 1); classIdx++)
1207 {
1208 int32_t count = m_count[plane][typeIdx][classIdx];
1209 int32_t& offsetOrg = m_offsetOrg[plane][typeIdx][classIdx];
1210 int32_t& offsetOut = m_offset[plane][typeIdx][classIdx];
1211
1212 if (typeIdx == SAO_BO)
1213 {
1214 currentDistortionTableBo[classIdx - 1] = 0;
1215 currentRdCostTableBo[classIdx - 1] = lambda;
1216 }
1217 if (count)
1218 {
1219 int offset = roundIBDI(offsetOrg, count << SAO_BIT_INC);
1220 offset = Clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, offset);
1221 if (typeIdx < SAO_BO)
1222 {
1223 if (classIdx < 3)
1224 offset = X265_MAX(offset, 0);
1225 else
1226 offset = X265_MIN(offset, 0);
1227 }
1228 offsetOut = estIterOffset(typeIdx, classIdx, lambda, offset, count, offsetOrg, currentDistortionTableBo, currentRdCostTableBo);
1229 }
1230 else
1231 {
1232 offsetOrg = 0;
1233 offsetOut = 0;
1234 }
1235 if (typeIdx != SAO_BO)
1236 estDist += estSaoDist(count, (int)offsetOut << SAO_BIT_INC, offsetOrg);
1237 }
1238
1239 return estDist;
1240}
1241
1242inline int SAO::estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg, int32_t* currentDistortionTableBo, double* currentRdCostTableBo)
1243{
1244 int offsetOut = 0;
1245
1246 // Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here.
1247 double tempMinCost = lambda;
1248 while (offset != 0)
1249 {
1250 // Calculate the bits required for signalling the offset
1251 int tempRate = (typeIdx == SAO_BO) ? (abs(offset) + 2) : (abs(offset) + 1);
1252 if (abs(offset) == OFFSET_THRESH - 1)
1253 tempRate--;
1254
1255 // Do the dequntization before distorion calculation
1256 int tempOffset = offset << SAO_BIT_INC;
1257 int64_t tempDist = estSaoDist(count, tempOffset, offsetOrg);
1258 double tempCost = ((double)tempDist + lambda * (double)tempRate);
1259 if (tempCost < tempMinCost)
1260 {
1261 tempMinCost = tempCost;
1262 offsetOut = offset;
1263 if (typeIdx == SAO_BO)
1264 {
1265 currentDistortionTableBo[classIdx - 1] = (int)tempDist;
1266 currentRdCostTableBo[classIdx - 1] = tempCost;
1267 }
1268 }
1269 offset = (offset > 0) ? (offset - 1) : (offset + 1);
1270 }
1271
1272 return offsetOut;
1273}
1274
1275void SAO::saoComponentParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam* mergeSaoParam, double* mergeDist)
1276{
1277 int64_t bestDist = 0;
1278
1279 SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];
1280
1281 double bestRDCostTableBo = MAX_DOUBLE;
1282 int bestClassTableBo = 0;
1283 int currentDistortionTableBo[MAX_NUM_SAO_CLASS];
1284 double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
1285
1286 resetSaoUnit(lclCtuParam);
1287 m_entropyCoder.load(m_rdContexts.temp);
1288 m_entropyCoder.resetBits();
1289 m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
1290 double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
1291
1292 for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
1293 {
1294 int64_t estDist = estSaoTypeDist(0, typeIdx, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
1295
1296 if (typeIdx == SAO_BO)
1297 {
1298 // Estimate Best Position
1299 for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
1300 {
1301 double currentRDCost = 0.0;
1302 for (int j = i; j < i + SAO_BO_LEN; j++)
1303 currentRDCost += currentRdCostTableBo[j];
1304
1305 if (currentRDCost < bestRDCostTableBo)
1306 {
1307 bestRDCostTableBo = currentRDCost;
1308 bestClassTableBo = i;
1309 }
1310 }
1311
1312 // Re code all Offsets
1313 // Code Center
1314 estDist = 0;
1315 for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
1316 estDist += currentDistortionTableBo[classIdx];
1317 }
1318 SaoCtuParam ctuParamRdo;
1319 ctuParamRdo.mergeMode = SAO_MERGE_NONE;
1320 ctuParamRdo.typeIdx = typeIdx;
1321 ctuParamRdo.bandPos = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
1322 for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
1323 ctuParamRdo.offset[classIdx] = (int)m_offset[0][typeIdx][classIdx + ctuParamRdo.bandPos + 1];
1324
1325 m_entropyCoder.load(m_rdContexts.temp);
1326 m_entropyCoder.resetBits();
1327 m_entropyCoder.codeSaoOffset(ctuParamRdo, 0);
1328
1329 uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
1330 double cost = (double)estDist + m_lumaLambda * (double)estRate;
1331
1332 if (cost < dCostPartBest)
1333 {
1334 dCostPartBest = cost;
1335 copySaoUnit(lclCtuParam, &ctuParamRdo);
1336 bestDist = estDist;
1337 }
1338 }
1339
1340 mergeDist[0] = ((double)bestDist / m_lumaLambda);
1341 m_entropyCoder.load(m_rdContexts.temp);
1342 m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
1343 m_entropyCoder.store(m_rdContexts.temp);
1344
1345 // merge left or merge up
1346
1347 for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++)
1348 {
1349 SaoCtuParam* mergeSrcParam = NULL;
1350 if (addrLeft >= 0 && mergeIdx == 0)
1351 mergeSrcParam = &(saoParam->ctuParam[0][addrLeft]);
1352 else if (addrUp >= 0 && mergeIdx == 1)
1353 mergeSrcParam = &(saoParam->ctuParam[0][addrUp]);
1354 if (mergeSrcParam)
1355 {
1356 int64_t estDist = 0;
1357 int typeIdx = mergeSrcParam->typeIdx;
1358 if (typeIdx >= 0)
1359 {
1360 int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 0;
1361 for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
1362 {
1363 int mergeOffset = mergeSrcParam->offset[classIdx];
1364 estDist += estSaoDist(m_count[0][typeIdx][classIdx + bandPos + 1], mergeOffset, m_offsetOrg[0][typeIdx][classIdx + bandPos + 1]);
1365 }
1366 }
1367
1368 copySaoUnit(&mergeSaoParam[mergeIdx], mergeSrcParam);
1369 mergeSaoParam[mergeIdx].mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
1370
1371 mergeDist[mergeIdx + 1] = ((double)estDist / m_lumaLambda);
1372 }
1373 else
1374 resetSaoUnit(&mergeSaoParam[mergeIdx]);
1375 }
1376}
1377
1378void SAO::sao2ChromaParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[][2], double* mergeDist)
1379{
1380 int64_t bestDist = 0;
1381
1382 SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr], &saoParam->ctuParam[2][addr] };
1383
1384 double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
1385 int bestClassTableBo[2] = { 0, 0 };
1386 int currentDistortionTableBo[MAX_NUM_SAO_CLASS];
1387
1388 resetSaoUnit(lclCtuParam[0]);
1389 resetSaoUnit(lclCtuParam[1]);
1390 m_entropyCoder.load(m_rdContexts.temp);
1391 m_entropyCoder.resetBits();
1392 m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1);
1393 m_entropyCoder.codeSaoOffset(*lclCtuParam[1], 2);
1394
1395 double costPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_chromaLambda;
1396
1397 for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
1398 {
1399 int64_t estDist[2];
1400 if (typeIdx == SAO_BO)
1401 {
1402 // Estimate Best Position
1403 for (int compIdx = 0; compIdx < 2; compIdx++)
1404 {
1405 double bestRDCostTableBo = MAX_DOUBLE;
1406 estDist[compIdx] = estSaoTypeDist(compIdx + 1, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
1407 for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
1408 {
1409 double currentRDCost = 0.0;
1410 for (int j = i; j < i + SAO_BO_LEN; j++)
1411 currentRDCost += currentRdCostTableBo[j];
1412
1413 if (currentRDCost < bestRDCostTableBo)
1414 {
1415 bestRDCostTableBo = currentRDCost;
1416 bestClassTableBo[compIdx] = i;
1417 }
1418 }
1419
1420 // Re code all Offsets
1421 // Code Center
1422 estDist[compIdx] = 0;
1423 for (int classIdx = bestClassTableBo[compIdx]; classIdx < bestClassTableBo[compIdx] + SAO_BO_LEN; classIdx++)
1424 estDist[compIdx] += currentDistortionTableBo[classIdx];
1425 }
1426 }
1427 else
1428 {
1429 estDist[0] = estSaoTypeDist(1, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
1430 estDist[1] = estSaoTypeDist(2, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
1431 }
1432
1433 m_entropyCoder.load(m_rdContexts.temp);
1434 m_entropyCoder.resetBits();
1435
1436 SaoCtuParam ctuParamRdo[2];
1437 for (int compIdx = 0; compIdx < 2; compIdx++)
1438 {
1439 ctuParamRdo[compIdx].mergeMode = SAO_MERGE_NONE;
1440 ctuParamRdo[compIdx].typeIdx = typeIdx;
1441 ctuParamRdo[compIdx].bandPos = (typeIdx == SAO_BO) ? bestClassTableBo[compIdx] : 0;
1442 for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
1443 ctuParamRdo[compIdx].offset[classIdx] = (int)m_offset[compIdx + 1][typeIdx][classIdx + ctuParamRdo[compIdx].bandPos + 1];
1444
1445 m_entropyCoder.codeSaoOffset(ctuParamRdo[compIdx], compIdx + 1);
1446 }
1447
1448 uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
1449 double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate;
1450
1451 if (cost < costPartBest)
1452 {
1453 costPartBest = cost;
1454 copySaoUnit(lclCtuParam[0], &ctuParamRdo[0]);
1455 copySaoUnit(lclCtuParam[1], &ctuParamRdo[1]);
1456 bestDist = (estDist[0] + estDist[1]);
1457 }
1458 }
1459
1460 mergeDist[0] += ((double)bestDist / m_chromaLambda);
1461 m_entropyCoder.load(m_rdContexts.temp);
1462 m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1);
1463 m_entropyCoder.codeSaoOffset(*lclCtuParam[1], 2);
1464 m_entropyCoder.store(m_rdContexts.temp);
1465
1466 // merge left or merge up
1467
1468 for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++)
1469 {
1470 for (int compIdx = 0; compIdx < 2; compIdx++)
1471 {
1472 int plane = compIdx + 1;
1473 SaoCtuParam* mergeSrcParam = NULL;
1474 if (addrLeft >= 0 && mergeIdx == 0)
1475 mergeSrcParam = &(saoParam->ctuParam[plane][addrLeft]);
1476 else if (addrUp >= 0 && mergeIdx == 1)
1477 mergeSrcParam = &(saoParam->ctuParam[plane][addrUp]);
1478 if (mergeSrcParam)
1479 {
1480 int64_t estDist = 0;
1481 int typeIdx = mergeSrcParam->typeIdx;
1482 if (typeIdx >= 0)
1483 {
1484 int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 0;
1485 for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
1486 {
1487 int mergeOffset = mergeSrcParam->offset[classIdx];
1488 estDist += estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
1489 }
1490 }
1491
1492 copySaoUnit(&mergeSaoParam[plane][mergeIdx], mergeSrcParam);
1493 mergeSaoParam[plane][mergeIdx].mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
1494 mergeDist[mergeIdx + 1] += ((double)estDist / m_chromaLambda);
1495 }
1496 else
1497 resetSaoUnit(&mergeSaoParam[plane][mergeIdx]);
1498 }
1499 }
1500}
1501}