Imported Upstream version 1.4
[deb_x265.git] / source / encoder / sao.cpp
1 /*****************************************************************************
2 * Copyright (C) 2013 x265 project
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 * Min Chen <chenm003@163.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 *
21 * This program is also available under a commercial proprietary license.
22 * For more information, contact us at license @ x265.com.
23 *****************************************************************************/
24
25 #include "common.h"
26 #include "frame.h"
27 #include "framedata.h"
28 #include "picyuv.h"
29 #include "sao.h"
30
namespace {

/* Divide num by den, rounding to the nearest integer with halves rounded away
 * from zero. Callers in this file pass a non-zero, positive den. */
inline int32_t roundIBDI(int32_t num, int32_t den)
{
    return num >= 0 ? ((num * 2 + den) / (den * 2)) : -((-num * 2 + den) / (den * 2));
}

/* get the sign of input variable (TODO: this is a dup, make common)
 * Returns -1, 0 or +1. Written with comparisons so that it neither negates x
 * (undefined for INT_MIN) nor depends on how a negative value is right-shifted. */
inline int signOf(int x)
{
    return (x > 0) - (x < 0);
}

/* Estimated change in distortion when 'offset' is added to 'count' samples
 * whose summed (original - reconstructed) difference is 'offsetOrg':
 *     count * offset^2 - 2 * offset * offsetOrg
 * The arithmetic is carried out in 64 bits; the operands are 32-bit, and their
 * products can exceed the int32_t range. */
inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg)
{
    return (static_cast<int64_t>(count) * offset - static_cast<int64_t>(offsetOrg) * 2) * offset;
}

} // end anonymous namespace
50
51
52 namespace x265 {
53
/* Maps the raw edge index computed in the edge-offset loops of this file
 * (sign of one neighbour difference + sign of the other + 2, i.e. 0..4) to the
 * class slot used when indexing the statistics and offset tables. */
const uint32_t SAO::s_eoTable[NUM_EDGETYPE] =
{
    1, // 0
    2, // 1
    0, // 2
    3, // 3
    4 // 4
};
62
63 SAO::SAO()
64 {
65 m_count = NULL;
66 m_offset = NULL;
67 m_offsetOrg = NULL;
68 m_countPreDblk = NULL;
69 m_offsetOrgPreDblk = NULL;
70 m_refDepth = 0;
71 m_lumaLambda = 0;
72 m_chromaLambda = 0;
73 m_param = NULL;
74 m_clipTable = NULL;
75 m_clipTableBase = NULL;
76 m_offsetBo = NULL;
77 m_tmpU1[0] = NULL;
78 m_tmpU1[1] = NULL;
79 m_tmpU1[2] = NULL;
80 m_tmpU2[0] = NULL;
81 m_tmpU2[1] = NULL;
82 m_tmpU2[2] = NULL;
83 m_tmpL1 = NULL;
84 m_tmpL2 = NULL;
85
86 m_depthSaoRate[0][0] = 0;
87 m_depthSaoRate[0][1] = 0;
88 m_depthSaoRate[0][2] = 0;
89 m_depthSaoRate[0][3] = 0;
90 m_depthSaoRate[1][0] = 0;
91 m_depthSaoRate[1][1] = 0;
92 m_depthSaoRate[1][2] = 0;
93 m_depthSaoRate[1][3] = 0;
94 }
95
96 bool SAO::create(x265_param* param)
97 {
98 m_param = param;
99 m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
100 m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
101
102 m_numCuInWidth = (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
103 m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
104
105 const pixel maxY = (1 << X265_DEPTH) - 1;
106 const pixel rangeExt = maxY >> 1;
107 int numCtu = m_numCuInWidth * m_numCuInHeight;
108
109 CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt);
110 CHECKED_MALLOC(m_offsetBo, pixel, maxY + 2 * rangeExt);
111
112 CHECKED_MALLOC(m_tmpL1, pixel, g_maxCUSize + 1);
113 CHECKED_MALLOC(m_tmpL2, pixel, g_maxCUSize + 1);
114
115 for (int i = 0; i < 3; i++)
116 {
117 CHECKED_MALLOC(m_tmpU1[i], pixel, m_param->sourceWidth);
118 CHECKED_MALLOC(m_tmpU2[i], pixel, m_param->sourceWidth);
119 }
120
121 CHECKED_MALLOC(m_count, PerClass, NUM_PLANE);
122 CHECKED_MALLOC(m_offset, PerClass, NUM_PLANE);
123 CHECKED_MALLOC(m_offsetOrg, PerClass, NUM_PLANE);
124
125 CHECKED_MALLOC(m_countPreDblk, PerPlane, numCtu);
126 CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numCtu);
127
128 m_clipTable = &(m_clipTableBase[rangeExt]);
129
130 for (int i = 0; i < rangeExt; i++)
131 m_clipTableBase[i] = 0;
132
133 for (int i = 0; i < maxY; i++)
134 m_clipTable[i] = (pixel)i;
135
136 for (int i = maxY; i < maxY + rangeExt; i++)
137 m_clipTable[i] = maxY;
138
139 return true;
140
141 fail:
142 return false;
143 }
144
145 void SAO::destroy()
146 {
147 X265_FREE(m_clipTableBase);
148 X265_FREE(m_offsetBo);
149
150 X265_FREE(m_tmpL1);
151 X265_FREE(m_tmpL2);
152
153 for (int i = 0; i < 3; i++)
154 {
155 X265_FREE(m_tmpU1[i]);
156 X265_FREE(m_tmpU2[i]);
157 }
158
159 X265_FREE(m_count);
160 X265_FREE(m_offset);
161 X265_FREE(m_offsetOrg);
162 X265_FREE(m_countPreDblk);
163 X265_FREE(m_offsetOrgPreDblk);
164 }
165
166 /* allocate memory for SAO parameters */
167 void SAO::allocSaoParam(SAOParam* saoParam) const
168 {
169 saoParam->numCuInWidth = m_numCuInWidth;
170
171 saoParam->ctuParam[0] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
172 saoParam->ctuParam[1] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
173 saoParam->ctuParam[2] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
174 }
175
176 void SAO::startSlice(Frame* frame, Entropy& initState, int qp)
177 {
178 Slice* slice = frame->m_encData->m_slice;
179
180 int qpCb = Clip3(0, QP_MAX_MAX, qp + slice->m_pps->chromaCbQpOffset);
181 m_lumaLambda = x265_lambda2_tab[qp];
182 m_chromaLambda = x265_lambda2_tab[qpCb]; // Use Cb QP for SAO chroma
183 m_frame = frame;
184
185 switch (slice->m_sliceType)
186 {
187 case I_SLICE:
188 m_refDepth = 0;
189 break;
190 case P_SLICE:
191 m_refDepth = 1;
192 break;
193 case B_SLICE:
194 m_refDepth = 2 + !IS_REFERENCED(frame);
195 break;
196 }
197
198 resetStats();
199
200 m_entropyCoder.load(initState);
201 m_rdContexts.next.load(initState);
202 m_rdContexts.cur.load(initState);
203
204 SAOParam* saoParam = frame->m_encData->m_saoParam;
205 if (!saoParam)
206 {
207 saoParam = new SAOParam;
208 allocSaoParam(saoParam);
209 frame->m_encData->m_saoParam = saoParam;
210 }
211
212 rdoSaoUnitRowInit(saoParam);
213
214 // NOTE: Disable SAO automatic turn-off when frame parallelism is
215 // enabled for output exact independent of frame thread count
216 if (m_param->frameNumThreads > 1)
217 {
218 saoParam->bSaoFlag[0] = true;
219 saoParam->bSaoFlag[1] = true;
220 }
221 }
222
// CTU-based SAO process without slice granularity
//
// Applies the offsets currently loaded in m_offsetEo (edge types) or
// m_offsetBo (band type) to one CTU of one plane, modifying the reconstructed
// picture in place.
//   addr    - CTU address within the picture
//   typeIdx - SAO_EO_0..SAO_EO_3 or SAO_BO; any other value leaves the pixels
//             untouched but still performs the column-buffer bookkeeping
//   plane   - 0 for luma, non-zero for chroma (chroma shifts are applied)
// Neighbour samples to the left and above are read from m_tmpL1 and
// m_tmpU1[plane] rather than from the picture. Before any pixel is changed,
// this CTU's right-most column is saved into m_tmpL2, and the two column
// buffers are swapped on exit.
void SAO::processSaoCu(int addr, int typeIdx, int plane)
{
    int x, y;
    const CUData* cu = m_frame->m_encData->getPicCTU(addr);
    pixel* rec = m_frame->m_reconPicYuv->getPlaneAddr(plane, addr);
    intptr_t stride = plane ? m_frame->m_reconPicYuv->m_strideC : m_frame->m_reconPicYuv->m_stride;
    uint32_t picWidth = m_param->sourceWidth;
    uint32_t picHeight = m_param->sourceHeight;
    int ctuWidth = g_maxCUSize;
    int ctuHeight = g_maxCUSize;
    uint32_t lpelx = cu->m_cuPelX;
    uint32_t tpely = cu->m_cuPelY;
    if (plane)
    {
        picWidth >>= m_hChromaShift;
        picHeight >>= m_vChromaShift;
        ctuWidth >>= m_hChromaShift;
        ctuHeight >>= m_vChromaShift;
        lpelx >>= m_hChromaShift;
        tpely >>= m_vChromaShift;
    }
    // clip the CTU against the right/bottom picture edge
    uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
    uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
    ctuWidth = rpelx - lpelx;
    ctuHeight = bpely - tpely;

    int startX;
    int startY;
    int endX;
    int endY;
    pixel* tmpL;
    pixel* tmpU;

    // sign buffers are offset by one so that index -1 is addressable
    int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
    int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;

    {
        // save the right-most column (ctuHeight + 1 rows) before it is modified
        const pixel* recR = &rec[ctuWidth - 1];
        for (int i = 0; i < ctuHeight + 1; i++)
        {
            m_tmpL2[i] = *recR;
            recR += stride;
        }

        tmpL = m_tmpL1;
        tmpU = &(m_tmpU1[plane][lpelx]);
    }

    switch (typeIdx)
    {
    case SAO_EO_0: // dir: -
    {
        pixel firstPxl = 0, lastPxl = 0;
        // the first/last picture column has no left/right neighbour and is skipped
        startX = !lpelx;
        endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
        if (ctuWidth & 15)
        {
            // width is not a multiple of 16: plain C loop
            for (y = 0; y < ctuHeight; y++)
            {
                int signLeft = signOf(rec[startX] - tmpL[y]);
                for (x = startX; x < endX; x++)
                {
                    int signRight = signOf(rec[x] - rec[x + 1]);
                    int edgeType = signRight + signLeft + 2;
                    signLeft = -signRight;

                    rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
                }

                rec += stride;
            }
        }
        else
        {
            // width is a multiple of 16: the primitive processes the whole row,
            // so picture-edge pixels are saved beforehand and restored afterwards
            for (y = 0; y < ctuHeight; y++)
            {
                int signLeft = signOf(rec[startX] - tmpL[y]);

                if (!lpelx)
                    firstPxl = rec[0];

                if (rpelx == picWidth)
                    lastPxl = rec[ctuWidth - 1];

                primitives.saoCuOrgE0(rec, m_offsetEo, ctuWidth, (int8_t)signLeft);

                if (!lpelx)
                    rec[0] = firstPxl;

                if (rpelx == picWidth)
                    rec[ctuWidth - 1] = lastPxl;

                rec += stride;
            }
        }
        break;
    }
    case SAO_EO_1: // dir: |
    {
        // the first/last picture row has no upper/lower neighbour and is skipped
        startY = !tpely;
        endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
        if (!tpely)
            rec += stride;

        // sign against the saved row above
        for (x = 0; x < ctuWidth; x++)
            upBuff1[x] = signOf(rec[x] - tmpU[x]);

        for (y = startY; y < endY; y++)
        {
            for (x = 0; x < ctuWidth; x++)
            {
                int signDown = signOf(rec[x] - rec[x + stride]);
                int edgeType = signDown + upBuff1[x] + 2;
                upBuff1[x] = -signDown; // becomes the "up" sign for the next row

                rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
            }

            rec += stride;
        }

        break;
    }
    case SAO_EO_2: // dir: 135
    {
        startX = !lpelx;
        endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;

        startY = !tpely;
        endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;

        if (!tpely)
            rec += stride;

        // sign against the upper-left neighbour taken from the saved row above
        for (x = startX; x < endX; x++)
            upBuff1[x] = signOf(rec[x] - tmpU[x - 1]);

        for (y = startY; y < endY; y++)
        {
            // first sign of the next row uses the saved left column
            upBufft[startX] = signOf(rec[stride + startX] - tmpL[y]);
            for (x = startX; x < endX; x++)
            {
                int signDown = signOf(rec[x] - rec[x + stride + 1]);
                int edgeType = signDown + upBuff1[x] + 2;
                upBufft[x + 1] = -signDown;
                rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
            }

            std::swap(upBuff1, upBufft);

            rec += stride;
        }

        break;
    }
    case SAO_EO_3: // dir: 45
    {
        startX = !lpelx;
        endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;

        startY = !tpely;
        endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;

        if (!tpely)
            rec += stride;

        // sign against the upper-right neighbour taken from the saved row above;
        // starts one column early because each row writes upBuff1[x - 1]
        for (x = startX - 1; x < endX; x++)
            upBuff1[x] = signOf(rec[x] - tmpU[x + 1]);

        for (y = startY; y < endY; y++)
        {
            // first column: the lower-left neighbour comes from the saved left column
            x = startX;
            int signDown = signOf(rec[x] - tmpL[y + 1]);
            int edgeType = signDown + upBuff1[x] + 2;
            upBuff1[x - 1] = -signDown;
            rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
            for (x = startX + 1; x < endX; x++)
            {
                signDown = signOf(rec[x] - rec[x + stride - 1]);
                edgeType = signDown + upBuff1[x] + 2;
                upBuff1[x - 1] = -signDown;
                rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
            }

            upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);

            rec += stride;
        }

        break;
    }
    case SAO_BO:
    {
        // band offset is a pure per-pixel table lookup
        const pixel* offsetBo = m_offsetBo;

        for (y = 0; y < ctuHeight; y++)
        {
            for (x = 0; x < ctuWidth; x++)
                rec[x] = offsetBo[rec[x]];

            rec += stride;
        }

        break;
    }
    default: break;
    }

    // if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1)
    // (the condition above is commented out: the swap is unconditional, so the
    // column saved in m_tmpL2 becomes the left neighbour of the next CTU)
    std::swap(m_tmpL1, m_tmpL2);
}
435
436 /* Process SAO all units */
437 void SAO::processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane)
438 {
439 intptr_t stride = plane ? m_frame->m_reconPicYuv->m_strideC : m_frame->m_reconPicYuv->m_stride;
440 uint32_t picWidth = m_param->sourceWidth;
441 int ctuWidth = g_maxCUSize;
442 int ctuHeight = g_maxCUSize;
443 if (plane)
444 {
445 picWidth >>= m_hChromaShift;
446 ctuWidth >>= m_hChromaShift;
447 ctuHeight >>= m_vChromaShift;
448 }
449
450 if (!idxY)
451 {
452 pixel* rec = m_frame->m_reconPicYuv->m_picOrg[plane];
453 memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth);
454 }
455
456 int addr = idxY * m_numCuInWidth;
457 pixel* rec = plane ? m_frame->m_reconPicYuv->getChromaAddr(plane, addr) : m_frame->m_reconPicYuv->getLumaAddr(addr);
458
459 for (int i = 0; i < ctuHeight + 1; i++)
460 {
461 m_tmpL1[i] = rec[0];
462 rec += stride;
463 }
464
465 rec -= (stride << 1);
466
467 memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidth);
468
469 const int boShift = X265_DEPTH - SAO_BO_BITS;
470
471 for (int idxX = 0; idxX < m_numCuInWidth; idxX++)
472 {
473 addr = idxY * m_numCuInWidth + idxX;
474
475 bool mergeLeftFlag = ctuParam[addr].mergeMode == SAO_MERGE_LEFT;
476 int typeIdx = ctuParam[addr].typeIdx;
477
478 if (typeIdx >= 0)
479 {
480 if (!mergeLeftFlag)
481 {
482 if (typeIdx == SAO_BO)
483 {
484 pixel* offsetBo = m_offsetBo;
485 int offset[SAO_NUM_BO_CLASSES];
486 memset(offset, 0, sizeof(offset));
487
488 for (int i = 0; i < SAO_NUM_OFFSET; i++)
489 offset[((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = ctuParam[addr].offset[i] << SAO_BIT_INC;
490
491 for (int i = 0; i < (1 << X265_DEPTH); i++)
492 offsetBo[i] = m_clipTable[i + offset[i >> boShift]];
493 }
494 else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
495 {
496 int offset[NUM_EDGETYPE];
497 offset[0] = 0;
498 for (int i = 0; i < SAO_NUM_OFFSET; i++)
499 offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC;
500
501 for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
502 m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]];
503 }
504 }
505 processSaoCu(addr, typeIdx, plane);
506 }
507 else if (idxX != (m_numCuInWidth - 1))
508 {
509 rec = plane ? m_frame->m_reconPicYuv->getChromaAddr(plane, addr) : m_frame->m_reconPicYuv->getLumaAddr(addr);
510
511 for (int i = 0; i < ctuHeight + 1; i++)
512 {
513 m_tmpL1[i] = rec[ctuWidth - 1];
514 rec += stride;
515 }
516 }
517 }
518
519 std::swap(m_tmpU1[plane], m_tmpU2[plane]);
520 }
521
522 void SAO::resetSaoUnit(SaoCtuParam* saoUnit)
523 {
524 saoUnit->mergeMode = SAO_MERGE_NONE;
525 saoUnit->typeIdx = -1;
526 saoUnit->bandPos = 0;
527
528 for (int i = 0; i < SAO_NUM_OFFSET; i++)
529 saoUnit->offset[i] = 0;
530 }
531
532 void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc)
533 {
534 saoUnitDst->mergeMode = saoUnitSrc->mergeMode;
535 saoUnitDst->typeIdx = saoUnitSrc->typeIdx;
536 saoUnitDst->bandPos = saoUnitSrc->bandPos;
537
538 for (int i = 0; i < SAO_NUM_OFFSET; i++)
539 saoUnitDst->offset[i] = saoUnitSrc->offset[i];
540 }
541
/* Calculate SAO statistics for current CTU without non-crossing slice
 *
 * For the CTU at 'addr' in 'plane' (0 = luma, non-zero = chroma), accumulates
 * into m_offsetOrg[plane][type] the per-class sum of (original - reconstructed)
 * and into m_count[plane][type] the per-class sample count, for SAO_BO and
 * each of SAO_EO_0..SAO_EO_3. Values are added to whatever the tables already
 * hold.
 *
 * skipB / skipR are the numbers of bottom rows / right columns of the CTU left
 * out of the statistics; they are not applied where the CTU touches the
 * picture's bottom / right edge (except as noted for SAO_EO_0 below).
 * Presumably these are the samples that are not yet deblocked when this runs -
 * TODO confirm against the caller. */
void SAO::calcSaoStatsCu(int addr, int plane)
{
    int x, y;
    CUData* cu = m_frame->m_encData->getPicCTU(addr);
    const pixel* fenc0 = m_frame->m_origPicYuv->getPlaneAddr(plane, addr);
    const pixel* rec0 = m_frame->m_reconPicYuv->getPlaneAddr(plane, addr);
    const pixel* fenc;
    const pixel* rec;
    // NOTE(review): the reconstructed picture's stride is also used to step
    // through the original picture - assumes both share the same stride
    intptr_t stride = plane ? m_frame->m_reconPicYuv->m_strideC : m_frame->m_reconPicYuv->m_stride;
    uint32_t picWidth = m_param->sourceWidth;
    uint32_t picHeight = m_param->sourceHeight;
    int ctuWidth = g_maxCUSize;
    int ctuHeight = g_maxCUSize;
    uint32_t lpelx = cu->m_cuPelX;
    uint32_t tpely = cu->m_cuPelY;
    if (plane)
    {
        picWidth >>= m_hChromaShift;
        picHeight >>= m_vChromaShift;
        ctuWidth >>= m_hChromaShift;
        ctuHeight >>= m_vChromaShift;
        lpelx >>= m_hChromaShift;
        tpely >>= m_vChromaShift;
    }
    // clip the CTU against the right/bottom picture edge
    uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
    uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
    ctuWidth = rpelx - lpelx;
    ctuHeight = bpely - tpely;

    int startX;
    int startY;
    int endX;
    int endY;
    int32_t* stats;
    int32_t* count;

    // defaults used for every SAO type unless bSaoNonDeblocked overrides them per type
    int skipB = plane ? 2 : 4;
    int skipR = plane ? 3 : 5;

    // sign buffers are offset by one so that index -1 is addressable
    int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
    int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;

    // SAO_BO:
    {
        const int boShift = X265_DEPTH - SAO_BO_BITS;

        if (m_param->bSaoNonDeblocked)
        {
            skipB = plane ? 1 : 3;
            skipR = plane ? 2 : 4;
        }
        stats = m_offsetOrg[plane][SAO_BO];
        count = m_count[plane][SAO_BO];

        fenc = fenc0;
        rec = rec0;

        endX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
        endY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;

        for (y = 0; y < endY; y++)
        {
            for (x = 0; x < endX; x++)
            {
                // band classes are stored starting at slot 1
                int classIdx = 1 + (rec[x] >> boShift);
                stats[classIdx] += (fenc[x] - rec[x]);
                count[classIdx]++;
            }

            fenc += stride;
            rec += stride;
        }
    }

    {
        // SAO_EO_0: // dir: -
        {
            if (m_param->bSaoNonDeblocked)
            {
                skipB = plane ? 1 : 3;
                skipR = plane ? 3 : 5;
            }
            stats = m_offsetOrg[plane][SAO_EO_0];
            count = m_count[plane][SAO_EO_0];

            fenc = fenc0;
            rec = rec0;

            startX = !lpelx;
            endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
            // NOTE(review): unlike the other types, the row limit here subtracts
            // skipB even when the CTU lies on the bottom picture edge
            for (y = 0; y < ctuHeight - skipB; y++)
            {
                int signLeft = signOf(rec[startX] - rec[startX - 1]);
                for (x = startX; x < endX; x++)
                {
                    int signRight = signOf(rec[x] - rec[x + 1]);
                    int edgeType = signRight + signLeft + 2;
                    signLeft = -signRight;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                fenc += stride;
                rec += stride;
            }
        }

        // SAO_EO_1: // dir: |
        {
            if (m_param->bSaoNonDeblocked)
            {
                skipB = plane ? 2 : 4;
                skipR = plane ? 2 : 4;
            }
            stats = m_offsetOrg[plane][SAO_EO_1];
            count = m_count[plane][SAO_EO_1];

            fenc = fenc0;
            rec = rec0;

            startY = !tpely;
            endX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
            endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            // the top picture row has no row above it, so start one row down
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            for (x = 0; x < ctuWidth; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride]);

            for (y = startY; y < endY; y++)
            {
                for (x = 0; x < endX; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    upBuff1[x] = -signDown; // becomes the "up" sign for the next row

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                fenc += stride;
                rec += stride;
            }
        }

        // SAO_EO_2: // dir: 135
        {
            if (m_param->bSaoNonDeblocked)
            {
                skipB = plane ? 2 : 4;
                skipR = plane ? 3 : 5;
            }
            stats = m_offsetOrg[plane][SAO_EO_2];
            count = m_count[plane][SAO_EO_2];

            fenc = fenc0;
            rec = rec0;

            startX = !lpelx;
            endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;

            startY = !tpely;
            endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            // sign against the upper-left neighbour
            for (x = startX; x < endX; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride - 1]);

            for (y = startY; y < endY; y++)
            {
                // first sign of the next row, computed before the row is walked
                upBufft[startX] = signOf(rec[startX + stride] - rec[startX - 1]);
                for (x = startX; x < endX; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride + 1]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    upBufft[x + 1] = -signDown;
                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                std::swap(upBuff1, upBufft);

                rec += stride;
                fenc += stride;
            }
        }

        // SAO_EO_3: // dir: 45
        {
            if (m_param->bSaoNonDeblocked)
            {
                skipB = plane ? 2 : 4;
                skipR = plane ? 3 : 5;
            }
            stats = m_offsetOrg[plane][SAO_EO_3];
            count = m_count[plane][SAO_EO_3];

            fenc = fenc0;
            rec = rec0;

            startX = !lpelx;
            endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;

            startY = !tpely;
            endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;

            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            // sign against the upper-right neighbour; starts one column early
            // because each row writes upBuff1[x - 1]
            for (x = startX - 1; x < endX; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride + 1]);

            for (y = startY; y < endY; y++)
            {
                for (x = startX; x < endX; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride - 1]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    upBuff1[x - 1] = -signDown;
                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);

                rec += stride;
                fenc += stride;
            }
        }
    }
}
786
/* Collect SAO statistics for the CTU at column idxX, row idxY over all planes,
 * storing them per CTU in m_countPreDblk[addr] and m_offsetOrgPreDblk[addr]
 * (both are cleared first).
 *
 * Only the bottom skipB rows and right skipR columns of the CTU contribute:
 * rows above startY start at column startX, later rows start at the CTU's
 * first usable column. The edge-offset loops stop one sample short of the CTU
 * boundary so that no sample of the right or lower CTU is read.
 *
 * NOTE(review): the CTU data and the chroma stride come from the 'frame'
 * argument, while the luma stride and the picture plane addresses come from
 * m_frame - confirm that callers always pass the frame stored in m_frame. */
void SAO::calcSaoStatsCu_BeforeDblk(Frame* frame, int idxX, int idxY)
{
    int addr = idxX + m_numCuInWidth * idxY;

    int x, y;
    CUData* cu = frame->m_encData->getPicCTU(addr);
    const pixel* fenc;
    const pixel* rec;
    intptr_t stride = m_frame->m_reconPicYuv->m_stride;
    uint32_t picWidth = m_param->sourceWidth;
    uint32_t picHeight = m_param->sourceHeight;
    int ctuWidth = g_maxCUSize;
    int ctuHeight = g_maxCUSize;
    uint32_t lpelx = cu->m_cuPelX;
    uint32_t tpely = cu->m_cuPelY;
    // clip the CTU against the right/bottom picture edge
    uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
    uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
    ctuWidth = rpelx - lpelx;
    ctuHeight = bpely - tpely;

    int startX;
    int startY;
    int endX;
    int endY;
    int firstX, firstY;
    int32_t* stats;
    int32_t* count;

    int skipB, skipR;

    // sign buffers are offset by one so that index -1 is addressable
    int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
    int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;

    const int boShift = X265_DEPTH - SAO_BO_BITS;

    memset(m_countPreDblk[addr], 0, sizeof(PerPlane));
    memset(m_offsetOrgPreDblk[addr], 0, sizeof(PerPlane));

    for (int plane = 0; plane < NUM_PLANE; plane++)
    {
        // switch to chroma geometry once, on the first chroma plane; the
        // shifted values are then reused for the remaining plane(s)
        if (plane == 1)
        {
            stride = frame->m_reconPicYuv->m_strideC;
            picWidth >>= m_hChromaShift;
            picHeight >>= m_vChromaShift;
            ctuWidth >>= m_hChromaShift;
            ctuHeight >>= m_vChromaShift;
            lpelx >>= m_hChromaShift;
            tpely >>= m_vChromaShift;
            rpelx >>= m_hChromaShift;
            bpely >>= m_vChromaShift;
        }

        // SAO_BO:

        skipB = plane ? 1 : 3;
        skipR = plane ? 2 : 4;

        stats = m_offsetOrgPreDblk[addr][plane][SAO_BO];
        count = m_countPreDblk[addr][plane][SAO_BO];

        const pixel* fenc0 = m_frame->m_origPicYuv->getPlaneAddr(plane, addr);
        const pixel* rec0 = m_frame->m_reconPicYuv->getPlaneAddr(plane, addr);
        fenc = fenc0;
        rec = rec0;

        startX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
        startY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;

        for (y = 0; y < ctuHeight; y++)
        {
            // upper rows: right strip only; bottom rows: full width
            for (x = (y < startY ? startX : 0); x < ctuWidth; x++)
            {
                // band classes are stored starting at slot 1
                int classIdx = 1 + (rec[x] >> boShift);
                stats[classIdx] += (fenc[x] - rec[x]);
                count[classIdx]++;
            }

            fenc += stride;
            rec += stride;
        }

        // SAO_EO_0: // dir: -
        {
            skipB = plane ? 1 : 3;
            skipR = plane ? 3 : 5;

            stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_0];
            count = m_countPreDblk[addr][plane][SAO_EO_0];

            fenc = fenc0;
            rec = rec0;

            startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
            startY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;
            firstX = !lpelx;
            // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
            endX = ctuWidth - 1; // not refer right CTU

            for (y = 0; y < ctuHeight; y++)
            {
                x = (y < startY ? startX : firstX);
                int signLeft = signOf(rec[x] - rec[x - 1]);
                for (; x < endX; x++)
                {
                    int signRight = signOf(rec[x] - rec[x + 1]);
                    int edgeType = signRight + signLeft + 2;
                    signLeft = -signRight;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                fenc += stride;
                rec += stride;
            }
        }

        // SAO_EO_1: // dir: |
        {
            skipB = plane ? 2 : 4;
            skipR = plane ? 2 : 4;

            stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_1];
            count = m_countPreDblk[addr][plane][SAO_EO_1];

            fenc = fenc0;
            rec = rec0;

            startX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
            startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            firstY = !tpely;
            // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
            endY = ctuHeight - 1; // not refer below CTU
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            for (x = startX; x < ctuWidth; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride]);

            for (y = firstY; y < endY; y++)
            {
                // the full-width walk begins one row before startY so that
                // upBuff1 holds valid signs when row startY is reached
                for (x = (y < startY - 1 ? startX : 0); x < ctuWidth; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    upBuff1[x] = -signDown;

                    // sign updated above, but this sample lies outside the
                    // bottom/right region and is not counted
                    if (x < startX && y < startY)
                        continue;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                fenc += stride;
                rec += stride;
            }
        }

        // SAO_EO_2: // dir: 135
        {
            skipB = plane ? 2 : 4;
            skipR = plane ? 3 : 5;

            stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_2];
            count = m_countPreDblk[addr][plane][SAO_EO_2];

            fenc = fenc0;
            rec = rec0;

            startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
            startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            firstX = !lpelx;
            firstY = !tpely;
            // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
            // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
            endX = ctuWidth - 1; // not refer right CTU
            endY = ctuHeight - 1; // not refer below CTU
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            for (x = startX; x < endX; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride - 1]);

            for (y = firstY; y < endY; y++)
            {
                x = (y < startY - 1 ? startX : firstX);
                upBufft[x] = signOf(rec[x + stride] - rec[x - 1]);
                for (; x < endX; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride + 1]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    upBufft[x + 1] = -signDown;

                    // sign recorded above, but the sample is outside the
                    // bottom/right region and is not counted
                    if (x < startX && y < startY)
                        continue;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                std::swap(upBuff1, upBufft);

                rec += stride;
                fenc += stride;
            }
        }

        // SAO_EO_3: // dir: 45
        {
            skipB = plane ? 2 : 4;
            skipR = plane ? 3 : 5;

            stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_3];
            count = m_countPreDblk[addr][plane][SAO_EO_3];

            fenc = fenc0;
            rec = rec0;

            startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
            startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            firstX = !lpelx;
            firstY = !tpely;
            // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
            // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
            endX = ctuWidth - 1; // not refer right CTU
            endY = ctuHeight - 1; // not refer below CTU
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            // starts one column early because each row writes upBuff1[x - 1]
            for (x = startX - 1; x < endX; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride + 1]);

            for (y = firstY; y < endY; y++)
            {
                for (x = (y < startY - 1 ? startX : firstX); x < endX; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride - 1]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    upBuff1[x - 1] = -signDown;

                    // sign recorded above, but the sample is outside the
                    // bottom/right region and is not counted
                    if (x < startX && y < startY)
                        continue;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);

                rec += stride;
                fenc += stride;
            }
        }
    }
}
1053
1054 /* reset offset statistics */
1055 void SAO::resetStats()
1056 {
1057 memset(m_count, 0, sizeof(PerClass) * NUM_PLANE);
1058 memset(m_offset, 0, sizeof(PerClass) * NUM_PLANE);
1059 memset(m_offsetOrg, 0, sizeof(PerClass) * NUM_PLANE);
1060 }
1061
1062 void SAO::rdoSaoUnitRowInit(SAOParam* saoParam)
1063 {
1064 saoParam->bSaoFlag[0] = true;
1065 saoParam->bSaoFlag[1] = true;
1066
1067 m_numNoSao[0] = 0; // Luma
1068 m_numNoSao[1] = 0; // Chroma
1069 if (m_refDepth > 0 && m_depthSaoRate[0][m_refDepth - 1] > SAO_ENCODING_RATE)
1070 saoParam->bSaoFlag[0] = false;
1071 if (m_refDepth > 0 && m_depthSaoRate[1][m_refDepth - 1] > SAO_ENCODING_RATE_CHROMA)
1072 saoParam->bSaoFlag[1] = false;
1073 }
1074
1075 void SAO::rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus)
1076 {
1077 if (!saoParam->bSaoFlag[0])
1078 m_depthSaoRate[0][m_refDepth] = 1.0;
1079 else
1080 m_depthSaoRate[0][m_refDepth] = m_numNoSao[0] / ((double)numctus);
1081
1082 if (!saoParam->bSaoFlag[1])
1083 m_depthSaoRate[1][m_refDepth] = 1.0;
1084 else
1085 m_depthSaoRate[1][m_refDepth] = m_numNoSao[1] / ((double)numctus);
1086 }
1087
/* Choose the SAO parameters for every CTU of CTU row idxY.
 *
 * Per CTU: the statistics tables are reset (or seeded from the pre-deblock
 * tables when bSaoNonDeblocked is set), statistics are gathered per plane,
 * new parameters are estimated by saoComponentParamDist() (luma) and
 * sao2ChromaParamDist() (chroma), and the resulting cost is compared with the
 * cost of merging from the left or upper CTU. The cheapest choice is written
 * to saoParam->ctuParam[plane][addr] and the matching entropy-coder state is
 * carried forward in m_rdContexts.cur. CTUs that end up with no SAO type are
 * counted in m_numNoSao. */
void SAO::rdoSaoUnitRow(SAOParam* saoParam, int idxY)
{
    // NOTE(review): declared [NUM_MERGE_MODE][2] but indexed below as
    // [plane][mergeIdx] - presumably NUM_MERGE_MODE equals the plane count; verify
    SaoCtuParam mergeSaoParam[NUM_MERGE_MODE][2];
    // [0] = cost of new parameters, [1] = merge left, [2] = merge up (see uses below)
    double mergeDist[NUM_MERGE_MODE];
    bool allowMerge[2]; // left, up
    allowMerge[1] = (idxY > 0);

    for (int idxX = 0; idxX < m_numCuInWidth; idxX++)
    {
        int addr = idxX + idxY * m_numCuInWidth;
        int addrUp = idxY ? addr - m_numCuInWidth : -1;
        int addrLeft = idxX ? addr - 1 : -1;
        allowMerge[0] = (idxX > 0);

        // code "no merge" flags for each available neighbour and keep the
        // resulting coder state in m_rdContexts.temp
        m_entropyCoder.load(m_rdContexts.cur);
        if (allowMerge[0])
            m_entropyCoder.codeSaoMerge(0);
        if (allowMerge[1])
            m_entropyCoder.codeSaoMerge(0);
        m_entropyCoder.store(m_rdContexts.temp);
        // reset stats Y, Cb, Cr
        for (int plane = 0; plane < 3; plane++)
        {
            for (int j = 0; j < MAX_NUM_SAO_TYPE; j++)
            {
                for (int k = 0; k < MAX_NUM_SAO_CLASS; k++)
                {
                    m_offset[plane][j][k] = 0;
                    if (m_param->bSaoNonDeblocked)
                    {
                        // start from the statistics gathered before deblocking
                        m_count[plane][j][k] = m_countPreDblk[addr][plane][j][k];
                        m_offsetOrg[plane][j][k] = m_offsetOrgPreDblk[addr][plane][j][k];
                    }
                    else
                    {
                        m_count[plane][j][k] = 0;
                        m_offsetOrg[plane][j][k] = 0;
                    }
                }
            }

            saoParam->ctuParam[plane][addr].mergeMode = SAO_MERGE_NONE;
            saoParam->ctuParam[plane][addr].typeIdx = -1;
            saoParam->ctuParam[plane][addr].bandPos = 0;
            // bSaoFlag[0] governs luma, bSaoFlag[1] governs both chroma planes
            if (saoParam->bSaoFlag[plane > 0])
                calcSaoStatsCu(addr, plane);
        }

        saoComponentParamDist(saoParam, addr, addrUp, addrLeft, &mergeSaoParam[0][0], mergeDist);

        sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft, mergeSaoParam, mergeDist);

        if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
        {
            // Cost of new SAO_params
            m_entropyCoder.load(m_rdContexts.cur);
            m_entropyCoder.resetBits();
            if (allowMerge[0])
                m_entropyCoder.codeSaoMerge(0);
            if (allowMerge[1])
                m_entropyCoder.codeSaoMerge(0);
            for (int plane = 0; plane < 3; plane++)
            {
                if (saoParam->bSaoFlag[plane > 0])
                    m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
            }

            uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
            double bestCost = mergeDist[0] + (double)rate;
            m_entropyCoder.store(m_rdContexts.temp);

            // Cost of Merge
            for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
            {
                if (!allowMerge[mergeIdx])
                    continue;

                m_entropyCoder.load(m_rdContexts.cur);
                m_entropyCoder.resetBits();
                // merge-left flag is 1 for mergeIdx 0 and 0 for mergeIdx 1;
                // the merge-up flag is only coded when testing merge-up
                if (allowMerge[0])
                    m_entropyCoder.codeSaoMerge(1 - mergeIdx);
                if (allowMerge[1] && (mergeIdx == 1))
                    m_entropyCoder.codeSaoMerge(1);

                rate = m_entropyCoder.getNumberOfWrittenBits();
                double mergeCost = mergeDist[mergeIdx + 1] + (double)rate;
                if (mergeCost < bestCost)
                {
                    SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
                    bestCost = mergeCost;
                    m_entropyCoder.store(m_rdContexts.temp);
                    for (int plane = 0; plane < 3; plane++)
                    {
                        mergeSaoParam[plane][mergeIdx].mergeMode = mergeMode;
                        if (saoParam->bSaoFlag[plane > 0])
                            copySaoUnit(&saoParam->ctuParam[plane][addr], &mergeSaoParam[plane][mergeIdx]);
                    }
                }
            }

            if (saoParam->ctuParam[0][addr].typeIdx < 0)
                m_numNoSao[0]++;
            if (saoParam->ctuParam[1][addr].typeIdx < 0)
                m_numNoSao[1]++;
            // the coder state of the winning choice becomes the starting
            // state for the next CTU
            m_entropyCoder.load(m_rdContexts.temp);
            m_entropyCoder.store(m_rdContexts.cur);
        }
    }
}
1197
1198 /** rate distortion optimization of SAO unit */
1199 inline int64_t SAO::estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t* currentDistortionTableBo, double* currentRdCostTableBo)
1200 {
1201 int64_t estDist = 0;
1202
1203 for (int classIdx = 1; classIdx < ((typeIdx < SAO_BO) ? SAO_EO_LEN + 1 : SAO_NUM_BO_CLASSES + 1); classIdx++)
1204 {
1205 int32_t count = m_count[plane][typeIdx][classIdx];
1206 int32_t& offsetOrg = m_offsetOrg[plane][typeIdx][classIdx];
1207 int32_t& offsetOut = m_offset[plane][typeIdx][classIdx];
1208
1209 if (typeIdx == SAO_BO)
1210 {
1211 currentDistortionTableBo[classIdx - 1] = 0;
1212 currentRdCostTableBo[classIdx - 1] = lambda;
1213 }
1214 if (count)
1215 {
1216 int offset = roundIBDI(offsetOrg, count << SAO_BIT_INC);
1217 offset = Clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, offset);
1218 if (typeIdx < SAO_BO)
1219 {
1220 if (classIdx < 3)
1221 offset = X265_MAX(offset, 0);
1222 else
1223 offset = X265_MIN(offset, 0);
1224 }
1225 offsetOut = estIterOffset(typeIdx, classIdx, lambda, offset, count, offsetOrg, currentDistortionTableBo, currentRdCostTableBo);
1226 }
1227 else
1228 {
1229 offsetOrg = 0;
1230 offsetOut = 0;
1231 }
1232 if (typeIdx != SAO_BO)
1233 estDist += estSaoDist(count, (int)offsetOut << SAO_BIT_INC, offsetOrg);
1234 }
1235
1236 return estDist;
1237 }
1238
1239 inline int SAO::estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg, int32_t* currentDistortionTableBo, double* currentRdCostTableBo)
1240 {
1241 int offsetOut = 0;
1242
1243 // Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here.
1244 double tempMinCost = lambda;
1245 while (offset != 0)
1246 {
1247 // Calculate the bits required for signalling the offset
1248 int tempRate = (typeIdx == SAO_BO) ? (abs(offset) + 2) : (abs(offset) + 1);
1249 if (abs(offset) == OFFSET_THRESH - 1)
1250 tempRate--;
1251
1252 // Do the dequntization before distorion calculation
1253 int tempOffset = offset << SAO_BIT_INC;
1254 int64_t tempDist = estSaoDist(count, tempOffset, offsetOrg);
1255 double tempCost = ((double)tempDist + lambda * (double)tempRate);
1256 if (tempCost < tempMinCost)
1257 {
1258 tempMinCost = tempCost;
1259 offsetOut = offset;
1260 if (typeIdx == SAO_BO)
1261 {
1262 currentDistortionTableBo[classIdx - 1] = (int)tempDist;
1263 currentRdCostTableBo[classIdx - 1] = tempCost;
1264 }
1265 }
1266 offset = (offset > 0) ? (offset - 1) : (offset + 1);
1267 }
1268
1269 return offsetOut;
1270 }
1271
1272 void SAO::saoComponentParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam* mergeSaoParam, double* mergeDist)
1273 {
1274 int64_t bestDist = 0;
1275
1276 SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];
1277
1278 double bestRDCostTableBo = MAX_DOUBLE;
1279 int bestClassTableBo = 0;
1280 int currentDistortionTableBo[MAX_NUM_SAO_CLASS];
1281 double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
1282
1283 resetSaoUnit(lclCtuParam);
1284 m_entropyCoder.load(m_rdContexts.temp);
1285 m_entropyCoder.resetBits();
1286 m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
1287 double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
1288
1289 for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
1290 {
1291 int64_t estDist = estSaoTypeDist(0, typeIdx, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
1292
1293 if (typeIdx == SAO_BO)
1294 {
1295 // Estimate Best Position
1296 for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
1297 {
1298 double currentRDCost = 0.0;
1299 for (int j = i; j < i + SAO_BO_LEN; j++)
1300 currentRDCost += currentRdCostTableBo[j];
1301
1302 if (currentRDCost < bestRDCostTableBo)
1303 {
1304 bestRDCostTableBo = currentRDCost;
1305 bestClassTableBo = i;
1306 }
1307 }
1308
1309 // Re code all Offsets
1310 // Code Center
1311 estDist = 0;
1312 for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
1313 estDist += currentDistortionTableBo[classIdx];
1314 }
1315 SaoCtuParam ctuParamRdo;
1316 ctuParamRdo.mergeMode = SAO_MERGE_NONE;
1317 ctuParamRdo.typeIdx = typeIdx;
1318 ctuParamRdo.bandPos = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
1319 for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
1320 ctuParamRdo.offset[classIdx] = (int)m_offset[0][typeIdx][classIdx + ctuParamRdo.bandPos + 1];
1321
1322 m_entropyCoder.load(m_rdContexts.temp);
1323 m_entropyCoder.resetBits();
1324 m_entropyCoder.codeSaoOffset(ctuParamRdo, 0);
1325
1326 uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
1327 double cost = (double)estDist + m_lumaLambda * (double)estRate;
1328
1329 if (cost < dCostPartBest)
1330 {
1331 dCostPartBest = cost;
1332 copySaoUnit(lclCtuParam, &ctuParamRdo);
1333 bestDist = estDist;
1334 }
1335 }
1336
1337 mergeDist[0] = ((double)bestDist / m_lumaLambda);
1338 m_entropyCoder.load(m_rdContexts.temp);
1339 m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
1340 m_entropyCoder.store(m_rdContexts.temp);
1341
1342 // merge left or merge up
1343
1344 for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++)
1345 {
1346 SaoCtuParam* mergeSrcParam = NULL;
1347 if (addrLeft >= 0 && mergeIdx == 0)
1348 mergeSrcParam = &(saoParam->ctuParam[0][addrLeft]);
1349 else if (addrUp >= 0 && mergeIdx == 1)
1350 mergeSrcParam = &(saoParam->ctuParam[0][addrUp]);
1351 if (mergeSrcParam)
1352 {
1353 int64_t estDist = 0;
1354 int typeIdx = mergeSrcParam->typeIdx;
1355 if (typeIdx >= 0)
1356 {
1357 int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 0;
1358 for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
1359 {
1360 int mergeOffset = mergeSrcParam->offset[classIdx];
1361 estDist += estSaoDist(m_count[0][typeIdx][classIdx + bandPos + 1], mergeOffset, m_offsetOrg[0][typeIdx][classIdx + bandPos + 1]);
1362 }
1363 }
1364
1365 copySaoUnit(&mergeSaoParam[mergeIdx], mergeSrcParam);
1366 mergeSaoParam[mergeIdx].mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
1367
1368 mergeDist[mergeIdx + 1] = ((double)estDist / m_lumaLambda);
1369 }
1370 else
1371 resetSaoUnit(&mergeSaoParam[mergeIdx]);
1372 }
1373 }
1374
1375 void SAO::sao2ChromaParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[][2], double* mergeDist)
1376 {
1377 int64_t bestDist = 0;
1378
1379 SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr], &saoParam->ctuParam[2][addr] };
1380
1381 double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
1382 int bestClassTableBo[2] = { 0, 0 };
1383 int currentDistortionTableBo[MAX_NUM_SAO_CLASS];
1384
1385 resetSaoUnit(lclCtuParam[0]);
1386 resetSaoUnit(lclCtuParam[1]);
1387 m_entropyCoder.load(m_rdContexts.temp);
1388 m_entropyCoder.resetBits();
1389 m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1);
1390 m_entropyCoder.codeSaoOffset(*lclCtuParam[1], 2);
1391
1392 double costPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_chromaLambda;
1393
1394 for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
1395 {
1396 int64_t estDist[2];
1397 if (typeIdx == SAO_BO)
1398 {
1399 // Estimate Best Position
1400 for (int compIdx = 0; compIdx < 2; compIdx++)
1401 {
1402 double bestRDCostTableBo = MAX_DOUBLE;
1403 estDist[compIdx] = estSaoTypeDist(compIdx + 1, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
1404 for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
1405 {
1406 double currentRDCost = 0.0;
1407 for (int j = i; j < i + SAO_BO_LEN; j++)
1408 currentRDCost += currentRdCostTableBo[j];
1409
1410 if (currentRDCost < bestRDCostTableBo)
1411 {
1412 bestRDCostTableBo = currentRDCost;
1413 bestClassTableBo[compIdx] = i;
1414 }
1415 }
1416
1417 // Re code all Offsets
1418 // Code Center
1419 estDist[compIdx] = 0;
1420 for (int classIdx = bestClassTableBo[compIdx]; classIdx < bestClassTableBo[compIdx] + SAO_BO_LEN; classIdx++)
1421 estDist[compIdx] += currentDistortionTableBo[classIdx];
1422 }
1423 }
1424 else
1425 {
1426 estDist[0] = estSaoTypeDist(1, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
1427 estDist[1] = estSaoTypeDist(2, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
1428 }
1429
1430 m_entropyCoder.load(m_rdContexts.temp);
1431 m_entropyCoder.resetBits();
1432
1433 SaoCtuParam ctuParamRdo[2];
1434 for (int compIdx = 0; compIdx < 2; compIdx++)
1435 {
1436 ctuParamRdo[compIdx].mergeMode = SAO_MERGE_NONE;
1437 ctuParamRdo[compIdx].typeIdx = typeIdx;
1438 ctuParamRdo[compIdx].bandPos = (typeIdx == SAO_BO) ? bestClassTableBo[compIdx] : 0;
1439 for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
1440 ctuParamRdo[compIdx].offset[classIdx] = (int)m_offset[compIdx + 1][typeIdx][classIdx + ctuParamRdo[compIdx].bandPos + 1];
1441
1442 m_entropyCoder.codeSaoOffset(ctuParamRdo[compIdx], compIdx + 1);
1443 }
1444
1445 uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
1446 double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate;
1447
1448 if (cost < costPartBest)
1449 {
1450 costPartBest = cost;
1451 copySaoUnit(lclCtuParam[0], &ctuParamRdo[0]);
1452 copySaoUnit(lclCtuParam[1], &ctuParamRdo[1]);
1453 bestDist = (estDist[0] + estDist[1]);
1454 }
1455 }
1456
1457 mergeDist[0] += ((double)bestDist / m_chromaLambda);
1458 m_entropyCoder.load(m_rdContexts.temp);
1459 m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1);
1460 m_entropyCoder.codeSaoOffset(*lclCtuParam[1], 2);
1461 m_entropyCoder.store(m_rdContexts.temp);
1462
1463 // merge left or merge up
1464
1465 for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++)
1466 {
1467 for (int compIdx = 0; compIdx < 2; compIdx++)
1468 {
1469 int plane = compIdx + 1;
1470 SaoCtuParam* mergeSrcParam = NULL;
1471 if (addrLeft >= 0 && mergeIdx == 0)
1472 mergeSrcParam = &(saoParam->ctuParam[plane][addrLeft]);
1473 else if (addrUp >= 0 && mergeIdx == 1)
1474 mergeSrcParam = &(saoParam->ctuParam[plane][addrUp]);
1475 if (mergeSrcParam)
1476 {
1477 int64_t estDist = 0;
1478 int typeIdx = mergeSrcParam->typeIdx;
1479 if (typeIdx >= 0)
1480 {
1481 int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 0;
1482 for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
1483 {
1484 int mergeOffset = mergeSrcParam->offset[classIdx];
1485 estDist += estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
1486 }
1487 }
1488
1489 copySaoUnit(&mergeSaoParam[plane][mergeIdx], mergeSrcParam);
1490 mergeSaoParam[plane][mergeIdx].mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
1491 mergeDist[mergeIdx + 1] += ((double)estDist / m_chromaLambda);
1492 }
1493 else
1494 resetSaoUnit(&mergeSaoParam[plane][mergeIdx]);
1495 }
1496 }
1497 }
1498 }