1 /*****************************************************************************
2 * Copyright (C) 2013 x265 project
4 * Authors: Steve Borho <steve@borho.org>
5 * Min Chen <chenm003@163.com>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
21 * This program is also available under a commercial proprietary license.
22 * For more information, contact us at license @ x265.com.
23 *****************************************************************************/
27 #include "framedata.h"
/* Divide num by den and round the quotient to the nearest integer.
 *
 * The division is carried out on the magnitude of num and the sign is put
 * back afterwards, so the result is symmetric around zero; for a positive
 * den an exact half rounds away from zero.
 *
 * den is used as a divisor and therefore must not be zero. */
inline int32_t roundIBDI(int32_t num, int32_t den)
{
    const bool negative = num < 0;
    const int32_t magnitude = negative ? -num : num;
    const int32_t rounded = (magnitude * 2 + den) / (den * 2);

    return negative ? -rounded : rounded;
}
/* get the sign of input variable (TODO: this is a dup, make common)
 *
 * Returns -1 when x is negative, 0 when x is zero and +1 when x is positive.
 *
 * Written with comparisons so that it neither negates x (negating INT_MIN
 * overflows a signed int) nor depends on how a negative value is
 * right-shifted. */
inline int signOf(int x)
{
    return (x > 0) - (x < 0);
}
/* Evaluate (count * offset - 2 * offsetOrg) * offset, the quantity the
 * callers in this file use as the distortion estimate for applying `offset`
 * to a class that holds `count` samples whose accumulated difference is
 * `offsetOrg`.
 *
 * All arithmetic is done in 64 bits: the operands are 32-bit, and both
 * 2 * offsetOrg and the final multiplication can exceed the int range. */
inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg)
{
    const int64_t wideOffset = offset;

    return (count * wideOffset - 2 * static_cast<int64_t>(offsetOrg)) * wideOffset;
}
49 } // end anonymous namespace
54 const uint32_t SAO::s_eoTable
[NUM_EDGETYPE
] =
68 m_countPreDblk
= NULL
;
69 m_offsetOrgPreDblk
= NULL
;
75 m_clipTableBase
= NULL
;
86 m_depthSaoRate
[0][0] = 0;
87 m_depthSaoRate
[0][1] = 0;
88 m_depthSaoRate
[0][2] = 0;
89 m_depthSaoRate
[0][3] = 0;
90 m_depthSaoRate
[1][0] = 0;
91 m_depthSaoRate
[1][1] = 0;
92 m_depthSaoRate
[1][2] = 0;
93 m_depthSaoRate
[1][3] = 0;
96 bool SAO::create(x265_param
* param
)
99 m_hChromaShift
= CHROMA_H_SHIFT(param
->internalCsp
);
100 m_vChromaShift
= CHROMA_V_SHIFT(param
->internalCsp
);
102 m_numCuInWidth
= (m_param
->sourceWidth
+ g_maxCUSize
- 1) / g_maxCUSize
;
103 m_numCuInHeight
= (m_param
->sourceHeight
+ g_maxCUSize
- 1) / g_maxCUSize
;
105 const pixel maxY
= (1 << X265_DEPTH
) - 1;
106 const pixel rangeExt
= maxY
>> 1;
107 int numCtu
= m_numCuInWidth
* m_numCuInHeight
;
109 CHECKED_MALLOC(m_clipTableBase
, pixel
, maxY
+ 2 * rangeExt
);
110 CHECKED_MALLOC(m_offsetBo
, pixel
, maxY
+ 2 * rangeExt
);
112 CHECKED_MALLOC(m_tmpL1
, pixel
, g_maxCUSize
+ 1);
113 CHECKED_MALLOC(m_tmpL2
, pixel
, g_maxCUSize
+ 1);
115 for (int i
= 0; i
< 3; i
++)
117 CHECKED_MALLOC(m_tmpU1
[i
], pixel
, m_param
->sourceWidth
);
118 CHECKED_MALLOC(m_tmpU2
[i
], pixel
, m_param
->sourceWidth
);
121 CHECKED_MALLOC(m_count
, PerClass
, NUM_PLANE
);
122 CHECKED_MALLOC(m_offset
, PerClass
, NUM_PLANE
);
123 CHECKED_MALLOC(m_offsetOrg
, PerClass
, NUM_PLANE
);
125 CHECKED_MALLOC(m_countPreDblk
, PerPlane
, numCtu
);
126 CHECKED_MALLOC(m_offsetOrgPreDblk
, PerPlane
, numCtu
);
128 m_clipTable
= &(m_clipTableBase
[rangeExt
]);
130 for (int i
= 0; i
< rangeExt
; i
++)
131 m_clipTableBase
[i
] = 0;
133 for (int i
= 0; i
< maxY
; i
++)
134 m_clipTable
[i
] = (pixel
)i
;
136 for (int i
= maxY
; i
< maxY
+ rangeExt
; i
++)
137 m_clipTable
[i
] = maxY
;
147 X265_FREE(m_clipTableBase
);
148 X265_FREE(m_offsetBo
);
153 for (int i
= 0; i
< 3; i
++)
155 X265_FREE(m_tmpU1
[i
]);
156 X265_FREE(m_tmpU2
[i
]);
161 X265_FREE(m_offsetOrg
);
162 X265_FREE(m_countPreDblk
);
163 X265_FREE(m_offsetOrgPreDblk
);
166 /* allocate memory for SAO parameters */
167 void SAO::allocSaoParam(SAOParam
* saoParam
) const
169 saoParam
->numCuInWidth
= m_numCuInWidth
;
171 saoParam
->ctuParam
[0] = new SaoCtuParam
[m_numCuInHeight
* m_numCuInWidth
];
172 saoParam
->ctuParam
[1] = new SaoCtuParam
[m_numCuInHeight
* m_numCuInWidth
];
173 saoParam
->ctuParam
[2] = new SaoCtuParam
[m_numCuInHeight
* m_numCuInWidth
];
176 void SAO::startSlice(Frame
* frame
, Entropy
& initState
, int qp
)
178 Slice
* slice
= frame
->m_encData
->m_slice
;
180 if (m_param
->internalCsp
== X265_CSP_I420
)
181 qpCb
= Clip3(QP_MIN
, QP_MAX_MAX
, (int)g_chromaScale
[qp
+ slice
->m_pps
->chromaQpOffset
[0]]);
183 qpCb
= X265_MIN(qp
+ slice
->m_pps
->chromaQpOffset
[0], QP_MAX_SPEC
);
184 m_lumaLambda
= x265_lambda2_tab
[qp
];
185 m_chromaLambda
= x265_lambda2_tab
[qpCb
]; // Use Cb QP for SAO chroma
188 switch (slice
->m_sliceType
)
197 m_refDepth
= 2 + !IS_REFERENCED(frame
);
203 m_entropyCoder
.load(initState
);
204 m_rdContexts
.next
.load(initState
);
205 m_rdContexts
.cur
.load(initState
);
207 SAOParam
* saoParam
= frame
->m_encData
->m_saoParam
;
210 saoParam
= new SAOParam
;
211 allocSaoParam(saoParam
);
212 frame
->m_encData
->m_saoParam
= saoParam
;
215 rdoSaoUnitRowInit(saoParam
);
217 // NOTE: Disable SAO automatic turn-off when frame parallelism is
218 // enabled, so that the output is exactly the same regardless of the frame thread count
219 if (m_param
->frameNumThreads
> 1)
221 saoParam
->bSaoFlag
[0] = true;
222 saoParam
->bSaoFlag
[1] = true;
226 // CTU-based SAO process without slice granularity
227 void SAO::processSaoCu(int addr
, int typeIdx
, int plane
)
230 const CUData
* cu
= m_frame
->m_encData
->getPicCTU(addr
);
231 pixel
* rec
= m_frame
->m_reconPic
->getPlaneAddr(plane
, addr
);
232 intptr_t stride
= plane
? m_frame
->m_reconPic
->m_strideC
: m_frame
->m_reconPic
->m_stride
;
233 uint32_t picWidth
= m_param
->sourceWidth
;
234 uint32_t picHeight
= m_param
->sourceHeight
;
235 int ctuWidth
= g_maxCUSize
;
236 int ctuHeight
= g_maxCUSize
;
237 uint32_t lpelx
= cu
->m_cuPelX
;
238 uint32_t tpely
= cu
->m_cuPelY
;
241 picWidth
>>= m_hChromaShift
;
242 picHeight
>>= m_vChromaShift
;
243 ctuWidth
>>= m_hChromaShift
;
244 ctuHeight
>>= m_vChromaShift
;
245 lpelx
>>= m_hChromaShift
;
246 tpely
>>= m_vChromaShift
;
248 uint32_t rpelx
= x265_min(lpelx
+ ctuWidth
, picWidth
);
249 uint32_t bpely
= x265_min(tpely
+ ctuHeight
, picHeight
);
250 ctuWidth
= rpelx
- lpelx
;
251 ctuHeight
= bpely
- tpely
;
260 int32_t _upBuff1
[MAX_CU_SIZE
+ 2], *upBuff1
= _upBuff1
+ 1;
261 int32_t _upBufft
[MAX_CU_SIZE
+ 2], *upBufft
= _upBufft
+ 1;
264 const pixel
* recR
= &rec
[ctuWidth
- 1];
265 for (int i
= 0; i
< ctuHeight
+ 1; i
++)
272 tmpU
= &(m_tmpU1
[plane
][lpelx
]);
277 case SAO_EO_0
: // dir: -
279 pixel firstPxl
= 0, lastPxl
= 0;
281 endX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
;
284 for (y
= 0; y
< ctuHeight
; y
++)
286 int signLeft
= signOf(rec
[startX
] - tmpL
[y
]);
287 for (x
= startX
; x
< endX
; x
++)
289 int signRight
= signOf(rec
[x
] - rec
[x
+ 1]);
290 int edgeType
= signRight
+ signLeft
+ 2;
291 signLeft
= -signRight
;
293 rec
[x
] = m_clipTable
[rec
[x
] + m_offsetEo
[edgeType
]];
301 for (y
= 0; y
< ctuHeight
; y
++)
303 int signLeft
= signOf(rec
[startX
] - tmpL
[y
]);
308 if (rpelx
== picWidth
)
309 lastPxl
= rec
[ctuWidth
- 1];
311 primitives
.saoCuOrgE0(rec
, m_offsetEo
, ctuWidth
, (int8_t)signLeft
);
316 if (rpelx
== picWidth
)
317 rec
[ctuWidth
- 1] = lastPxl
;
324 case SAO_EO_1
: // dir: |
327 endY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
;
331 for (x
= 0; x
< ctuWidth
; x
++)
332 upBuff1
[x
] = signOf(rec
[x
] - tmpU
[x
]);
334 for (y
= startY
; y
< endY
; y
++)
336 for (x
= 0; x
< ctuWidth
; x
++)
338 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
]);
339 int edgeType
= signDown
+ upBuff1
[x
] + 2;
340 upBuff1
[x
] = -signDown
;
342 rec
[x
] = m_clipTable
[rec
[x
] + m_offsetEo
[edgeType
]];
350 case SAO_EO_2
: // dir: 135
353 endX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
;
356 endY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
;
361 for (x
= startX
; x
< endX
; x
++)
362 upBuff1
[x
] = signOf(rec
[x
] - tmpU
[x
- 1]);
364 for (y
= startY
; y
< endY
; y
++)
366 upBufft
[startX
] = signOf(rec
[stride
+ startX
] - tmpL
[y
]);
367 for (x
= startX
; x
< endX
; x
++)
369 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
+ 1]);
370 int edgeType
= signDown
+ upBuff1
[x
] + 2;
371 upBufft
[x
+ 1] = -signDown
;
372 rec
[x
] = m_clipTable
[rec
[x
] + m_offsetEo
[edgeType
]];
375 std::swap(upBuff1
, upBufft
);
382 case SAO_EO_3
: // dir: 45
385 endX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
;
388 endY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
;
393 for (x
= startX
- 1; x
< endX
; x
++)
394 upBuff1
[x
] = signOf(rec
[x
] - tmpU
[x
+ 1]);
396 for (y
= startY
; y
< endY
; y
++)
399 int signDown
= signOf(rec
[x
] - tmpL
[y
+ 1]);
400 int edgeType
= signDown
+ upBuff1
[x
] + 2;
401 upBuff1
[x
- 1] = -signDown
;
402 rec
[x
] = m_clipTable
[rec
[x
] + m_offsetEo
[edgeType
]];
403 for (x
= startX
+ 1; x
< endX
; x
++)
405 signDown
= signOf(rec
[x
] - rec
[x
+ stride
- 1]);
406 edgeType
= signDown
+ upBuff1
[x
] + 2;
407 upBuff1
[x
- 1] = -signDown
;
408 rec
[x
] = m_clipTable
[rec
[x
] + m_offsetEo
[edgeType
]];
411 upBuff1
[endX
- 1] = signOf(rec
[endX
- 1 + stride
] - rec
[endX
]);
420 const pixel
* offsetBo
= m_offsetBo
;
422 for (y
= 0; y
< ctuHeight
; y
++)
424 for (x
= 0; x
< ctuWidth
; x
++)
425 rec
[x
] = offsetBo
[rec
[x
]];
435 // if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1)
436 std::swap(m_tmpL1
, m_tmpL2
);
439 /* Process SAO all units */
440 void SAO::processSaoUnitRow(SaoCtuParam
* ctuParam
, int idxY
, int plane
)
442 intptr_t stride
= plane
? m_frame
->m_reconPic
->m_strideC
: m_frame
->m_reconPic
->m_stride
;
443 uint32_t picWidth
= m_param
->sourceWidth
;
444 int ctuWidth
= g_maxCUSize
;
445 int ctuHeight
= g_maxCUSize
;
448 picWidth
>>= m_hChromaShift
;
449 ctuWidth
>>= m_hChromaShift
;
450 ctuHeight
>>= m_vChromaShift
;
455 pixel
* rec
= m_frame
->m_reconPic
->m_picOrg
[plane
];
456 memcpy(m_tmpU1
[plane
], rec
, sizeof(pixel
) * picWidth
);
459 int addr
= idxY
* m_numCuInWidth
;
460 pixel
* rec
= plane
? m_frame
->m_reconPic
->getChromaAddr(plane
, addr
) : m_frame
->m_reconPic
->getLumaAddr(addr
);
462 for (int i
= 0; i
< ctuHeight
+ 1; i
++)
468 rec
-= (stride
<< 1);
470 memcpy(m_tmpU2
[plane
], rec
, sizeof(pixel
) * picWidth
);
472 const int boShift
= X265_DEPTH
- SAO_BO_BITS
;
474 for (int idxX
= 0; idxX
< m_numCuInWidth
; idxX
++)
476 addr
= idxY
* m_numCuInWidth
+ idxX
;
478 bool mergeLeftFlag
= ctuParam
[addr
].mergeMode
== SAO_MERGE_LEFT
;
479 int typeIdx
= ctuParam
[addr
].typeIdx
;
485 if (typeIdx
== SAO_BO
)
487 pixel
* offsetBo
= m_offsetBo
;
488 int offset
[SAO_NUM_BO_CLASSES
];
489 memset(offset
, 0, sizeof(offset
));
491 for (int i
= 0; i
< SAO_NUM_OFFSET
; i
++)
492 offset
[((ctuParam
[addr
].bandPos
+ i
) & (SAO_NUM_BO_CLASSES
- 1))] = ctuParam
[addr
].offset
[i
] << SAO_BIT_INC
;
494 for (int i
= 0; i
< (1 << X265_DEPTH
); i
++)
495 offsetBo
[i
] = m_clipTable
[i
+ offset
[i
>> boShift
]];
497 else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
499 int offset
[NUM_EDGETYPE
];
501 for (int i
= 0; i
< SAO_NUM_OFFSET
; i
++)
502 offset
[i
+ 1] = ctuParam
[addr
].offset
[i
] << SAO_BIT_INC
;
504 for (int edgeType
= 0; edgeType
< NUM_EDGETYPE
; edgeType
++)
505 m_offsetEo
[edgeType
] = (int8_t)offset
[s_eoTable
[edgeType
]];
508 processSaoCu(addr
, typeIdx
, plane
);
510 else if (idxX
!= (m_numCuInWidth
- 1))
512 rec
= plane
? m_frame
->m_reconPic
->getChromaAddr(plane
, addr
) : m_frame
->m_reconPic
->getLumaAddr(addr
);
514 for (int i
= 0; i
< ctuHeight
+ 1; i
++)
516 m_tmpL1
[i
] = rec
[ctuWidth
- 1];
522 std::swap(m_tmpU1
[plane
], m_tmpU2
[plane
]);
525 void SAO::resetSaoUnit(SaoCtuParam
* saoUnit
)
527 saoUnit
->mergeMode
= SAO_MERGE_NONE
;
528 saoUnit
->typeIdx
= -1;
529 saoUnit
->bandPos
= 0;
531 for (int i
= 0; i
< SAO_NUM_OFFSET
; i
++)
532 saoUnit
->offset
[i
] = 0;
535 void SAO::copySaoUnit(SaoCtuParam
* saoUnitDst
, const SaoCtuParam
* saoUnitSrc
)
537 saoUnitDst
->mergeMode
= saoUnitSrc
->mergeMode
;
538 saoUnitDst
->typeIdx
= saoUnitSrc
->typeIdx
;
539 saoUnitDst
->bandPos
= saoUnitSrc
->bandPos
;
541 for (int i
= 0; i
< SAO_NUM_OFFSET
; i
++)
542 saoUnitDst
->offset
[i
] = saoUnitSrc
->offset
[i
];
545 /* Calculate SAO statistics for current CTU without non-crossing slice */
546 void SAO::calcSaoStatsCu(int addr
, int plane
)
549 const CUData
* cu
= m_frame
->m_encData
->getPicCTU(addr
);
550 const pixel
* fenc0
= m_frame
->m_fencPic
->getPlaneAddr(plane
, addr
);
551 const pixel
* rec0
= m_frame
->m_reconPic
->getPlaneAddr(plane
, addr
);
554 intptr_t stride
= plane
? m_frame
->m_reconPic
->m_strideC
: m_frame
->m_reconPic
->m_stride
;
555 uint32_t picWidth
= m_param
->sourceWidth
;
556 uint32_t picHeight
= m_param
->sourceHeight
;
557 int ctuWidth
= g_maxCUSize
;
558 int ctuHeight
= g_maxCUSize
;
559 uint32_t lpelx
= cu
->m_cuPelX
;
560 uint32_t tpely
= cu
->m_cuPelY
;
563 picWidth
>>= m_hChromaShift
;
564 picHeight
>>= m_vChromaShift
;
565 ctuWidth
>>= m_hChromaShift
;
566 ctuHeight
>>= m_vChromaShift
;
567 lpelx
>>= m_hChromaShift
;
568 tpely
>>= m_vChromaShift
;
570 uint32_t rpelx
= x265_min(lpelx
+ ctuWidth
, picWidth
);
571 uint32_t bpely
= x265_min(tpely
+ ctuHeight
, picHeight
);
572 ctuWidth
= rpelx
- lpelx
;
573 ctuHeight
= bpely
- tpely
;
582 int skipB
= plane
? 2 : 4;
583 int skipR
= plane
? 3 : 5;
585 int32_t _upBuff1
[MAX_CU_SIZE
+ 2], *upBuff1
= _upBuff1
+ 1;
586 int32_t _upBufft
[MAX_CU_SIZE
+ 2], *upBufft
= _upBufft
+ 1;
590 const int boShift
= X265_DEPTH
- SAO_BO_BITS
;
592 if (m_param
->bSaoNonDeblocked
)
594 skipB
= plane
? 1 : 3;
595 skipR
= plane
? 2 : 4;
597 stats
= m_offsetOrg
[plane
][SAO_BO
];
598 count
= m_count
[plane
][SAO_BO
];
603 endX
= (rpelx
== picWidth
) ? ctuWidth
: ctuWidth
- skipR
;
604 endY
= (bpely
== picHeight
) ? ctuHeight
: ctuHeight
- skipB
;
606 for (y
= 0; y
< endY
; y
++)
608 for (x
= 0; x
< endX
; x
++)
610 int classIdx
= 1 + (rec
[x
] >> boShift
);
611 stats
[classIdx
] += (fenc
[x
] - rec
[x
]);
621 // SAO_EO_0: // dir: -
623 if (m_param
->bSaoNonDeblocked
)
625 skipB
= plane
? 1 : 3;
626 skipR
= plane
? 3 : 5;
628 stats
= m_offsetOrg
[plane
][SAO_EO_0
];
629 count
= m_count
[plane
][SAO_EO_0
];
635 endX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
- skipR
;
636 for (y
= 0; y
< ctuHeight
- skipB
; y
++)
638 int signLeft
= signOf(rec
[startX
] - rec
[startX
- 1]);
639 for (x
= startX
; x
< endX
; x
++)
641 int signRight
= signOf(rec
[x
] - rec
[x
+ 1]);
642 int edgeType
= signRight
+ signLeft
+ 2;
643 signLeft
= -signRight
;
645 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
646 count
[s_eoTable
[edgeType
]]++;
654 // SAO_EO_1: // dir: |
656 if (m_param
->bSaoNonDeblocked
)
658 skipB
= plane
? 2 : 4;
659 skipR
= plane
? 2 : 4;
661 stats
= m_offsetOrg
[plane
][SAO_EO_1
];
662 count
= m_count
[plane
][SAO_EO_1
];
668 endX
= (rpelx
== picWidth
) ? ctuWidth
: ctuWidth
- skipR
;
669 endY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
- skipB
;
676 for (x
= 0; x
< ctuWidth
; x
++)
677 upBuff1
[x
] = signOf(rec
[x
] - rec
[x
- stride
]);
679 for (y
= startY
; y
< endY
; y
++)
681 for (x
= 0; x
< endX
; x
++)
683 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
]);
684 int edgeType
= signDown
+ upBuff1
[x
] + 2;
685 upBuff1
[x
] = -signDown
;
687 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
688 count
[s_eoTable
[edgeType
]]++;
696 // SAO_EO_2: // dir: 135
698 if (m_param
->bSaoNonDeblocked
)
700 skipB
= plane
? 2 : 4;
701 skipR
= plane
? 3 : 5;
703 stats
= m_offsetOrg
[plane
][SAO_EO_2
];
704 count
= m_count
[plane
][SAO_EO_2
];
710 endX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
- skipR
;
713 endY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
- skipB
;
720 for (x
= startX
; x
< endX
; x
++)
721 upBuff1
[x
] = signOf(rec
[x
] - rec
[x
- stride
- 1]);
723 for (y
= startY
; y
< endY
; y
++)
725 upBufft
[startX
] = signOf(rec
[startX
+ stride
] - rec
[startX
- 1]);
726 for (x
= startX
; x
< endX
; x
++)
728 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
+ 1]);
729 int edgeType
= signDown
+ upBuff1
[x
] + 2;
730 upBufft
[x
+ 1] = -signDown
;
731 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
732 count
[s_eoTable
[edgeType
]]++;
735 std::swap(upBuff1
, upBufft
);
742 // SAO_EO_3: // dir: 45
744 if (m_param
->bSaoNonDeblocked
)
746 skipB
= plane
? 2 : 4;
747 skipR
= plane
? 3 : 5;
749 stats
= m_offsetOrg
[plane
][SAO_EO_3
];
750 count
= m_count
[plane
][SAO_EO_3
];
756 endX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
- skipR
;
759 endY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
- skipB
;
767 for (x
= startX
- 1; x
< endX
; x
++)
768 upBuff1
[x
] = signOf(rec
[x
] - rec
[x
- stride
+ 1]);
770 for (y
= startY
; y
< endY
; y
++)
772 for (x
= startX
; x
< endX
; x
++)
774 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
- 1]);
775 int edgeType
= signDown
+ upBuff1
[x
] + 2;
776 upBuff1
[x
- 1] = -signDown
;
777 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
778 count
[s_eoTable
[edgeType
]]++;
781 upBuff1
[endX
- 1] = signOf(rec
[endX
- 1 + stride
] - rec
[endX
]);
790 void SAO::calcSaoStatsCu_BeforeDblk(Frame
* frame
, int idxX
, int idxY
)
792 int addr
= idxX
+ m_numCuInWidth
* idxY
;
795 const CUData
* cu
= frame
->m_encData
->getPicCTU(addr
);
798 intptr_t stride
= m_frame
->m_reconPic
->m_stride
;
799 uint32_t picWidth
= m_param
->sourceWidth
;
800 uint32_t picHeight
= m_param
->sourceHeight
;
801 int ctuWidth
= g_maxCUSize
;
802 int ctuHeight
= g_maxCUSize
;
803 uint32_t lpelx
= cu
->m_cuPelX
;
804 uint32_t tpely
= cu
->m_cuPelY
;
805 uint32_t rpelx
= x265_min(lpelx
+ ctuWidth
, picWidth
);
806 uint32_t bpely
= x265_min(tpely
+ ctuHeight
, picHeight
);
807 ctuWidth
= rpelx
- lpelx
;
808 ctuHeight
= bpely
- tpely
;
820 int32_t _upBuff1
[MAX_CU_SIZE
+ 2], *upBuff1
= _upBuff1
+ 1;
821 int32_t _upBufft
[MAX_CU_SIZE
+ 2], *upBufft
= _upBufft
+ 1;
823 const int boShift
= X265_DEPTH
- SAO_BO_BITS
;
825 memset(m_countPreDblk
[addr
], 0, sizeof(PerPlane
));
826 memset(m_offsetOrgPreDblk
[addr
], 0, sizeof(PerPlane
));
828 for (int plane
= 0; plane
< NUM_PLANE
; plane
++)
832 stride
= frame
->m_reconPic
->m_strideC
;
833 picWidth
>>= m_hChromaShift
;
834 picHeight
>>= m_vChromaShift
;
835 ctuWidth
>>= m_hChromaShift
;
836 ctuHeight
>>= m_vChromaShift
;
837 lpelx
>>= m_hChromaShift
;
838 tpely
>>= m_vChromaShift
;
839 rpelx
>>= m_hChromaShift
;
840 bpely
>>= m_vChromaShift
;
845 skipB
= plane
? 1 : 3;
846 skipR
= plane
? 2 : 4;
848 stats
= m_offsetOrgPreDblk
[addr
][plane
][SAO_BO
];
849 count
= m_countPreDblk
[addr
][plane
][SAO_BO
];
851 const pixel
* fenc0
= m_frame
->m_fencPic
->getPlaneAddr(plane
, addr
);
852 const pixel
* rec0
= m_frame
->m_reconPic
->getPlaneAddr(plane
, addr
);
856 startX
= (rpelx
== picWidth
) ? ctuWidth
: ctuWidth
- skipR
;
857 startY
= (bpely
== picHeight
) ? ctuHeight
: ctuHeight
- skipB
;
859 for (y
= 0; y
< ctuHeight
; y
++)
861 for (x
= (y
< startY
? startX
: 0); x
< ctuWidth
; x
++)
863 int classIdx
= 1 + (rec
[x
] >> boShift
);
864 stats
[classIdx
] += (fenc
[x
] - rec
[x
]);
872 // SAO_EO_0: // dir: -
874 skipB
= plane
? 1 : 3;
875 skipR
= plane
? 3 : 5;
877 stats
= m_offsetOrgPreDblk
[addr
][plane
][SAO_EO_0
];
878 count
= m_countPreDblk
[addr
][plane
][SAO_EO_0
];
883 startX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
- skipR
;
884 startY
= (bpely
== picHeight
) ? ctuHeight
: ctuHeight
- skipB
;
886 // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
887 endX
= ctuWidth
- 1; // not refer right CTU
889 for (y
= 0; y
< ctuHeight
; y
++)
891 x
= (y
< startY
? startX
: firstX
);
892 int signLeft
= signOf(rec
[x
] - rec
[x
- 1]);
893 for (; x
< endX
; x
++)
895 int signRight
= signOf(rec
[x
] - rec
[x
+ 1]);
896 int edgeType
= signRight
+ signLeft
+ 2;
897 signLeft
= -signRight
;
899 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
900 count
[s_eoTable
[edgeType
]]++;
908 // SAO_EO_1: // dir: |
910 skipB
= plane
? 2 : 4;
911 skipR
= plane
? 2 : 4;
913 stats
= m_offsetOrgPreDblk
[addr
][plane
][SAO_EO_1
];
914 count
= m_countPreDblk
[addr
][plane
][SAO_EO_1
];
919 startX
= (rpelx
== picWidth
) ? ctuWidth
: ctuWidth
- skipR
;
920 startY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
- skipB
;
922 // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
923 endY
= ctuHeight
- 1; // not refer below CTU
930 for (x
= startX
; x
< ctuWidth
; x
++)
931 upBuff1
[x
] = signOf(rec
[x
] - rec
[x
- stride
]);
933 for (y
= firstY
; y
< endY
; y
++)
935 for (x
= (y
< startY
- 1 ? startX
: 0); x
< ctuWidth
; x
++)
937 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
]);
938 int edgeType
= signDown
+ upBuff1
[x
] + 2;
939 upBuff1
[x
] = -signDown
;
941 if (x
< startX
&& y
< startY
)
944 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
945 count
[s_eoTable
[edgeType
]]++;
953 // SAO_EO_2: // dir: 135
955 skipB
= plane
? 2 : 4;
956 skipR
= plane
? 3 : 5;
958 stats
= m_offsetOrgPreDblk
[addr
][plane
][SAO_EO_2
];
959 count
= m_countPreDblk
[addr
][plane
][SAO_EO_2
];
964 startX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
- skipR
;
965 startY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
- skipB
;
968 // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
969 // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
970 endX
= ctuWidth
- 1; // not refer right CTU
971 endY
= ctuHeight
- 1; // not refer below CTU
978 for (x
= startX
; x
< endX
; x
++)
979 upBuff1
[x
] = signOf(rec
[x
] - rec
[x
- stride
- 1]);
981 for (y
= firstY
; y
< endY
; y
++)
983 x
= (y
< startY
- 1 ? startX
: firstX
);
984 upBufft
[x
] = signOf(rec
[x
+ stride
] - rec
[x
- 1]);
985 for (; x
< endX
; x
++)
987 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
+ 1]);
988 int edgeType
= signDown
+ upBuff1
[x
] + 2;
989 upBufft
[x
+ 1] = -signDown
;
991 if (x
< startX
&& y
< startY
)
994 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
995 count
[s_eoTable
[edgeType
]]++;
998 std::swap(upBuff1
, upBufft
);
1005 // SAO_EO_3: // dir: 45
1007 skipB
= plane
? 2 : 4;
1008 skipR
= plane
? 3 : 5;
1010 stats
= m_offsetOrgPreDblk
[addr
][plane
][SAO_EO_3
];
1011 count
= m_countPreDblk
[addr
][plane
][SAO_EO_3
];
1016 startX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
- skipR
;
1017 startY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
- skipB
;
1020 // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
1021 // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
1022 endX
= ctuWidth
- 1; // not refer right CTU
1023 endY
= ctuHeight
- 1; // not refer below CTU
1030 for (x
= startX
- 1; x
< endX
; x
++)
1031 upBuff1
[x
] = signOf(rec
[x
] - rec
[x
- stride
+ 1]);
1033 for (y
= firstY
; y
< endY
; y
++)
1035 for (x
= (y
< startY
- 1 ? startX
: firstX
); x
< endX
; x
++)
1037 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
- 1]);
1038 int edgeType
= signDown
+ upBuff1
[x
] + 2;
1039 upBuff1
[x
- 1] = -signDown
;
1041 if (x
< startX
&& y
< startY
)
1044 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
1045 count
[s_eoTable
[edgeType
]]++;
1048 upBuff1
[endX
- 1] = signOf(rec
[endX
- 1 + stride
] - rec
[endX
]);
1057 /* reset offset statistics */
1058 void SAO::resetStats()
1060 memset(m_count
, 0, sizeof(PerClass
) * NUM_PLANE
);
1061 memset(m_offset
, 0, sizeof(PerClass
) * NUM_PLANE
);
1062 memset(m_offsetOrg
, 0, sizeof(PerClass
) * NUM_PLANE
);
1065 void SAO::rdoSaoUnitRowInit(SAOParam
* saoParam
)
1067 saoParam
->bSaoFlag
[0] = true;
1068 saoParam
->bSaoFlag
[1] = true;
1070 m_numNoSao
[0] = 0; // Luma
1071 m_numNoSao
[1] = 0; // Chroma
1072 if (m_refDepth
> 0 && m_depthSaoRate
[0][m_refDepth
- 1] > SAO_ENCODING_RATE
)
1073 saoParam
->bSaoFlag
[0] = false;
1074 if (m_refDepth
> 0 && m_depthSaoRate
[1][m_refDepth
- 1] > SAO_ENCODING_RATE_CHROMA
)
1075 saoParam
->bSaoFlag
[1] = false;
1078 void SAO::rdoSaoUnitRowEnd(const SAOParam
* saoParam
, int numctus
)
1080 if (!saoParam
->bSaoFlag
[0])
1081 m_depthSaoRate
[0][m_refDepth
] = 1.0;
1083 m_depthSaoRate
[0][m_refDepth
] = m_numNoSao
[0] / ((double)numctus
);
1085 if (!saoParam
->bSaoFlag
[1])
1086 m_depthSaoRate
[1][m_refDepth
] = 1.0;
1088 m_depthSaoRate
[1][m_refDepth
] = m_numNoSao
[1] / ((double)numctus
);
1091 void SAO::rdoSaoUnitRow(SAOParam
* saoParam
, int idxY
)
1093 SaoCtuParam mergeSaoParam
[NUM_MERGE_MODE
][2];
1094 double mergeDist
[NUM_MERGE_MODE
];
1095 bool allowMerge
[2]; // left, up
1096 allowMerge
[1] = (idxY
> 0);
1098 for (int idxX
= 0; idxX
< m_numCuInWidth
; idxX
++)
1100 int addr
= idxX
+ idxY
* m_numCuInWidth
;
1101 int addrUp
= idxY
? addr
- m_numCuInWidth
: -1;
1102 int addrLeft
= idxX
? addr
- 1 : -1;
1103 allowMerge
[0] = (idxX
> 0);
1105 m_entropyCoder
.load(m_rdContexts
.cur
);
1107 m_entropyCoder
.codeSaoMerge(0);
1109 m_entropyCoder
.codeSaoMerge(0);
1110 m_entropyCoder
.store(m_rdContexts
.temp
);
1111 // reset stats Y, Cb, Cr
1112 for (int plane
= 0; plane
< 3; plane
++)
1114 for (int j
= 0; j
< MAX_NUM_SAO_TYPE
; j
++)
1116 for (int k
= 0; k
< MAX_NUM_SAO_CLASS
; k
++)
1118 m_offset
[plane
][j
][k
] = 0;
1119 if (m_param
->bSaoNonDeblocked
)
1121 m_count
[plane
][j
][k
] = m_countPreDblk
[addr
][plane
][j
][k
];
1122 m_offsetOrg
[plane
][j
][k
] = m_offsetOrgPreDblk
[addr
][plane
][j
][k
];
1126 m_count
[plane
][j
][k
] = 0;
1127 m_offsetOrg
[plane
][j
][k
] = 0;
1132 saoParam
->ctuParam
[plane
][addr
].mergeMode
= SAO_MERGE_NONE
;
1133 saoParam
->ctuParam
[plane
][addr
].typeIdx
= -1;
1134 saoParam
->ctuParam
[plane
][addr
].bandPos
= 0;
1135 if (saoParam
->bSaoFlag
[plane
> 0])
1136 calcSaoStatsCu(addr
, plane
);
1139 saoComponentParamDist(saoParam
, addr
, addrUp
, addrLeft
, &mergeSaoParam
[0][0], mergeDist
);
1141 sao2ChromaParamDist(saoParam
, addr
, addrUp
, addrLeft
, mergeSaoParam
, mergeDist
);
1143 if (saoParam
->bSaoFlag
[0] || saoParam
->bSaoFlag
[1])
1145 // Cost of new SAO_params
1146 m_entropyCoder
.load(m_rdContexts
.cur
);
1147 m_entropyCoder
.resetBits();
1149 m_entropyCoder
.codeSaoMerge(0);
1151 m_entropyCoder
.codeSaoMerge(0);
1152 for (int plane
= 0; plane
< 3; plane
++)
1154 if (saoParam
->bSaoFlag
[plane
> 0])
1155 m_entropyCoder
.codeSaoOffset(saoParam
->ctuParam
[plane
][addr
], plane
);
1158 uint32_t rate
= m_entropyCoder
.getNumberOfWrittenBits();
1159 double bestCost
= mergeDist
[0] + (double)rate
;
1160 m_entropyCoder
.store(m_rdContexts
.temp
);
1163 for (int mergeIdx
= 0; mergeIdx
< 2; ++mergeIdx
)
1165 if (!allowMerge
[mergeIdx
])
1168 m_entropyCoder
.load(m_rdContexts
.cur
);
1169 m_entropyCoder
.resetBits();
1171 m_entropyCoder
.codeSaoMerge(1 - mergeIdx
);
1172 if (allowMerge
[1] && (mergeIdx
== 1))
1173 m_entropyCoder
.codeSaoMerge(1);
1175 rate
= m_entropyCoder
.getNumberOfWrittenBits();
1176 double mergeCost
= mergeDist
[mergeIdx
+ 1] + (double)rate
;
1177 if (mergeCost
< bestCost
)
1179 SaoMergeMode mergeMode
= mergeIdx
? SAO_MERGE_UP
: SAO_MERGE_LEFT
;
1180 bestCost
= mergeCost
;
1181 m_entropyCoder
.store(m_rdContexts
.temp
);
1182 for (int plane
= 0; plane
< 3; plane
++)
1184 mergeSaoParam
[plane
][mergeIdx
].mergeMode
= mergeMode
;
1185 if (saoParam
->bSaoFlag
[plane
> 0])
1186 copySaoUnit(&saoParam
->ctuParam
[plane
][addr
], &mergeSaoParam
[plane
][mergeIdx
]);
1191 if (saoParam
->ctuParam
[0][addr
].typeIdx
< 0)
1193 if (saoParam
->ctuParam
[1][addr
].typeIdx
< 0)
1195 m_entropyCoder
.load(m_rdContexts
.temp
);
1196 m_entropyCoder
.store(m_rdContexts
.cur
);
1201 /** rate distortion optimization of SAO unit */
1202 inline int64_t SAO::estSaoTypeDist(int plane
, int typeIdx
, double lambda
, int32_t* currentDistortionTableBo
, double* currentRdCostTableBo
)
1204 int64_t estDist
= 0;
1206 for (int classIdx
= 1; classIdx
< ((typeIdx
< SAO_BO
) ? SAO_EO_LEN
+ 1 : SAO_NUM_BO_CLASSES
+ 1); classIdx
++)
1208 int32_t count
= m_count
[plane
][typeIdx
][classIdx
];
1209 int32_t& offsetOrg
= m_offsetOrg
[plane
][typeIdx
][classIdx
];
1210 int32_t& offsetOut
= m_offset
[plane
][typeIdx
][classIdx
];
1212 if (typeIdx
== SAO_BO
)
1214 currentDistortionTableBo
[classIdx
- 1] = 0;
1215 currentRdCostTableBo
[classIdx
- 1] = lambda
;
1219 int offset
= roundIBDI(offsetOrg
, count
<< SAO_BIT_INC
);
1220 offset
= Clip3(-OFFSET_THRESH
+ 1, OFFSET_THRESH
- 1, offset
);
1221 if (typeIdx
< SAO_BO
)
1224 offset
= X265_MAX(offset
, 0);
1226 offset
= X265_MIN(offset
, 0);
1228 offsetOut
= estIterOffset(typeIdx
, classIdx
, lambda
, offset
, count
, offsetOrg
, currentDistortionTableBo
, currentRdCostTableBo
);
1235 if (typeIdx
!= SAO_BO
)
1236 estDist
+= estSaoDist(count
, (int)offsetOut
<< SAO_BIT_INC
, offsetOrg
);
1242 inline int SAO::estIterOffset(int typeIdx
, int classIdx
, double lambda
, int offset
, int32_t count
, int32_t offsetOrg
, int32_t* currentDistortionTableBo
, double* currentRdCostTableBo
)
1246 // Assume that sending the quantized value 0 results in a zero offset and costs 1 bit. The entropy coder could be used to measure the exact rate here.
1247 double tempMinCost
= lambda
;
1250 // Calculate the bits required for signalling the offset
1251 int tempRate
= (typeIdx
== SAO_BO
) ? (abs(offset
) + 2) : (abs(offset
) + 1);
1252 if (abs(offset
) == OFFSET_THRESH
- 1)
1255 // Do the dequantization before distortion calculation
1256 int tempOffset
= offset
<< SAO_BIT_INC
;
1257 int64_t tempDist
= estSaoDist(count
, tempOffset
, offsetOrg
);
1258 double tempCost
= ((double)tempDist
+ lambda
* (double)tempRate
);
1259 if (tempCost
< tempMinCost
)
1261 tempMinCost
= tempCost
;
1263 if (typeIdx
== SAO_BO
)
1265 currentDistortionTableBo
[classIdx
- 1] = (int)tempDist
;
1266 currentRdCostTableBo
[classIdx
- 1] = tempCost
;
1269 offset
= (offset
> 0) ? (offset
- 1) : (offset
+ 1);
1275 void SAO::saoComponentParamDist(SAOParam
* saoParam
, int addr
, int addrUp
, int addrLeft
, SaoCtuParam
* mergeSaoParam
, double* mergeDist
)
1277 int64_t bestDist
= 0;
1279 SaoCtuParam
* lclCtuParam
= &saoParam
->ctuParam
[0][addr
];
1281 double bestRDCostTableBo
= MAX_DOUBLE
;
1282 int bestClassTableBo
= 0;
1283 int currentDistortionTableBo
[MAX_NUM_SAO_CLASS
];
1284 double currentRdCostTableBo
[MAX_NUM_SAO_CLASS
];
1286 resetSaoUnit(lclCtuParam
);
1287 m_entropyCoder
.load(m_rdContexts
.temp
);
1288 m_entropyCoder
.resetBits();
1289 m_entropyCoder
.codeSaoOffset(*lclCtuParam
, 0);
1290 double dCostPartBest
= m_entropyCoder
.getNumberOfWrittenBits() * m_lumaLambda
;
1292 for (int typeIdx
= 0; typeIdx
< MAX_NUM_SAO_TYPE
; typeIdx
++)
1294 int64_t estDist
= estSaoTypeDist(0, typeIdx
, m_lumaLambda
, currentDistortionTableBo
, currentRdCostTableBo
);
1296 if (typeIdx
== SAO_BO
)
1298 // Estimate Best Position
1299 for (int i
= 0; i
< SAO_NUM_BO_CLASSES
- SAO_BO_LEN
+ 1; i
++)
1301 double currentRDCost
= 0.0;
1302 for (int j
= i
; j
< i
+ SAO_BO_LEN
; j
++)
1303 currentRDCost
+= currentRdCostTableBo
[j
];
1305 if (currentRDCost
< bestRDCostTableBo
)
1307 bestRDCostTableBo
= currentRDCost
;
1308 bestClassTableBo
= i
;
1312 // Re code all Offsets
1315 for (int classIdx
= bestClassTableBo
; classIdx
< bestClassTableBo
+ SAO_BO_LEN
; classIdx
++)
1316 estDist
+= currentDistortionTableBo
[classIdx
];
1318 SaoCtuParam ctuParamRdo
;
1319 ctuParamRdo
.mergeMode
= SAO_MERGE_NONE
;
1320 ctuParamRdo
.typeIdx
= typeIdx
;
1321 ctuParamRdo
.bandPos
= (typeIdx
== SAO_BO
) ? bestClassTableBo
: 0;
1322 for (int classIdx
= 0; classIdx
< SAO_NUM_OFFSET
; classIdx
++)
1323 ctuParamRdo
.offset
[classIdx
] = (int)m_offset
[0][typeIdx
][classIdx
+ ctuParamRdo
.bandPos
+ 1];
1325 m_entropyCoder
.load(m_rdContexts
.temp
);
1326 m_entropyCoder
.resetBits();
1327 m_entropyCoder
.codeSaoOffset(ctuParamRdo
, 0);
1329 uint32_t estRate
= m_entropyCoder
.getNumberOfWrittenBits();
1330 double cost
= (double)estDist
+ m_lumaLambda
* (double)estRate
;
1332 if (cost
< dCostPartBest
)
1334 dCostPartBest
= cost
;
1335 copySaoUnit(lclCtuParam
, &ctuParamRdo
);
1340 mergeDist
[0] = ((double)bestDist
/ m_lumaLambda
);
1341 m_entropyCoder
.load(m_rdContexts
.temp
);
1342 m_entropyCoder
.codeSaoOffset(*lclCtuParam
, 0);
1343 m_entropyCoder
.store(m_rdContexts
.temp
);
1345 // merge left or merge up
1347 for (int mergeIdx
= 0; mergeIdx
< 2; mergeIdx
++)
1349 SaoCtuParam
* mergeSrcParam
= NULL
;
1350 if (addrLeft
>= 0 && mergeIdx
== 0)
1351 mergeSrcParam
= &(saoParam
->ctuParam
[0][addrLeft
]);
1352 else if (addrUp
>= 0 && mergeIdx
== 1)
1353 mergeSrcParam
= &(saoParam
->ctuParam
[0][addrUp
]);
1356 int64_t estDist
= 0;
1357 int typeIdx
= mergeSrcParam
->typeIdx
;
1360 int bandPos
= (typeIdx
== SAO_BO
) ? mergeSrcParam
->bandPos
: 0;
1361 for (int classIdx
= 0; classIdx
< SAO_NUM_OFFSET
; classIdx
++)
1363 int mergeOffset
= mergeSrcParam
->offset
[classIdx
];
1364 estDist
+= estSaoDist(m_count
[0][typeIdx
][classIdx
+ bandPos
+ 1], mergeOffset
, m_offsetOrg
[0][typeIdx
][classIdx
+ bandPos
+ 1]);
1368 copySaoUnit(&mergeSaoParam
[mergeIdx
], mergeSrcParam
);
1369 mergeSaoParam
[mergeIdx
].mergeMode
= mergeIdx
? SAO_MERGE_UP
: SAO_MERGE_LEFT
;
1371 mergeDist
[mergeIdx
+ 1] = ((double)estDist
/ m_lumaLambda
);
1374 resetSaoUnit(&mergeSaoParam
[mergeIdx
]);
1378 void SAO::sao2ChromaParamDist(SAOParam
* saoParam
, int addr
, int addrUp
, int addrLeft
, SaoCtuParam mergeSaoParam
[][2], double* mergeDist
)
1380 int64_t bestDist
= 0;
1382 SaoCtuParam
* lclCtuParam
[2] = { &saoParam
->ctuParam
[1][addr
], &saoParam
->ctuParam
[2][addr
] };
1384 double currentRdCostTableBo
[MAX_NUM_SAO_CLASS
];
1385 int bestClassTableBo
[2] = { 0, 0 };
1386 int currentDistortionTableBo
[MAX_NUM_SAO_CLASS
];
1388 resetSaoUnit(lclCtuParam
[0]);
1389 resetSaoUnit(lclCtuParam
[1]);
1390 m_entropyCoder
.load(m_rdContexts
.temp
);
1391 m_entropyCoder
.resetBits();
1392 m_entropyCoder
.codeSaoOffset(*lclCtuParam
[0], 1);
1393 m_entropyCoder
.codeSaoOffset(*lclCtuParam
[1], 2);
1395 double costPartBest
= m_entropyCoder
.getNumberOfWrittenBits() * m_chromaLambda
;
1397 for (int typeIdx
= 0; typeIdx
< MAX_NUM_SAO_TYPE
; typeIdx
++)
1400 if (typeIdx
== SAO_BO
)
1402 // Estimate Best Position
1403 for (int compIdx
= 0; compIdx
< 2; compIdx
++)
1405 double bestRDCostTableBo
= MAX_DOUBLE
;
1406 estDist
[compIdx
] = estSaoTypeDist(compIdx
+ 1, typeIdx
, m_chromaLambda
, currentDistortionTableBo
, currentRdCostTableBo
);
1407 for (int i
= 0; i
< SAO_NUM_BO_CLASSES
- SAO_BO_LEN
+ 1; i
++)
1409 double currentRDCost
= 0.0;
1410 for (int j
= i
; j
< i
+ SAO_BO_LEN
; j
++)
1411 currentRDCost
+= currentRdCostTableBo
[j
];
1413 if (currentRDCost
< bestRDCostTableBo
)
1415 bestRDCostTableBo
= currentRDCost
;
1416 bestClassTableBo
[compIdx
] = i
;
1420 // Re code all Offsets
1422 estDist
[compIdx
] = 0;
1423 for (int classIdx
= bestClassTableBo
[compIdx
]; classIdx
< bestClassTableBo
[compIdx
] + SAO_BO_LEN
; classIdx
++)
1424 estDist
[compIdx
] += currentDistortionTableBo
[classIdx
];
1429 estDist
[0] = estSaoTypeDist(1, typeIdx
, m_chromaLambda
, currentDistortionTableBo
, currentRdCostTableBo
);
1430 estDist
[1] = estSaoTypeDist(2, typeIdx
, m_chromaLambda
, currentDistortionTableBo
, currentRdCostTableBo
);
1433 m_entropyCoder
.load(m_rdContexts
.temp
);
1434 m_entropyCoder
.resetBits();
1436 SaoCtuParam ctuParamRdo
[2];
1437 for (int compIdx
= 0; compIdx
< 2; compIdx
++)
1439 ctuParamRdo
[compIdx
].mergeMode
= SAO_MERGE_NONE
;
1440 ctuParamRdo
[compIdx
].typeIdx
= typeIdx
;
1441 ctuParamRdo
[compIdx
].bandPos
= (typeIdx
== SAO_BO
) ? bestClassTableBo
[compIdx
] : 0;
1442 for (int classIdx
= 0; classIdx
< SAO_NUM_OFFSET
; classIdx
++)
1443 ctuParamRdo
[compIdx
].offset
[classIdx
] = (int)m_offset
[compIdx
+ 1][typeIdx
][classIdx
+ ctuParamRdo
[compIdx
].bandPos
+ 1];
1445 m_entropyCoder
.codeSaoOffset(ctuParamRdo
[compIdx
], compIdx
+ 1);
1448 uint32_t estRate
= m_entropyCoder
.getNumberOfWrittenBits();
1449 double cost
= (double)(estDist
[0] + estDist
[1]) + m_chromaLambda
* (double)estRate
;
1451 if (cost
< costPartBest
)
1453 costPartBest
= cost
;
1454 copySaoUnit(lclCtuParam
[0], &ctuParamRdo
[0]);
1455 copySaoUnit(lclCtuParam
[1], &ctuParamRdo
[1]);
1456 bestDist
= (estDist
[0] + estDist
[1]);
1460 mergeDist
[0] += ((double)bestDist
/ m_chromaLambda
);
1461 m_entropyCoder
.load(m_rdContexts
.temp
);
1462 m_entropyCoder
.codeSaoOffset(*lclCtuParam
[0], 1);
1463 m_entropyCoder
.codeSaoOffset(*lclCtuParam
[1], 2);
1464 m_entropyCoder
.store(m_rdContexts
.temp
);
1466 // merge left or merge up
1468 for (int mergeIdx
= 0; mergeIdx
< 2; mergeIdx
++)
1470 for (int compIdx
= 0; compIdx
< 2; compIdx
++)
1472 int plane
= compIdx
+ 1;
1473 SaoCtuParam
* mergeSrcParam
= NULL
;
1474 if (addrLeft
>= 0 && mergeIdx
== 0)
1475 mergeSrcParam
= &(saoParam
->ctuParam
[plane
][addrLeft
]);
1476 else if (addrUp
>= 0 && mergeIdx
== 1)
1477 mergeSrcParam
= &(saoParam
->ctuParam
[plane
][addrUp
]);
1480 int64_t estDist
= 0;
1481 int typeIdx
= mergeSrcParam
->typeIdx
;
1484 int bandPos
= (typeIdx
== SAO_BO
) ? mergeSrcParam
->bandPos
: 0;
1485 for (int classIdx
= 0; classIdx
< SAO_NUM_OFFSET
; classIdx
++)
1487 int mergeOffset
= mergeSrcParam
->offset
[classIdx
];
1488 estDist
+= estSaoDist(m_count
[plane
][typeIdx
][classIdx
+ bandPos
+ 1], mergeOffset
, m_offsetOrg
[plane
][typeIdx
][classIdx
+ bandPos
+ 1]);
1492 copySaoUnit(&mergeSaoParam
[plane
][mergeIdx
], mergeSrcParam
);
1493 mergeSaoParam
[plane
][mergeIdx
].mergeMode
= mergeIdx
? SAO_MERGE_UP
: SAO_MERGE_LEFT
;
1494 mergeDist
[mergeIdx
+ 1] += ((double)estDist
/ m_chromaLambda
);
1497 resetSaoUnit(&mergeSaoParam
[plane
][mergeIdx
]);