1 /*****************************************************************************
2 * Copyright (C) 2013 x265 project
4 * Authors: Steve Borho <steve@borho.org>
5 * Min Chen <chenm003@163.com>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
21 * This program is also available under a commercial proprietary license.
22 * For more information, contact us at license @ x265.com.
23 *****************************************************************************/
27 #include "framedata.h"
/* Divide num by den and round to the nearest integer, with exact halves
 * rounded away from zero: the magnitude is computed as
 * (2 * |num| + den) / (2 * den) and the sign of num is re-applied afterwards.
 *
 * The doubling is carried out in 64-bit so that neither 2 * num nor 2 * den
 * can overflow a 32-bit intermediate.
 *
 * den must be non-zero; nothing here guards against a zero divisor. */
inline int32_t roundIBDI(int32_t num, int32_t den)
{
    const int64_t wideNum = num;
    const int64_t twiceDen = static_cast<int64_t>(den) * 2;

    if (wideNum >= 0)
        return static_cast<int32_t>((wideNum * 2 + den) / twiceDen);

    // negative input: round the magnitude, then negate the quotient
    return static_cast<int32_t>(-((-wideNum * 2 + den) / twiceDen));
}
/* get the sign of input variable (TODO: this is a dup, make common)
 *
 * Returns -1 when x is negative, 0 when x is zero and +1 when x is positive.
 * Written with comparisons rather than shifts so the result does not depend
 * on how the compiler right-shifts negative values, and so that negating
 * INT_MIN is never evaluated. */
inline int signOf(int x)
{
    return (x > 0) - (x < 0);
}
/* Evaluate (count * offset - 2 * offsetOrg) * offset, i.e.
 * count * offset^2 - 2 * offset * offsetOrg.
 *
 * All operands are widened to 64-bit before multiplying so the products
 * cannot overflow a 32-bit intermediate ahead of the int64_t return. */
inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg)
{
    const int64_t wideOffset = offset;
    const int64_t scaledCount = static_cast<int64_t>(count) * wideOffset;
    const int64_t twiceOrg = static_cast<int64_t>(offsetOrg) * 2;

    return (scaledCount - twiceOrg) * wideOffset;
}
49 } // end anonymous namespace
54 const uint32_t SAO::s_eoTable
[NUM_EDGETYPE
] =
68 m_countPreDblk
= NULL
;
69 m_offsetOrgPreDblk
= NULL
;
75 m_clipTableBase
= NULL
;
86 m_depthSaoRate
[0][0] = 0;
87 m_depthSaoRate
[0][1] = 0;
88 m_depthSaoRate
[0][2] = 0;
89 m_depthSaoRate
[0][3] = 0;
90 m_depthSaoRate
[1][0] = 0;
91 m_depthSaoRate
[1][1] = 0;
92 m_depthSaoRate
[1][2] = 0;
93 m_depthSaoRate
[1][3] = 0;
96 bool SAO::create(x265_param
* param
)
99 m_hChromaShift
= CHROMA_H_SHIFT(param
->internalCsp
);
100 m_vChromaShift
= CHROMA_V_SHIFT(param
->internalCsp
);
102 m_numCuInWidth
= (m_param
->sourceWidth
+ g_maxCUSize
- 1) / g_maxCUSize
;
103 m_numCuInHeight
= (m_param
->sourceHeight
+ g_maxCUSize
- 1) / g_maxCUSize
;
105 const pixel maxY
= (1 << X265_DEPTH
) - 1;
106 const pixel rangeExt
= maxY
>> 1;
107 int numCtu
= m_numCuInWidth
* m_numCuInHeight
;
109 CHECKED_MALLOC(m_clipTableBase
, pixel
, maxY
+ 2 * rangeExt
);
110 CHECKED_MALLOC(m_offsetBo
, pixel
, maxY
+ 2 * rangeExt
);
112 CHECKED_MALLOC(m_tmpL1
, pixel
, g_maxCUSize
+ 1);
113 CHECKED_MALLOC(m_tmpL2
, pixel
, g_maxCUSize
+ 1);
115 for (int i
= 0; i
< 3; i
++)
117 CHECKED_MALLOC(m_tmpU1
[i
], pixel
, m_param
->sourceWidth
);
118 CHECKED_MALLOC(m_tmpU2
[i
], pixel
, m_param
->sourceWidth
);
121 CHECKED_MALLOC(m_count
, PerClass
, NUM_PLANE
);
122 CHECKED_MALLOC(m_offset
, PerClass
, NUM_PLANE
);
123 CHECKED_MALLOC(m_offsetOrg
, PerClass
, NUM_PLANE
);
125 CHECKED_MALLOC(m_countPreDblk
, PerPlane
, numCtu
);
126 CHECKED_MALLOC(m_offsetOrgPreDblk
, PerPlane
, numCtu
);
128 m_clipTable
= &(m_clipTableBase
[rangeExt
]);
130 for (int i
= 0; i
< rangeExt
; i
++)
131 m_clipTableBase
[i
] = 0;
133 for (int i
= 0; i
< maxY
; i
++)
134 m_clipTable
[i
] = (pixel
)i
;
136 for (int i
= maxY
; i
< maxY
+ rangeExt
; i
++)
137 m_clipTable
[i
] = maxY
;
147 X265_FREE(m_clipTableBase
);
148 X265_FREE(m_offsetBo
);
153 for (int i
= 0; i
< 3; i
++)
155 X265_FREE(m_tmpU1
[i
]);
156 X265_FREE(m_tmpU2
[i
]);
161 X265_FREE(m_offsetOrg
);
162 X265_FREE(m_countPreDblk
);
163 X265_FREE(m_offsetOrgPreDblk
);
166 /* allocate memory for SAO parameters */
167 void SAO::allocSaoParam(SAOParam
* saoParam
) const
169 saoParam
->numCuInWidth
= m_numCuInWidth
;
171 saoParam
->ctuParam
[0] = new SaoCtuParam
[m_numCuInHeight
* m_numCuInWidth
];
172 saoParam
->ctuParam
[1] = new SaoCtuParam
[m_numCuInHeight
* m_numCuInWidth
];
173 saoParam
->ctuParam
[2] = new SaoCtuParam
[m_numCuInHeight
* m_numCuInWidth
];
176 void SAO::startSlice(Frame
* frame
, Entropy
& initState
, int qp
)
178 Slice
* slice
= frame
->m_encData
->m_slice
;
180 int qpCb
= Clip3(0, QP_MAX_MAX
, qp
+ slice
->m_pps
->chromaCbQpOffset
);
181 m_lumaLambda
= x265_lambda2_tab
[qp
];
182 m_chromaLambda
= x265_lambda2_tab
[qpCb
]; // Use Cb QP for SAO chroma
185 switch (slice
->m_sliceType
)
194 m_refDepth
= 2 + !IS_REFERENCED(frame
);
200 m_entropyCoder
.load(initState
);
201 m_rdContexts
.next
.load(initState
);
202 m_rdContexts
.cur
.load(initState
);
204 SAOParam
* saoParam
= frame
->m_encData
->m_saoParam
;
207 saoParam
= new SAOParam
;
208 allocSaoParam(saoParam
);
209 frame
->m_encData
->m_saoParam
= saoParam
;
212 rdoSaoUnitRowInit(saoParam
);
214 // NOTE: Disable SAO automatic turn-off when frame parallelism is
215 // enabled, so that the output is exactly the same regardless of the frame thread count
216 if (m_param
->frameNumThreads
> 1)
218 saoParam
->bSaoFlag
[0] = true;
219 saoParam
->bSaoFlag
[1] = true;
223 // CTU-based SAO process without slice granularity
224 void SAO::processSaoCu(int addr
, int typeIdx
, int plane
)
227 const CUData
* cu
= m_frame
->m_encData
->getPicCTU(addr
);
228 pixel
* rec
= m_frame
->m_reconPicYuv
->getPlaneAddr(plane
, addr
);
229 intptr_t stride
= plane
? m_frame
->m_reconPicYuv
->m_strideC
: m_frame
->m_reconPicYuv
->m_stride
;
230 uint32_t picWidth
= m_param
->sourceWidth
;
231 uint32_t picHeight
= m_param
->sourceHeight
;
232 int ctuWidth
= g_maxCUSize
;
233 int ctuHeight
= g_maxCUSize
;
234 uint32_t lpelx
= cu
->m_cuPelX
;
235 uint32_t tpely
= cu
->m_cuPelY
;
238 picWidth
>>= m_hChromaShift
;
239 picHeight
>>= m_vChromaShift
;
240 ctuWidth
>>= m_hChromaShift
;
241 ctuHeight
>>= m_vChromaShift
;
242 lpelx
>>= m_hChromaShift
;
243 tpely
>>= m_vChromaShift
;
245 uint32_t rpelx
= x265_min(lpelx
+ ctuWidth
, picWidth
);
246 uint32_t bpely
= x265_min(tpely
+ ctuHeight
, picHeight
);
247 ctuWidth
= rpelx
- lpelx
;
248 ctuHeight
= bpely
- tpely
;
257 int32_t _upBuff1
[MAX_CU_SIZE
+ 2], *upBuff1
= _upBuff1
+ 1;
258 int32_t _upBufft
[MAX_CU_SIZE
+ 2], *upBufft
= _upBufft
+ 1;
261 const pixel
* recR
= &rec
[ctuWidth
- 1];
262 for (int i
= 0; i
< ctuHeight
+ 1; i
++)
269 tmpU
= &(m_tmpU1
[plane
][lpelx
]);
274 case SAO_EO_0
: // dir: -
276 pixel firstPxl
= 0, lastPxl
= 0;
278 endX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
;
281 for (y
= 0; y
< ctuHeight
; y
++)
283 int signLeft
= signOf(rec
[startX
] - tmpL
[y
]);
284 for (x
= startX
; x
< endX
; x
++)
286 int signRight
= signOf(rec
[x
] - rec
[x
+ 1]);
287 int edgeType
= signRight
+ signLeft
+ 2;
288 signLeft
= -signRight
;
290 rec
[x
] = m_clipTable
[rec
[x
] + m_offsetEo
[edgeType
]];
298 for (y
= 0; y
< ctuHeight
; y
++)
300 int signLeft
= signOf(rec
[startX
] - tmpL
[y
]);
305 if (rpelx
== picWidth
)
306 lastPxl
= rec
[ctuWidth
- 1];
308 primitives
.saoCuOrgE0(rec
, m_offsetEo
, ctuWidth
, (int8_t)signLeft
);
313 if (rpelx
== picWidth
)
314 rec
[ctuWidth
- 1] = lastPxl
;
321 case SAO_EO_1
: // dir: |
324 endY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
;
328 for (x
= 0; x
< ctuWidth
; x
++)
329 upBuff1
[x
] = signOf(rec
[x
] - tmpU
[x
]);
331 for (y
= startY
; y
< endY
; y
++)
333 for (x
= 0; x
< ctuWidth
; x
++)
335 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
]);
336 int edgeType
= signDown
+ upBuff1
[x
] + 2;
337 upBuff1
[x
] = -signDown
;
339 rec
[x
] = m_clipTable
[rec
[x
] + m_offsetEo
[edgeType
]];
347 case SAO_EO_2
: // dir: 135
350 endX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
;
353 endY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
;
358 for (x
= startX
; x
< endX
; x
++)
359 upBuff1
[x
] = signOf(rec
[x
] - tmpU
[x
- 1]);
361 for (y
= startY
; y
< endY
; y
++)
363 upBufft
[startX
] = signOf(rec
[stride
+ startX
] - tmpL
[y
]);
364 for (x
= startX
; x
< endX
; x
++)
366 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
+ 1]);
367 int edgeType
= signDown
+ upBuff1
[x
] + 2;
368 upBufft
[x
+ 1] = -signDown
;
369 rec
[x
] = m_clipTable
[rec
[x
] + m_offsetEo
[edgeType
]];
372 std::swap(upBuff1
, upBufft
);
379 case SAO_EO_3
: // dir: 45
382 endX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
;
385 endY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
;
390 for (x
= startX
- 1; x
< endX
; x
++)
391 upBuff1
[x
] = signOf(rec
[x
] - tmpU
[x
+ 1]);
393 for (y
= startY
; y
< endY
; y
++)
396 int signDown
= signOf(rec
[x
] - tmpL
[y
+ 1]);
397 int edgeType
= signDown
+ upBuff1
[x
] + 2;
398 upBuff1
[x
- 1] = -signDown
;
399 rec
[x
] = m_clipTable
[rec
[x
] + m_offsetEo
[edgeType
]];
400 for (x
= startX
+ 1; x
< endX
; x
++)
402 signDown
= signOf(rec
[x
] - rec
[x
+ stride
- 1]);
403 edgeType
= signDown
+ upBuff1
[x
] + 2;
404 upBuff1
[x
- 1] = -signDown
;
405 rec
[x
] = m_clipTable
[rec
[x
] + m_offsetEo
[edgeType
]];
408 upBuff1
[endX
- 1] = signOf(rec
[endX
- 1 + stride
] - rec
[endX
]);
417 const pixel
* offsetBo
= m_offsetBo
;
419 for (y
= 0; y
< ctuHeight
; y
++)
421 for (x
= 0; x
< ctuWidth
; x
++)
422 rec
[x
] = offsetBo
[rec
[x
]];
432 // if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1)
433 std::swap(m_tmpL1
, m_tmpL2
);
436 /* Process SAO for all units of one CTU row */
437 void SAO::processSaoUnitRow(SaoCtuParam
* ctuParam
, int idxY
, int plane
)
439 intptr_t stride
= plane
? m_frame
->m_reconPicYuv
->m_strideC
: m_frame
->m_reconPicYuv
->m_stride
;
440 uint32_t picWidth
= m_param
->sourceWidth
;
441 int ctuWidth
= g_maxCUSize
;
442 int ctuHeight
= g_maxCUSize
;
445 picWidth
>>= m_hChromaShift
;
446 ctuWidth
>>= m_hChromaShift
;
447 ctuHeight
>>= m_vChromaShift
;
452 pixel
* rec
= m_frame
->m_reconPicYuv
->m_picOrg
[plane
];
453 memcpy(m_tmpU1
[plane
], rec
, sizeof(pixel
) * picWidth
);
456 int addr
= idxY
* m_numCuInWidth
;
457 pixel
* rec
= plane
? m_frame
->m_reconPicYuv
->getChromaAddr(plane
, addr
) : m_frame
->m_reconPicYuv
->getLumaAddr(addr
);
459 for (int i
= 0; i
< ctuHeight
+ 1; i
++)
465 rec
-= (stride
<< 1);
467 memcpy(m_tmpU2
[plane
], rec
, sizeof(pixel
) * picWidth
);
469 const int boShift
= X265_DEPTH
- SAO_BO_BITS
;
471 for (int idxX
= 0; idxX
< m_numCuInWidth
; idxX
++)
473 addr
= idxY
* m_numCuInWidth
+ idxX
;
475 bool mergeLeftFlag
= ctuParam
[addr
].mergeMode
== SAO_MERGE_LEFT
;
476 int typeIdx
= ctuParam
[addr
].typeIdx
;
482 if (typeIdx
== SAO_BO
)
484 pixel
* offsetBo
= m_offsetBo
;
485 int offset
[SAO_NUM_BO_CLASSES
];
486 memset(offset
, 0, sizeof(offset
));
488 for (int i
= 0; i
< SAO_NUM_OFFSET
; i
++)
489 offset
[((ctuParam
[addr
].bandPos
+ i
) & (SAO_NUM_BO_CLASSES
- 1))] = ctuParam
[addr
].offset
[i
] << SAO_BIT_INC
;
491 for (int i
= 0; i
< (1 << X265_DEPTH
); i
++)
492 offsetBo
[i
] = m_clipTable
[i
+ offset
[i
>> boShift
]];
494 else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
496 int offset
[NUM_EDGETYPE
];
498 for (int i
= 0; i
< SAO_NUM_OFFSET
; i
++)
499 offset
[i
+ 1] = ctuParam
[addr
].offset
[i
] << SAO_BIT_INC
;
501 for (int edgeType
= 0; edgeType
< NUM_EDGETYPE
; edgeType
++)
502 m_offsetEo
[edgeType
] = (int8_t)offset
[s_eoTable
[edgeType
]];
505 processSaoCu(addr
, typeIdx
, plane
);
507 else if (idxX
!= (m_numCuInWidth
- 1))
509 rec
= plane
? m_frame
->m_reconPicYuv
->getChromaAddr(plane
, addr
) : m_frame
->m_reconPicYuv
->getLumaAddr(addr
);
511 for (int i
= 0; i
< ctuHeight
+ 1; i
++)
513 m_tmpL1
[i
] = rec
[ctuWidth
- 1];
519 std::swap(m_tmpU1
[plane
], m_tmpU2
[plane
]);
522 void SAO::resetSaoUnit(SaoCtuParam
* saoUnit
)
524 saoUnit
->mergeMode
= SAO_MERGE_NONE
;
525 saoUnit
->typeIdx
= -1;
526 saoUnit
->bandPos
= 0;
528 for (int i
= 0; i
< SAO_NUM_OFFSET
; i
++)
529 saoUnit
->offset
[i
] = 0;
532 void SAO::copySaoUnit(SaoCtuParam
* saoUnitDst
, const SaoCtuParam
* saoUnitSrc
)
534 saoUnitDst
->mergeMode
= saoUnitSrc
->mergeMode
;
535 saoUnitDst
->typeIdx
= saoUnitSrc
->typeIdx
;
536 saoUnitDst
->bandPos
= saoUnitSrc
->bandPos
;
538 for (int i
= 0; i
< SAO_NUM_OFFSET
; i
++)
539 saoUnitDst
->offset
[i
] = saoUnitSrc
->offset
[i
];
542 /* Calculate SAO statistics for current CTU without non-crossing slice */
543 void SAO::calcSaoStatsCu(int addr
, int plane
)
546 CUData
* cu
= m_frame
->m_encData
->getPicCTU(addr
);
547 const pixel
* fenc0
= m_frame
->m_origPicYuv
->getPlaneAddr(plane
, addr
);
548 const pixel
* rec0
= m_frame
->m_reconPicYuv
->getPlaneAddr(plane
, addr
);
551 intptr_t stride
= plane
? m_frame
->m_reconPicYuv
->m_strideC
: m_frame
->m_reconPicYuv
->m_stride
;
552 uint32_t picWidth
= m_param
->sourceWidth
;
553 uint32_t picHeight
= m_param
->sourceHeight
;
554 int ctuWidth
= g_maxCUSize
;
555 int ctuHeight
= g_maxCUSize
;
556 uint32_t lpelx
= cu
->m_cuPelX
;
557 uint32_t tpely
= cu
->m_cuPelY
;
560 picWidth
>>= m_hChromaShift
;
561 picHeight
>>= m_vChromaShift
;
562 ctuWidth
>>= m_hChromaShift
;
563 ctuHeight
>>= m_vChromaShift
;
564 lpelx
>>= m_hChromaShift
;
565 tpely
>>= m_vChromaShift
;
567 uint32_t rpelx
= x265_min(lpelx
+ ctuWidth
, picWidth
);
568 uint32_t bpely
= x265_min(tpely
+ ctuHeight
, picHeight
);
569 ctuWidth
= rpelx
- lpelx
;
570 ctuHeight
= bpely
- tpely
;
579 int skipB
= plane
? 2 : 4;
580 int skipR
= plane
? 3 : 5;
582 int32_t _upBuff1
[MAX_CU_SIZE
+ 2], *upBuff1
= _upBuff1
+ 1;
583 int32_t _upBufft
[MAX_CU_SIZE
+ 2], *upBufft
= _upBufft
+ 1;
587 const int boShift
= X265_DEPTH
- SAO_BO_BITS
;
589 if (m_param
->bSaoNonDeblocked
)
591 skipB
= plane
? 1 : 3;
592 skipR
= plane
? 2 : 4;
594 stats
= m_offsetOrg
[plane
][SAO_BO
];
595 count
= m_count
[plane
][SAO_BO
];
600 endX
= (rpelx
== picWidth
) ? ctuWidth
: ctuWidth
- skipR
;
601 endY
= (bpely
== picHeight
) ? ctuHeight
: ctuHeight
- skipB
;
603 for (y
= 0; y
< endY
; y
++)
605 for (x
= 0; x
< endX
; x
++)
607 int classIdx
= 1 + (rec
[x
] >> boShift
);
608 stats
[classIdx
] += (fenc
[x
] - rec
[x
]);
618 // SAO_EO_0: // dir: -
620 if (m_param
->bSaoNonDeblocked
)
622 skipB
= plane
? 1 : 3;
623 skipR
= plane
? 3 : 5;
625 stats
= m_offsetOrg
[plane
][SAO_EO_0
];
626 count
= m_count
[plane
][SAO_EO_0
];
632 endX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
- skipR
;
633 for (y
= 0; y
< ctuHeight
- skipB
; y
++)
635 int signLeft
= signOf(rec
[startX
] - rec
[startX
- 1]);
636 for (x
= startX
; x
< endX
; x
++)
638 int signRight
= signOf(rec
[x
] - rec
[x
+ 1]);
639 int edgeType
= signRight
+ signLeft
+ 2;
640 signLeft
= -signRight
;
642 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
643 count
[s_eoTable
[edgeType
]]++;
651 // SAO_EO_1: // dir: |
653 if (m_param
->bSaoNonDeblocked
)
655 skipB
= plane
? 2 : 4;
656 skipR
= plane
? 2 : 4;
658 stats
= m_offsetOrg
[plane
][SAO_EO_1
];
659 count
= m_count
[plane
][SAO_EO_1
];
665 endX
= (rpelx
== picWidth
) ? ctuWidth
: ctuWidth
- skipR
;
666 endY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
- skipB
;
673 for (x
= 0; x
< ctuWidth
; x
++)
674 upBuff1
[x
] = signOf(rec
[x
] - rec
[x
- stride
]);
676 for (y
= startY
; y
< endY
; y
++)
678 for (x
= 0; x
< endX
; x
++)
680 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
]);
681 int edgeType
= signDown
+ upBuff1
[x
] + 2;
682 upBuff1
[x
] = -signDown
;
684 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
685 count
[s_eoTable
[edgeType
]]++;
693 // SAO_EO_2: // dir: 135
695 if (m_param
->bSaoNonDeblocked
)
697 skipB
= plane
? 2 : 4;
698 skipR
= plane
? 3 : 5;
700 stats
= m_offsetOrg
[plane
][SAO_EO_2
];
701 count
= m_count
[plane
][SAO_EO_2
];
707 endX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
- skipR
;
710 endY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
- skipB
;
717 for (x
= startX
; x
< endX
; x
++)
718 upBuff1
[x
] = signOf(rec
[x
] - rec
[x
- stride
- 1]);
720 for (y
= startY
; y
< endY
; y
++)
722 upBufft
[startX
] = signOf(rec
[startX
+ stride
] - rec
[startX
- 1]);
723 for (x
= startX
; x
< endX
; x
++)
725 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
+ 1]);
726 int edgeType
= signDown
+ upBuff1
[x
] + 2;
727 upBufft
[x
+ 1] = -signDown
;
728 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
729 count
[s_eoTable
[edgeType
]]++;
732 std::swap(upBuff1
, upBufft
);
739 // SAO_EO_3: // dir: 45
741 if (m_param
->bSaoNonDeblocked
)
743 skipB
= plane
? 2 : 4;
744 skipR
= plane
? 3 : 5;
746 stats
= m_offsetOrg
[plane
][SAO_EO_3
];
747 count
= m_count
[plane
][SAO_EO_3
];
753 endX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
- skipR
;
756 endY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
- skipB
;
764 for (x
= startX
- 1; x
< endX
; x
++)
765 upBuff1
[x
] = signOf(rec
[x
] - rec
[x
- stride
+ 1]);
767 for (y
= startY
; y
< endY
; y
++)
769 for (x
= startX
; x
< endX
; x
++)
771 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
- 1]);
772 int edgeType
= signDown
+ upBuff1
[x
] + 2;
773 upBuff1
[x
- 1] = -signDown
;
774 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
775 count
[s_eoTable
[edgeType
]]++;
778 upBuff1
[endX
- 1] = signOf(rec
[endX
- 1 + stride
] - rec
[endX
]);
787 void SAO::calcSaoStatsCu_BeforeDblk(Frame
* frame
, int idxX
, int idxY
)
789 int addr
= idxX
+ m_numCuInWidth
* idxY
;
792 CUData
* cu
= frame
->m_encData
->getPicCTU(addr
);
795 intptr_t stride
= m_frame
->m_reconPicYuv
->m_stride
;
796 uint32_t picWidth
= m_param
->sourceWidth
;
797 uint32_t picHeight
= m_param
->sourceHeight
;
798 int ctuWidth
= g_maxCUSize
;
799 int ctuHeight
= g_maxCUSize
;
800 uint32_t lpelx
= cu
->m_cuPelX
;
801 uint32_t tpely
= cu
->m_cuPelY
;
802 uint32_t rpelx
= x265_min(lpelx
+ ctuWidth
, picWidth
);
803 uint32_t bpely
= x265_min(tpely
+ ctuHeight
, picHeight
);
804 ctuWidth
= rpelx
- lpelx
;
805 ctuHeight
= bpely
- tpely
;
817 int32_t _upBuff1
[MAX_CU_SIZE
+ 2], *upBuff1
= _upBuff1
+ 1;
818 int32_t _upBufft
[MAX_CU_SIZE
+ 2], *upBufft
= _upBufft
+ 1;
820 const int boShift
= X265_DEPTH
- SAO_BO_BITS
;
822 memset(m_countPreDblk
[addr
], 0, sizeof(PerPlane
));
823 memset(m_offsetOrgPreDblk
[addr
], 0, sizeof(PerPlane
));
825 for (int plane
= 0; plane
< NUM_PLANE
; plane
++)
829 stride
= frame
->m_reconPicYuv
->m_strideC
;
830 picWidth
>>= m_hChromaShift
;
831 picHeight
>>= m_vChromaShift
;
832 ctuWidth
>>= m_hChromaShift
;
833 ctuHeight
>>= m_vChromaShift
;
834 lpelx
>>= m_hChromaShift
;
835 tpely
>>= m_vChromaShift
;
836 rpelx
>>= m_hChromaShift
;
837 bpely
>>= m_vChromaShift
;
842 skipB
= plane
? 1 : 3;
843 skipR
= plane
? 2 : 4;
845 stats
= m_offsetOrgPreDblk
[addr
][plane
][SAO_BO
];
846 count
= m_countPreDblk
[addr
][plane
][SAO_BO
];
848 const pixel
* fenc0
= m_frame
->m_origPicYuv
->getPlaneAddr(plane
, addr
);
849 const pixel
* rec0
= m_frame
->m_reconPicYuv
->getPlaneAddr(plane
, addr
);
853 startX
= (rpelx
== picWidth
) ? ctuWidth
: ctuWidth
- skipR
;
854 startY
= (bpely
== picHeight
) ? ctuHeight
: ctuHeight
- skipB
;
856 for (y
= 0; y
< ctuHeight
; y
++)
858 for (x
= (y
< startY
? startX
: 0); x
< ctuWidth
; x
++)
860 int classIdx
= 1 + (rec
[x
] >> boShift
);
861 stats
[classIdx
] += (fenc
[x
] - rec
[x
]);
869 // SAO_EO_0: // dir: -
871 skipB
= plane
? 1 : 3;
872 skipR
= plane
? 3 : 5;
874 stats
= m_offsetOrgPreDblk
[addr
][plane
][SAO_EO_0
];
875 count
= m_countPreDblk
[addr
][plane
][SAO_EO_0
];
880 startX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
- skipR
;
881 startY
= (bpely
== picHeight
) ? ctuHeight
: ctuHeight
- skipB
;
883 // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
884 endX
= ctuWidth
- 1; // not refer right CTU
886 for (y
= 0; y
< ctuHeight
; y
++)
888 x
= (y
< startY
? startX
: firstX
);
889 int signLeft
= signOf(rec
[x
] - rec
[x
- 1]);
890 for (; x
< endX
; x
++)
892 int signRight
= signOf(rec
[x
] - rec
[x
+ 1]);
893 int edgeType
= signRight
+ signLeft
+ 2;
894 signLeft
= -signRight
;
896 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
897 count
[s_eoTable
[edgeType
]]++;
905 // SAO_EO_1: // dir: |
907 skipB
= plane
? 2 : 4;
908 skipR
= plane
? 2 : 4;
910 stats
= m_offsetOrgPreDblk
[addr
][plane
][SAO_EO_1
];
911 count
= m_countPreDblk
[addr
][plane
][SAO_EO_1
];
916 startX
= (rpelx
== picWidth
) ? ctuWidth
: ctuWidth
- skipR
;
917 startY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
- skipB
;
919 // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
920 endY
= ctuHeight
- 1; // not refer below CTU
927 for (x
= startX
; x
< ctuWidth
; x
++)
928 upBuff1
[x
] = signOf(rec
[x
] - rec
[x
- stride
]);
930 for (y
= firstY
; y
< endY
; y
++)
932 for (x
= (y
< startY
- 1 ? startX
: 0); x
< ctuWidth
; x
++)
934 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
]);
935 int edgeType
= signDown
+ upBuff1
[x
] + 2;
936 upBuff1
[x
] = -signDown
;
938 if (x
< startX
&& y
< startY
)
941 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
942 count
[s_eoTable
[edgeType
]]++;
950 // SAO_EO_2: // dir: 135
952 skipB
= plane
? 2 : 4;
953 skipR
= plane
? 3 : 5;
955 stats
= m_offsetOrgPreDblk
[addr
][plane
][SAO_EO_2
];
956 count
= m_countPreDblk
[addr
][plane
][SAO_EO_2
];
961 startX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
- skipR
;
962 startY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
- skipB
;
965 // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
966 // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
967 endX
= ctuWidth
- 1; // not refer right CTU
968 endY
= ctuHeight
- 1; // not refer below CTU
975 for (x
= startX
; x
< endX
; x
++)
976 upBuff1
[x
] = signOf(rec
[x
] - rec
[x
- stride
- 1]);
978 for (y
= firstY
; y
< endY
; y
++)
980 x
= (y
< startY
- 1 ? startX
: firstX
);
981 upBufft
[x
] = signOf(rec
[x
+ stride
] - rec
[x
- 1]);
982 for (; x
< endX
; x
++)
984 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
+ 1]);
985 int edgeType
= signDown
+ upBuff1
[x
] + 2;
986 upBufft
[x
+ 1] = -signDown
;
988 if (x
< startX
&& y
< startY
)
991 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
992 count
[s_eoTable
[edgeType
]]++;
995 std::swap(upBuff1
, upBufft
);
1002 // SAO_EO_3: // dir: 45
1004 skipB
= plane
? 2 : 4;
1005 skipR
= plane
? 3 : 5;
1007 stats
= m_offsetOrgPreDblk
[addr
][plane
][SAO_EO_3
];
1008 count
= m_countPreDblk
[addr
][plane
][SAO_EO_3
];
1013 startX
= (rpelx
== picWidth
) ? ctuWidth
- 1 : ctuWidth
- skipR
;
1014 startY
= (bpely
== picHeight
) ? ctuHeight
- 1 : ctuHeight
- skipB
;
1017 // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
1018 // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
1019 endX
= ctuWidth
- 1; // not refer right CTU
1020 endY
= ctuHeight
- 1; // not refer below CTU
1027 for (x
= startX
- 1; x
< endX
; x
++)
1028 upBuff1
[x
] = signOf(rec
[x
] - rec
[x
- stride
+ 1]);
1030 for (y
= firstY
; y
< endY
; y
++)
1032 for (x
= (y
< startY
- 1 ? startX
: firstX
); x
< endX
; x
++)
1034 int signDown
= signOf(rec
[x
] - rec
[x
+ stride
- 1]);
1035 int edgeType
= signDown
+ upBuff1
[x
] + 2;
1036 upBuff1
[x
- 1] = -signDown
;
1038 if (x
< startX
&& y
< startY
)
1041 stats
[s_eoTable
[edgeType
]] += (fenc
[x
] - rec
[x
]);
1042 count
[s_eoTable
[edgeType
]]++;
1045 upBuff1
[endX
- 1] = signOf(rec
[endX
- 1 + stride
] - rec
[endX
]);
1054 /* reset offset statistics */
1055 void SAO::resetStats()
1057 memset(m_count
, 0, sizeof(PerClass
) * NUM_PLANE
);
1058 memset(m_offset
, 0, sizeof(PerClass
) * NUM_PLANE
);
1059 memset(m_offsetOrg
, 0, sizeof(PerClass
) * NUM_PLANE
);
1062 void SAO::rdoSaoUnitRowInit(SAOParam
* saoParam
)
1064 saoParam
->bSaoFlag
[0] = true;
1065 saoParam
->bSaoFlag
[1] = true;
1067 m_numNoSao
[0] = 0; // Luma
1068 m_numNoSao
[1] = 0; // Chroma
1069 if (m_refDepth
> 0 && m_depthSaoRate
[0][m_refDepth
- 1] > SAO_ENCODING_RATE
)
1070 saoParam
->bSaoFlag
[0] = false;
1071 if (m_refDepth
> 0 && m_depthSaoRate
[1][m_refDepth
- 1] > SAO_ENCODING_RATE_CHROMA
)
1072 saoParam
->bSaoFlag
[1] = false;
1075 void SAO::rdoSaoUnitRowEnd(const SAOParam
* saoParam
, int numctus
)
1077 if (!saoParam
->bSaoFlag
[0])
1078 m_depthSaoRate
[0][m_refDepth
] = 1.0;
1080 m_depthSaoRate
[0][m_refDepth
] = m_numNoSao
[0] / ((double)numctus
);
1082 if (!saoParam
->bSaoFlag
[1])
1083 m_depthSaoRate
[1][m_refDepth
] = 1.0;
1085 m_depthSaoRate
[1][m_refDepth
] = m_numNoSao
[1] / ((double)numctus
);
1088 void SAO::rdoSaoUnitRow(SAOParam
* saoParam
, int idxY
)
1090 SaoCtuParam mergeSaoParam
[NUM_MERGE_MODE
][2];
1091 double mergeDist
[NUM_MERGE_MODE
];
1092 bool allowMerge
[2]; // left, up
1093 allowMerge
[1] = (idxY
> 0);
1095 for (int idxX
= 0; idxX
< m_numCuInWidth
; idxX
++)
1097 int addr
= idxX
+ idxY
* m_numCuInWidth
;
1098 int addrUp
= idxY
? addr
- m_numCuInWidth
: -1;
1099 int addrLeft
= idxX
? addr
- 1 : -1;
1100 allowMerge
[0] = (idxX
> 0);
1102 m_entropyCoder
.load(m_rdContexts
.cur
);
1104 m_entropyCoder
.codeSaoMerge(0);
1106 m_entropyCoder
.codeSaoMerge(0);
1107 m_entropyCoder
.store(m_rdContexts
.temp
);
1108 // reset stats Y, Cb, Cr
1109 for (int plane
= 0; plane
< 3; plane
++)
1111 for (int j
= 0; j
< MAX_NUM_SAO_TYPE
; j
++)
1113 for (int k
= 0; k
< MAX_NUM_SAO_CLASS
; k
++)
1115 m_offset
[plane
][j
][k
] = 0;
1116 if (m_param
->bSaoNonDeblocked
)
1118 m_count
[plane
][j
][k
] = m_countPreDblk
[addr
][plane
][j
][k
];
1119 m_offsetOrg
[plane
][j
][k
] = m_offsetOrgPreDblk
[addr
][plane
][j
][k
];
1123 m_count
[plane
][j
][k
] = 0;
1124 m_offsetOrg
[plane
][j
][k
] = 0;
1129 saoParam
->ctuParam
[plane
][addr
].mergeMode
= SAO_MERGE_NONE
;
1130 saoParam
->ctuParam
[plane
][addr
].typeIdx
= -1;
1131 saoParam
->ctuParam
[plane
][addr
].bandPos
= 0;
1132 if (saoParam
->bSaoFlag
[plane
> 0])
1133 calcSaoStatsCu(addr
, plane
);
1136 saoComponentParamDist(saoParam
, addr
, addrUp
, addrLeft
, &mergeSaoParam
[0][0], mergeDist
);
1138 sao2ChromaParamDist(saoParam
, addr
, addrUp
, addrLeft
, mergeSaoParam
, mergeDist
);
1140 if (saoParam
->bSaoFlag
[0] || saoParam
->bSaoFlag
[1])
1142 // Cost of new SAO_params
1143 m_entropyCoder
.load(m_rdContexts
.cur
);
1144 m_entropyCoder
.resetBits();
1146 m_entropyCoder
.codeSaoMerge(0);
1148 m_entropyCoder
.codeSaoMerge(0);
1149 for (int plane
= 0; plane
< 3; plane
++)
1151 if (saoParam
->bSaoFlag
[plane
> 0])
1152 m_entropyCoder
.codeSaoOffset(saoParam
->ctuParam
[plane
][addr
], plane
);
1155 uint32_t rate
= m_entropyCoder
.getNumberOfWrittenBits();
1156 double bestCost
= mergeDist
[0] + (double)rate
;
1157 m_entropyCoder
.store(m_rdContexts
.temp
);
1160 for (int mergeIdx
= 0; mergeIdx
< 2; ++mergeIdx
)
1162 if (!allowMerge
[mergeIdx
])
1165 m_entropyCoder
.load(m_rdContexts
.cur
);
1166 m_entropyCoder
.resetBits();
1168 m_entropyCoder
.codeSaoMerge(1 - mergeIdx
);
1169 if (allowMerge
[1] && (mergeIdx
== 1))
1170 m_entropyCoder
.codeSaoMerge(1);
1172 rate
= m_entropyCoder
.getNumberOfWrittenBits();
1173 double mergeCost
= mergeDist
[mergeIdx
+ 1] + (double)rate
;
1174 if (mergeCost
< bestCost
)
1176 SaoMergeMode mergeMode
= mergeIdx
? SAO_MERGE_UP
: SAO_MERGE_LEFT
;
1177 bestCost
= mergeCost
;
1178 m_entropyCoder
.store(m_rdContexts
.temp
);
1179 for (int plane
= 0; plane
< 3; plane
++)
1181 mergeSaoParam
[plane
][mergeIdx
].mergeMode
= mergeMode
;
1182 if (saoParam
->bSaoFlag
[plane
> 0])
1183 copySaoUnit(&saoParam
->ctuParam
[plane
][addr
], &mergeSaoParam
[plane
][mergeIdx
]);
1188 if (saoParam
->ctuParam
[0][addr
].typeIdx
< 0)
1190 if (saoParam
->ctuParam
[1][addr
].typeIdx
< 0)
1192 m_entropyCoder
.load(m_rdContexts
.temp
);
1193 m_entropyCoder
.store(m_rdContexts
.cur
);
// Walks the classes of one SAO type for one plane, choosing an offset per
// class and accumulating a distortion estimate in estDist.
// NOTE(review): this listing omits some source lines of the body (the inner
// line numbers skip, e.g. 1212 -> 1216 and 1225 -> 1232), so the guards around
// several statements and the return statement are not visible here; the notes
// below describe only what is shown - confirm against the complete file.
1198 /** rate distortion optimization of SAO unit */
1199 inline int64_t SAO::estSaoTypeDist(int plane
, int typeIdx
, double lambda
, int32_t* currentDistortionTableBo
, double* currentRdCostTableBo
)
1201 int64_t estDist
= 0;
// classIdx starts at 1; the upper bound is SAO_EO_LEN + 1 when typeIdx is
// below SAO_BO and SAO_NUM_BO_CLASSES + 1 otherwise
1203 for (int classIdx
= 1; classIdx
< ((typeIdx
< SAO_BO
) ? SAO_EO_LEN
+ 1 : SAO_NUM_BO_CLASSES
+ 1); classIdx
++)
1205 int32_t count
= m_count
[plane
][typeIdx
][classIdx
];
// offsetOrg and offsetOut are references, so writes go straight into the
// m_offsetOrg / m_offset tables
1206 int32_t& offsetOrg
= m_offsetOrg
[plane
][typeIdx
][classIdx
];
1207 int32_t& offsetOut
= m_offset
[plane
][typeIdx
][classIdx
];
// for SAO_BO the caller's per-class tables are seeded (distortion 0, cost
// lambda) at index classIdx - 1
1209 if (typeIdx
== SAO_BO
)
1211 currentDistortionTableBo
[classIdx
- 1] = 0;
1212 currentRdCostTableBo
[classIdx
- 1] = lambda
;
// initial offset: rounded offsetOrg / (count << SAO_BIT_INC), clipped to
// (-OFFSET_THRESH, OFFSET_THRESH)
// NOTE(review): roundIBDI divides by its second argument; any guard against
// count == 0 is not visible in this listing - verify
1216 int offset
= roundIBDI(offsetOrg
, count
<< SAO_BIT_INC
);
1217 offset
= Clip3(-OFFSET_THRESH
+ 1, OFFSET_THRESH
- 1, offset
);
1218 if (typeIdx
< SAO_BO
)
// NOTE(review): the condition choosing between the MAX and MIN clamps below
// is not visible in this listing
1221 offset
= X265_MAX(offset
, 0);
1223 offset
= X265_MIN(offset
, 0);
1225 offsetOut
= estIterOffset(typeIdx
, classIdx
, lambda
, offset
, count
, offsetOrg
, currentDistortionTableBo
, currentRdCostTableBo
);
// for types other than SAO_BO the distortion of the chosen offset (shifted
// left by SAO_BIT_INC) is added to the running total
1232 if (typeIdx
!= SAO_BO
)
1233 estDist
+= estSaoDist(count
, (int)offsetOut
<< SAO_BIT_INC
, offsetOrg
);
// Evaluates a candidate SAO offset for one class: estimates a signalling rate
// and a distortion for `offset`, forms cost = distortion + lambda * rate, and
// tracks the lowest cost seen so far (initially lambda).
// NOTE(review): this listing omits some source lines of the body (the inner
// line numbers skip, e.g. 1244 -> 1247, 1249 -> 1252 and past 1266), so the
// loop construct, the statement guarded by the OFFSET_THRESH test and the
// return statement are not visible here - confirm against the complete file.
1239 inline int SAO::estIterOffset(int typeIdx
, int classIdx
, double lambda
, int offset
, int32_t count
, int32_t offsetOrg
, int32_t* currentDistortionTableBo
, double* currentRdCostTableBo
)
1243 // Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here.
1244 double tempMinCost
= lambda
;
1247 // Calculate the bits required for signalling the offset
// |offset| + 2 for SAO_BO, |offset| + 1 for every other type
1248 int tempRate
= (typeIdx
== SAO_BO
) ? (abs(offset
) + 2) : (abs(offset
) + 1);
1249 if (abs(offset
) == OFFSET_THRESH
- 1)
1252 // Do the dequantization before distortion calculation
1253 int tempOffset
= offset
<< SAO_BIT_INC
;
1254 int64_t tempDist
= estSaoDist(count
, tempOffset
, offsetOrg
);
1255 double tempCost
= ((double)tempDist
+ lambda
* (double)tempRate
);
// keep this candidate when it is strictly cheaper than the best so far
1256 if (tempCost
< tempMinCost
)
1258 tempMinCost
= tempCost
;
// for SAO_BO, also publish the winning distortion and cost to the caller's
// per-class tables at index classIdx - 1
1260 if (typeIdx
== SAO_BO
)
1262 currentDistortionTableBo
[classIdx
- 1] = (int)tempDist
;
1263 currentRdCostTableBo
[classIdx
- 1] = tempCost
;
// step the candidate one unit toward zero
1266 offset
= (offset
> 0) ? (offset
- 1) : (offset
+ 1);
1272 void SAO::saoComponentParamDist(SAOParam
* saoParam
, int addr
, int addrUp
, int addrLeft
, SaoCtuParam
* mergeSaoParam
, double* mergeDist
)
1274 int64_t bestDist
= 0;
1276 SaoCtuParam
* lclCtuParam
= &saoParam
->ctuParam
[0][addr
];
1278 double bestRDCostTableBo
= MAX_DOUBLE
;
1279 int bestClassTableBo
= 0;
1280 int currentDistortionTableBo
[MAX_NUM_SAO_CLASS
];
1281 double currentRdCostTableBo
[MAX_NUM_SAO_CLASS
];
1283 resetSaoUnit(lclCtuParam
);
1284 m_entropyCoder
.load(m_rdContexts
.temp
);
1285 m_entropyCoder
.resetBits();
1286 m_entropyCoder
.codeSaoOffset(*lclCtuParam
, 0);
1287 double dCostPartBest
= m_entropyCoder
.getNumberOfWrittenBits() * m_lumaLambda
;
1289 for (int typeIdx
= 0; typeIdx
< MAX_NUM_SAO_TYPE
; typeIdx
++)
1291 int64_t estDist
= estSaoTypeDist(0, typeIdx
, m_lumaLambda
, currentDistortionTableBo
, currentRdCostTableBo
);
1293 if (typeIdx
== SAO_BO
)
1295 // Estimate Best Position
1296 for (int i
= 0; i
< SAO_NUM_BO_CLASSES
- SAO_BO_LEN
+ 1; i
++)
1298 double currentRDCost
= 0.0;
1299 for (int j
= i
; j
< i
+ SAO_BO_LEN
; j
++)
1300 currentRDCost
+= currentRdCostTableBo
[j
];
1302 if (currentRDCost
< bestRDCostTableBo
)
1304 bestRDCostTableBo
= currentRDCost
;
1305 bestClassTableBo
= i
;
1309 // Re code all Offsets
1312 for (int classIdx
= bestClassTableBo
; classIdx
< bestClassTableBo
+ SAO_BO_LEN
; classIdx
++)
1313 estDist
+= currentDistortionTableBo
[classIdx
];
1315 SaoCtuParam ctuParamRdo
;
1316 ctuParamRdo
.mergeMode
= SAO_MERGE_NONE
;
1317 ctuParamRdo
.typeIdx
= typeIdx
;
1318 ctuParamRdo
.bandPos
= (typeIdx
== SAO_BO
) ? bestClassTableBo
: 0;
1319 for (int classIdx
= 0; classIdx
< SAO_NUM_OFFSET
; classIdx
++)
1320 ctuParamRdo
.offset
[classIdx
] = (int)m_offset
[0][typeIdx
][classIdx
+ ctuParamRdo
.bandPos
+ 1];
1322 m_entropyCoder
.load(m_rdContexts
.temp
);
1323 m_entropyCoder
.resetBits();
1324 m_entropyCoder
.codeSaoOffset(ctuParamRdo
, 0);
1326 uint32_t estRate
= m_entropyCoder
.getNumberOfWrittenBits();
1327 double cost
= (double)estDist
+ m_lumaLambda
* (double)estRate
;
1329 if (cost
< dCostPartBest
)
1331 dCostPartBest
= cost
;
1332 copySaoUnit(lclCtuParam
, &ctuParamRdo
);
1337 mergeDist
[0] = ((double)bestDist
/ m_lumaLambda
);
1338 m_entropyCoder
.load(m_rdContexts
.temp
);
1339 m_entropyCoder
.codeSaoOffset(*lclCtuParam
, 0);
1340 m_entropyCoder
.store(m_rdContexts
.temp
);
1342 // merge left or merge up
1344 for (int mergeIdx
= 0; mergeIdx
< 2; mergeIdx
++)
1346 SaoCtuParam
* mergeSrcParam
= NULL
;
1347 if (addrLeft
>= 0 && mergeIdx
== 0)
1348 mergeSrcParam
= &(saoParam
->ctuParam
[0][addrLeft
]);
1349 else if (addrUp
>= 0 && mergeIdx
== 1)
1350 mergeSrcParam
= &(saoParam
->ctuParam
[0][addrUp
]);
1353 int64_t estDist
= 0;
1354 int typeIdx
= mergeSrcParam
->typeIdx
;
1357 int bandPos
= (typeIdx
== SAO_BO
) ? mergeSrcParam
->bandPos
: 0;
1358 for (int classIdx
= 0; classIdx
< SAO_NUM_OFFSET
; classIdx
++)
1360 int mergeOffset
= mergeSrcParam
->offset
[classIdx
];
1361 estDist
+= estSaoDist(m_count
[0][typeIdx
][classIdx
+ bandPos
+ 1], mergeOffset
, m_offsetOrg
[0][typeIdx
][classIdx
+ bandPos
+ 1]);
1365 copySaoUnit(&mergeSaoParam
[mergeIdx
], mergeSrcParam
);
1366 mergeSaoParam
[mergeIdx
].mergeMode
= mergeIdx
? SAO_MERGE_UP
: SAO_MERGE_LEFT
;
1368 mergeDist
[mergeIdx
+ 1] = ((double)estDist
/ m_lumaLambda
);
1371 resetSaoUnit(&mergeSaoParam
[mergeIdx
]);
1375 void SAO::sao2ChromaParamDist(SAOParam
* saoParam
, int addr
, int addrUp
, int addrLeft
, SaoCtuParam mergeSaoParam
[][2], double* mergeDist
)
1377 int64_t bestDist
= 0;
1379 SaoCtuParam
* lclCtuParam
[2] = { &saoParam
->ctuParam
[1][addr
], &saoParam
->ctuParam
[2][addr
] };
1381 double currentRdCostTableBo
[MAX_NUM_SAO_CLASS
];
1382 int bestClassTableBo
[2] = { 0, 0 };
1383 int currentDistortionTableBo
[MAX_NUM_SAO_CLASS
];
1385 resetSaoUnit(lclCtuParam
[0]);
1386 resetSaoUnit(lclCtuParam
[1]);
1387 m_entropyCoder
.load(m_rdContexts
.temp
);
1388 m_entropyCoder
.resetBits();
1389 m_entropyCoder
.codeSaoOffset(*lclCtuParam
[0], 1);
1390 m_entropyCoder
.codeSaoOffset(*lclCtuParam
[1], 2);
1392 double costPartBest
= m_entropyCoder
.getNumberOfWrittenBits() * m_chromaLambda
;
1394 for (int typeIdx
= 0; typeIdx
< MAX_NUM_SAO_TYPE
; typeIdx
++)
1397 if (typeIdx
== SAO_BO
)
1399 // Estimate Best Position
1400 for (int compIdx
= 0; compIdx
< 2; compIdx
++)
1402 double bestRDCostTableBo
= MAX_DOUBLE
;
1403 estDist
[compIdx
] = estSaoTypeDist(compIdx
+ 1, typeIdx
, m_chromaLambda
, currentDistortionTableBo
, currentRdCostTableBo
);
1404 for (int i
= 0; i
< SAO_NUM_BO_CLASSES
- SAO_BO_LEN
+ 1; i
++)
1406 double currentRDCost
= 0.0;
1407 for (int j
= i
; j
< i
+ SAO_BO_LEN
; j
++)
1408 currentRDCost
+= currentRdCostTableBo
[j
];
1410 if (currentRDCost
< bestRDCostTableBo
)
1412 bestRDCostTableBo
= currentRDCost
;
1413 bestClassTableBo
[compIdx
] = i
;
1417 // Re code all Offsets
1419 estDist
[compIdx
] = 0;
1420 for (int classIdx
= bestClassTableBo
[compIdx
]; classIdx
< bestClassTableBo
[compIdx
] + SAO_BO_LEN
; classIdx
++)
1421 estDist
[compIdx
] += currentDistortionTableBo
[classIdx
];
1426 estDist
[0] = estSaoTypeDist(1, typeIdx
, m_chromaLambda
, currentDistortionTableBo
, currentRdCostTableBo
);
1427 estDist
[1] = estSaoTypeDist(2, typeIdx
, m_chromaLambda
, currentDistortionTableBo
, currentRdCostTableBo
);
1430 m_entropyCoder
.load(m_rdContexts
.temp
);
1431 m_entropyCoder
.resetBits();
1433 SaoCtuParam ctuParamRdo
[2];
1434 for (int compIdx
= 0; compIdx
< 2; compIdx
++)
1436 ctuParamRdo
[compIdx
].mergeMode
= SAO_MERGE_NONE
;
1437 ctuParamRdo
[compIdx
].typeIdx
= typeIdx
;
1438 ctuParamRdo
[compIdx
].bandPos
= (typeIdx
== SAO_BO
) ? bestClassTableBo
[compIdx
] : 0;
1439 for (int classIdx
= 0; classIdx
< SAO_NUM_OFFSET
; classIdx
++)
1440 ctuParamRdo
[compIdx
].offset
[classIdx
] = (int)m_offset
[compIdx
+ 1][typeIdx
][classIdx
+ ctuParamRdo
[compIdx
].bandPos
+ 1];
1442 m_entropyCoder
.codeSaoOffset(ctuParamRdo
[compIdx
], compIdx
+ 1);
1445 uint32_t estRate
= m_entropyCoder
.getNumberOfWrittenBits();
1446 double cost
= (double)(estDist
[0] + estDist
[1]) + m_chromaLambda
* (double)estRate
;
1448 if (cost
< costPartBest
)
1450 costPartBest
= cost
;
1451 copySaoUnit(lclCtuParam
[0], &ctuParamRdo
[0]);
1452 copySaoUnit(lclCtuParam
[1], &ctuParamRdo
[1]);
1453 bestDist
= (estDist
[0] + estDist
[1]);
1457 mergeDist
[0] += ((double)bestDist
/ m_chromaLambda
);
1458 m_entropyCoder
.load(m_rdContexts
.temp
);
1459 m_entropyCoder
.codeSaoOffset(*lclCtuParam
[0], 1);
1460 m_entropyCoder
.codeSaoOffset(*lclCtuParam
[1], 2);
1461 m_entropyCoder
.store(m_rdContexts
.temp
);
1463 // merge left or merge up
1465 for (int mergeIdx
= 0; mergeIdx
< 2; mergeIdx
++)
1467 for (int compIdx
= 0; compIdx
< 2; compIdx
++)
1469 int plane
= compIdx
+ 1;
1470 SaoCtuParam
* mergeSrcParam
= NULL
;
1471 if (addrLeft
>= 0 && mergeIdx
== 0)
1472 mergeSrcParam
= &(saoParam
->ctuParam
[plane
][addrLeft
]);
1473 else if (addrUp
>= 0 && mergeIdx
== 1)
1474 mergeSrcParam
= &(saoParam
->ctuParam
[plane
][addrUp
]);
1477 int64_t estDist
= 0;
1478 int typeIdx
= mergeSrcParam
->typeIdx
;
1481 int bandPos
= (typeIdx
== SAO_BO
) ? mergeSrcParam
->bandPos
: 0;
1482 for (int classIdx
= 0; classIdx
< SAO_NUM_OFFSET
; classIdx
++)
1484 int mergeOffset
= mergeSrcParam
->offset
[classIdx
];
1485 estDist
+= estSaoDist(m_count
[plane
][typeIdx
][classIdx
+ bandPos
+ 1], mergeOffset
, m_offsetOrg
[plane
][typeIdx
][classIdx
+ bandPos
+ 1]);
1489 copySaoUnit(&mergeSaoParam
[plane
][mergeIdx
], mergeSrcParam
);
1490 mergeSaoParam
[plane
][mergeIdx
].mergeMode
= mergeIdx
? SAO_MERGE_UP
: SAO_MERGE_LEFT
;
1491 mergeDist
[mergeIdx
+ 1] += ((double)estDist
/ m_chromaLambda
);
1494 resetSaoUnit(&mergeSaoParam
[plane
][mergeIdx
]);