Imported Upstream version 1.4+222+hg5f9f7194267b
[deb_x265.git] / source / common / quant.cpp
CommitLineData
72b9787e
JB
1/*****************************************************************************
2 * Copyright (C) 2014 x265 project
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19 *
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
23
24#include "common.h"
25#include "primitives.h"
26#include "quant.h"
27#include "framedata.h"
28#include "entropy.h"
29#include "yuv.h"
30#include "cudata.h"
31#include "contexts.h"
32
33using namespace x265;
34
/* SIGN(x, y): branch-free "apply the sign of y to x" - yields -x when y is negative and x
 * otherwise (including y == 0). Relies on (y >> 31) being 0 or -1, i.e. y is a 32-bit signed
 * value shifted arithmetically. Both arguments are fully parenthesized so that compound
 * expressions (e.g. SIGN(a | b, c - d)) expand with the intended precedence. */
#define SIGN(x, y) (((x) ^ ((y) >> 31)) - ((y) >> 31))
36
37namespace {
38
/* Per-coefficient-group accumulators gathered while RDOQ walks the coefficients of one
 * coding group; the whole struct is cleared with memset() before each group is processed. */
struct coeffGroupRDStats
{
    /* sum of the in-group scan positions of coded coefficients; stays zero
     * unless a coefficient other than position 0 is coded */
    int     nnzBeforePos0;

    /* distortion plus level cost of the coded coefficients */
    int64_t codedLevelAndDist;

    /* distortion the coded coefficients would have if left uncoded */
    int64_t uncodedDist;

    /* cost of signaling the significant coefficient bitmap */
    int64_t sigCost;

    /* cost of signaling the significant coefficient bit of coefficient 0 */
    int64_t sigCost0;
};
47
/* Returns the smaller of x and y.
 *
 * The former bit-trick, y + ((x - y) & ((x - y) >> 31)), evaluated x - y in signed
 * arithmetic; that subtraction overflows (undefined behaviour, and a wrong answer in
 * practice) whenever the difference does not fit in an int, e.g. fastMin(INT_MAX, -1).
 * A plain comparison is correct for every pair of ints. */
inline int fastMin(int x, int y)
{
    return x < y ? x : y;
}
52
b53f7c52 53inline int getICRate(uint32_t absLevel, int32_t diffLevel, const int* greaterOneBits, const int* levelAbsBits, uint32_t absGoRice, uint32_t c1c2Idx)
72b9787e
JB
54{
55 X265_CHECK(c1c2Idx <= 3, "c1c2Idx check failure\n");
56 X265_CHECK(absGoRice <= 4, "absGoRice check failure\n");
57 if (!absLevel)
58 {
59 X265_CHECK(diffLevel < 0, "diffLevel check failure\n");
60 return 0;
61 }
62 int rate = 0;
63
64 if (diffLevel < 0)
65 {
66 X265_CHECK(absLevel <= 2, "absLevel check failure\n");
67 rate += greaterOneBits[(absLevel == 2)];
68
69 if (absLevel == 2)
70 rate += levelAbsBits[0];
71 }
72 else
73 {
74 uint32_t symbol = diffLevel;
75 const uint32_t maxVlc = g_goRiceRange[absGoRice];
76 bool expGolomb = (symbol > maxVlc);
77
78 if (expGolomb)
79 {
80 absLevel = symbol - maxVlc;
81
82 // NOTE: mapping to x86 hardware instruction BSR
83 unsigned long size;
b53f7c52 84 CLZ(size, absLevel);
72b9787e
JB
85 int egs = size * 2 + 1;
86
87 rate += egs << 15;
88
89 // NOTE: in here, expGolomb=true means (symbol >= maxVlc + 1)
90 X265_CHECK(fastMin(symbol, (maxVlc + 1)) == (int)maxVlc + 1, "min check failure\n");
91 symbol = maxVlc + 1;
92 }
93
94 uint32_t prefLen = (symbol >> absGoRice) + 1;
95 uint32_t numBins = fastMin(prefLen + absGoRice, 8 /* g_goRicePrefixLen[absGoRice] + absGoRice */);
96
97 rate += numBins << 15;
98
99 if (c1c2Idx & 1)
100 rate += greaterOneBits[1];
101
102 if (c1c2Idx == 3)
103 rate += levelAbsBits[1];
104 }
105 return rate;
106}
107
108/* Calculates the cost for specific absolute transform level */
b53f7c52 109inline uint32_t getICRateCost(uint32_t absLevel, int32_t diffLevel, const int* greaterOneBits, const int* levelAbsBits, uint32_t absGoRice, uint32_t c1c2Idx)
72b9787e
JB
110{
111 X265_CHECK(absLevel, "absLevel should not be zero\n");
112
113 if (diffLevel < 0)
114 {
115 X265_CHECK((absLevel == 1) || (absLevel == 2), "absLevel range check failure\n");
116
117 uint32_t rate = greaterOneBits[(absLevel == 2)];
118 if (absLevel == 2)
119 rate += levelAbsBits[0];
120 return rate;
121 }
122 else
123 {
124 uint32_t rate;
125 uint32_t symbol = diffLevel;
126 if ((symbol >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
127 {
128 uint32_t length = symbol >> absGoRice;
129 rate = (length + 1 + absGoRice) << 15;
130 }
131 else
132 {
133 uint32_t length = 0;
134 symbol = (symbol >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
135 if (symbol)
136 {
137 unsigned long idx;
b53f7c52 138 CLZ(idx, symbol + 1);
72b9787e
JB
139 length = idx;
140 }
141
142 rate = (COEF_REMAIN_BIN_REDUCTION + length + absGoRice + 1 + length) << 15;
143 }
144 if (c1c2Idx & 1)
145 rate += greaterOneBits[1];
146 if (c1c2Idx == 3)
147 rate += levelAbsBits[1];
148 return rate;
149 }
150}
151
152}
153
154Quant::Quant()
155{
156 m_resiDctCoeff = NULL;
157 m_fencDctCoeff = NULL;
158 m_fencShortBuf = NULL;
159 m_frameNr = NULL;
160 m_nr = NULL;
161}
162
163bool Quant::init(bool useRDOQ, double psyScale, const ScalingList& scalingList, Entropy& entropy)
164{
165 m_entropyCoder = &entropy;
166 m_useRDOQ = useRDOQ;
167 m_psyRdoqScale = (int64_t)(psyScale * 256.0);
168 m_scalingList = &scalingList;
b53f7c52 169 m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
72b9787e
JB
170 m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
171 m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
172
173 return m_resiDctCoeff && m_fencShortBuf;
174}
175
176bool Quant::allocNoiseReduction(const x265_param& param)
177{
178 m_frameNr = X265_MALLOC(NoiseReduction, param.frameNumThreads);
179 if (m_frameNr)
180 memset(m_frameNr, 0, sizeof(NoiseReduction) * param.frameNumThreads);
181 else
182 return false;
183 return true;
184}
185
186Quant::~Quant()
187{
188 X265_FREE(m_frameNr);
189 X265_FREE(m_resiDctCoeff);
190 X265_FREE(m_fencShortBuf);
191}
192
193void Quant::setQPforQuant(const CUData& ctu)
194{
195 m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] : NULL;
196 int qpy = ctu.m_qp[0];
197 m_qpParam[TEXT_LUMA].setQpParam(qpy + QP_BD_OFFSET);
b53f7c52
JB
198 setChromaQP(qpy + ctu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, ctu.m_chromaFormat);
199 setChromaQP(qpy + ctu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, ctu.m_chromaFormat);
72b9787e
JB
200}
201
202void Quant::setChromaQP(int qpin, TextType ttype, int chFmt)
203{
204 int qp = Clip3(-QP_BD_OFFSET, 57, qpin);
205 if (qp >= 30)
206 {
207 if (chFmt == X265_CSP_I420)
208 qp = g_chromaScale[qp];
209 else
210 qp = X265_MIN(qp, 51);
211 }
212 m_qpParam[ttype].setQpParam(qp + QP_BD_OFFSET);
213}
214
215/* To minimize the distortion only. No rate is considered */
216uint32_t Quant::signBitHidingHDQ(int16_t* coeff, int32_t* deltaU, uint32_t numSig, const TUEntropyCodingParameters &codeParams)
217{
218 const uint32_t log2TrSizeCG = codeParams.log2TrSizeCG;
b53f7c52 219 const uint16_t* scan = codeParams.scan;
72b9787e
JB
220 bool lastCG = true;
221
222 for (int cg = (1 << (log2TrSizeCG * 2)) - 1; cg >= 0; cg--)
223 {
224 int cgStartPos = cg << LOG2_SCAN_SET_SIZE;
225 int n;
226
227 for (n = SCAN_SET_SIZE - 1; n >= 0; --n)
228 if (coeff[scan[n + cgStartPos]])
229 break;
230 if (n < 0)
231 continue;
232
233 int lastNZPosInCG = n;
234
235 for (n = 0;; n++)
236 if (coeff[scan[n + cgStartPos]])
237 break;
238
239 int firstNZPosInCG = n;
240
241 if (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD)
242 {
243 uint32_t signbit = coeff[scan[cgStartPos + firstNZPosInCG]] > 0 ? 0 : 1;
244 uint32_t absSum = 0;
245
246 for (n = firstNZPosInCG; n <= lastNZPosInCG; n++)
247 absSum += coeff[scan[n + cgStartPos]];
248
249 if (signbit != (absSum & 0x1)) // compare signbit with sum_parity
250 {
251 int minCostInc = MAX_INT, minPos = -1, curCost = MAX_INT;
252 int16_t finalChange = 0, curChange = 0;
253
254 for (n = (lastCG ? lastNZPosInCG : SCAN_SET_SIZE - 1); n >= 0; --n)
255 {
256 uint32_t blkPos = scan[n + cgStartPos];
257 if (coeff[blkPos])
258 {
259 if (deltaU[blkPos] > 0)
260 {
261 curCost = -deltaU[blkPos];
262 curChange = 1;
263 }
264 else
265 {
266 if (n == firstNZPosInCG && abs(coeff[blkPos]) == 1)
267 curCost = MAX_INT;
268 else
269 {
270 curCost = deltaU[blkPos];
271 curChange = -1;
272 }
273 }
274 }
275 else
276 {
277 if (n < firstNZPosInCG)
278 {
279 uint32_t thisSignBit = m_resiDctCoeff[blkPos] >= 0 ? 0 : 1;
280 if (thisSignBit != signbit)
281 curCost = MAX_INT;
282 else
283 {
284 curCost = -deltaU[blkPos];
285 curChange = 1;
286 }
287 }
288 else
289 {
290 curCost = -deltaU[blkPos];
291 curChange = 1;
292 }
293 }
294
295 if (curCost < minCostInc)
296 {
297 minCostInc = curCost;
298 finalChange = curChange;
299 minPos = blkPos;
300 }
301 }
302
303 /* do not allow change to violate coeff clamp */
304 if (coeff[minPos] == 32767 || coeff[minPos] == -32768)
305 finalChange = -1;
306
307 if (!coeff[minPos])
308 numSig++;
309 else if (finalChange == -1 && abs(coeff[minPos]) == 1)
310 numSig--;
311
312 if (m_resiDctCoeff[minPos] >= 0)
313 coeff[minPos] += finalChange;
314 else
315 coeff[minPos] -= finalChange;
316 }
317 }
318
319 lastCG = false;
320 }
321
322 return numSig;
323}
324
b53f7c52 325uint32_t Quant::transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride,
72b9787e
JB
326 coeff_t* coeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip)
327{
b53f7c52 328 const uint32_t sizeIdx = log2TrSize - 2;
72b9787e
JB
329 if (cu.m_tqBypass[absPartIdx])
330 {
331 X265_CHECK(log2TrSize >= 2 && log2TrSize <= 5, "Block size mistake!\n");
b53f7c52 332 return primitives.copy_cnt[sizeIdx](coeff, residual, resiStride);
72b9787e
JB
333 }
334
335 bool isLuma = ttype == TEXT_LUMA;
336 bool usePsy = m_psyRdoqScale && isLuma && !useTransformSkip;
72b9787e 337 int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // Represents scaling through forward transform
72b9787e
JB
338
339 X265_CHECK((cu.m_slice->m_sps->quadtreeTULog2MaxSize >= log2TrSize), "transform size too large\n");
340 if (useTransformSkip)
341 {
342#if X265_DEPTH <= 10
b53f7c52
JB
343 X265_CHECK(transformShift >= 0, "invalid transformShift\n");
344 primitives.cpy2Dto1D_shl[sizeIdx](m_resiDctCoeff, residual, resiStride, transformShift);
72b9787e
JB
345#else
346 if (transformShift >= 0)
b53f7c52 347 primitives.cpy2Dto1D_shl[sizeIdx](m_resiDctCoeff, residual, resiStride, transformShift);
72b9787e 348 else
b53f7c52 349 primitives.cpy2Dto1D_shr[sizeIdx](m_resiDctCoeff, residual, resiStride, -transformShift);
72b9787e
JB
350#endif
351 }
352 else
353 {
b53f7c52 354 bool isIntra = cu.isIntra(absPartIdx);
72b9787e
JB
355 int useDST = !sizeIdx && isLuma && isIntra;
356 int index = DCT_4x4 + sizeIdx - useDST;
357
b53f7c52 358 primitives.dct[index](residual, m_resiDctCoeff, resiStride);
72b9787e
JB
359
360 /* NOTE: if RDOQ is disabled globally, psy-rdoq is also disabled, so
361 * there is no risk of performing this DCT unnecessarily */
362 if (usePsy)
363 {
b53f7c52 364 int trSize = 1 << log2TrSize;
72b9787e 365 /* perform DCT on source pixels for psy-rdoq */
b53f7c52 366 primitives.luma_copy_ps[sizeIdx](m_fencShortBuf, trSize, fenc, fencStride);
72b9787e
JB
367 primitives.dct[index](m_fencShortBuf, m_fencDctCoeff, trSize);
368 }
369
b53f7c52 370 if (m_nr)
72b9787e
JB
371 {
372 /* denoise is not applied to intra residual, so DST can be ignored */
b53f7c52 373 int cat = sizeIdx + 4 * !isLuma + 8 * !isIntra;
72b9787e
JB
374 int numCoeff = 1 << (log2TrSize * 2);
375 primitives.denoiseDct(m_resiDctCoeff, m_nr->residualSum[cat], m_nr->offsetDenoise[cat], numCoeff);
376 m_nr->count[cat]++;
377 }
378 }
379
380 if (m_useRDOQ)
381 return rdoQuant(cu, coeff, log2TrSize, ttype, absPartIdx, usePsy);
382 else
383 {
384 int deltaU[32 * 32];
385
386 int scalingListType = ttype + (isLuma ? 3 : 0);
387 int rem = m_qpParam[ttype].rem;
388 int per = m_qpParam[ttype].per;
b53f7c52 389 const int32_t* quantCoeff = m_scalingList->m_quantCoef[log2TrSize - 2][scalingListType][rem];
72b9787e
JB
390
391 int qbits = QUANT_SHIFT + per + transformShift;
392 int add = (cu.m_slice->m_sliceType == I_SLICE ? 171 : 85) << (qbits - 9);
393 int numCoeff = 1 << (log2TrSize * 2);
394
395 uint32_t numSig = primitives.quant(m_resiDctCoeff, quantCoeff, deltaU, coeff, qbits, add, numCoeff);
396
397 if (numSig >= 2 && cu.m_slice->m_pps->bSignHideEnabled)
398 {
399 TUEntropyCodingParameters codeParams;
400 cu.getTUEntropyCodingParameters(codeParams, absPartIdx, log2TrSize, isLuma);
401 return signBitHidingHDQ(coeff, deltaU, numSig, codeParams);
402 }
403 else
404 return numSig;
405 }
406}
407
b53f7c52 408void Quant::invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
72b9787e
JB
409 uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig)
410{
b53f7c52 411 const uint32_t sizeIdx = log2TrSize - 2;
72b9787e
JB
412 if (transQuantBypass)
413 {
b53f7c52 414 primitives.cpy1Dto2D_shl[sizeIdx](residual, coeff, resiStride, 0);
72b9787e
JB
415 return;
416 }
417
418 // Values need to pass as input parameter in dequant
419 int rem = m_qpParam[ttype].rem;
420 int per = m_qpParam[ttype].per;
421 int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
422 int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
423 int numCoeff = 1 << (log2TrSize * 2);
424
425 if (m_scalingList->m_bEnabled)
426 {
427 int scalingListType = (bIntra ? 0 : 3) + ttype;
b53f7c52 428 const int32_t* dequantCoef = m_scalingList->m_dequantCoef[sizeIdx][scalingListType][rem];
72b9787e
JB
429 primitives.dequant_scaling(coeff, dequantCoef, m_resiDctCoeff, numCoeff, per, shift);
430 }
431 else
432 {
433 int scale = m_scalingList->s_invQuantScales[rem] << per;
434 primitives.dequant_normal(coeff, m_resiDctCoeff, numCoeff, scale, shift);
435 }
436
437 if (useTransformSkip)
438 {
72b9787e 439#if X265_DEPTH <= 10
b53f7c52
JB
440 X265_CHECK(transformShift > 0, "invalid transformShift\n");
441 primitives.cpy1Dto2D_shr[sizeIdx](residual, m_resiDctCoeff, resiStride, transformShift);
72b9787e
JB
442#else
443 if (transformShift > 0)
b53f7c52 444 primitives.cpy1Dto2D_shr[sizeIdx](residual, m_resiDctCoeff, resiStride, transformShift);
72b9787e 445 else
b53f7c52 446 primitives.cpy1Dto2D_shl[sizeIdx](residual, m_resiDctCoeff, resiStride, -transformShift);
72b9787e
JB
447#endif
448 }
449 else
450 {
72b9787e
JB
451 int useDST = !sizeIdx && ttype == TEXT_LUMA && bIntra;
452
453 X265_CHECK((int)numSig == primitives.count_nonzero(coeff, 1 << (log2TrSize * 2)), "numSig differ\n");
454
455 // DC only
456 if (numSig == 1 && coeff[0] != 0 && !useDST)
457 {
b53f7c52 458 const int shift_1st = 7 - 6;
72b9787e 459 const int add_1st = 1 << (shift_1st - 1);
b53f7c52 460 const int shift_2nd = 12 - (X265_DEPTH - 8) - 3;
72b9787e
JB
461 const int add_2nd = 1 << (shift_2nd - 1);
462
b53f7c52
JB
463 int dc_val = (((m_resiDctCoeff[0] * (64 >> 6) + add_1st) >> shift_1st) * (64 >> 3) + add_2nd) >> shift_2nd;
464 primitives.blockfill_s[sizeIdx](residual, resiStride, (int16_t)dc_val);
72b9787e
JB
465 return;
466 }
467
b53f7c52 468 primitives.idct[IDCT_4x4 + sizeIdx - useDST](m_resiDctCoeff, residual, resiStride);
72b9787e
JB
469 }
470}
471
472/* Rate distortion optimized quantization for entropy coding engines using
473 * probability models like CABAC */
b53f7c52 474uint32_t Quant::rdoQuant(const CUData& cu, int16_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy)
72b9787e
JB
475{
476 int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
477 int scalingListType = (cu.isIntra(absPartIdx) ? 0 : 3) + ttype;
478
479 X265_CHECK(scalingListType < 6, "scaling list type out of range\n");
480
481 int rem = m_qpParam[ttype].rem;
482 int per = m_qpParam[ttype].per;
483 int qbits = QUANT_SHIFT + per + transformShift; /* Right shift of non-RDOQ quantizer level = (coeff*Q + offset)>>q_bits */
484 int add = (1 << (qbits - 1));
b53f7c52 485 const int32_t* qCoef = m_scalingList->m_quantCoef[log2TrSize - 2][scalingListType][rem];
72b9787e
JB
486
487 int numCoeff = 1 << (log2TrSize * 2);
488
489 uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff, qbits, add, numCoeff);
490
491 X265_CHECK((int)numSig == primitives.count_nonzero(dstCoeff, 1 << (log2TrSize * 2)), "numSig differ\n");
492 if (!numSig)
493 return 0;
494
495 uint32_t trSize = 1 << log2TrSize;
496 int64_t lambda2 = m_qpParam[ttype].lambda2;
497 int64_t psyScale = (m_psyRdoqScale * m_qpParam[ttype].lambda);
498
499 /* unquant constants for measuring distortion. Scaling list quant coefficients have a (1 << 4)
500 * scale applied that must be removed during unquant. Note that in real dequant there is clipping
501 * at several stages. We skip the clipping for simplicity when measuring RD cost */
b53f7c52 502 const int32_t* unquantScale = m_scalingList->m_dequantCoef[log2TrSize - 2][scalingListType][rem];
72b9787e
JB
503 int unquantShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift + (m_scalingList->m_bEnabled ? 4 : 0);
504 int unquantRound = (unquantShift > per) ? 1 << (unquantShift - per - 1) : 0;
505 int scaleBits = SCALE_BITS - 2 * transformShift;
506
507#define UNQUANT(lvl) (((lvl) * (unquantScale[blkPos] << per) + unquantRound) >> unquantShift)
508#define SIGCOST(bits) ((lambda2 * (bits)) >> 8)
509#define RDCOST(d, bits) ((((int64_t)d * d) << scaleBits) + SIGCOST(bits))
510#define PSYVALUE(rec) ((psyScale * (rec)) >> (16 - scaleBits))
511
512 int64_t costCoeff[32 * 32]; /* d*d + lambda * bits */
513 int64_t costUncoded[32 * 32]; /* d*d + lambda * 0 */
514 int64_t costSig[32 * 32]; /* lambda * bits */
515
516 int rateIncUp[32 * 32]; /* signal overhead of increasing level */
517 int rateIncDown[32 * 32]; /* signal overhead of decreasing level */
518 int sigRateDelta[32 * 32]; /* signal difference between zero and non-zero */
519
520 int64_t costCoeffGroupSig[MLS_GRP_NUM]; /* lambda * bits of group coding cost */
521 uint64_t sigCoeffGroupFlag64 = 0;
522
523 uint32_t ctxSet = 0;
524 int c1 = 1;
525 int c2 = 0;
526 uint32_t goRiceParam = 0;
527 uint32_t c1Idx = 0;
528 uint32_t c2Idx = 0;
529 int cgLastScanPos = -1;
530 int lastScanPos = -1;
531 const uint32_t cgSize = (1 << MLS_CG_SIZE); /* 4x4 num coef = 16 */
532 bool bIsLuma = ttype == TEXT_LUMA;
533
534 /* total rate distortion cost of transform block, as CBF=0 */
535 int64_t totalUncodedCost = 0;
536
537 /* Total rate distortion cost of this transform block, counting te distortion of uncoded blocks,
538 * the distortion and signal cost of coded blocks, and the coding cost of significant
539 * coefficient and coefficient group bitmaps */
540 int64_t totalRdCost = 0;
541
542 TUEntropyCodingParameters codeParams;
543 cu.getTUEntropyCodingParameters(codeParams, absPartIdx, log2TrSize, bIsLuma);
544 const uint32_t cgNum = 1 << (codeParams.log2TrSizeCG * 2);
545
546 /* TODO: update bit estimates if dirty */
547 EstBitsSbac& estBitsSbac = m_entropyCoder->m_estBitsSbac;
548
549 uint32_t scanPos;
550 coeffGroupRDStats cgRdStats;
551
552 /* iterate over coding groups in reverse scan order */
553 for (int cgScanPos = cgNum - 1; cgScanPos >= 0; cgScanPos--)
554 {
555 const uint32_t cgBlkPos = codeParams.scanCG[cgScanPos];
556 const uint32_t cgPosY = cgBlkPos >> codeParams.log2TrSizeCG;
557 const uint32_t cgPosX = cgBlkPos - (cgPosY << codeParams.log2TrSizeCG);
558 const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
559 memset(&cgRdStats, 0, sizeof(coeffGroupRDStats));
560
561 const int patternSigCtx = calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codeParams.log2TrSizeCG);
562
563 /* iterate over coefficients in each group in reverse scan order */
564 for (int scanPosinCG = cgSize - 1; scanPosinCG >= 0; scanPosinCG--)
565 {
566 scanPos = (cgScanPos << MLS_CG_SIZE) + scanPosinCG;
567 uint32_t blkPos = codeParams.scan[scanPos];
568 uint16_t maxAbsLevel = (int16_t)abs(dstCoeff[blkPos]); /* abs(quantized coeff) */
569 int signCoef = m_resiDctCoeff[blkPos]; /* pre-quantization DCT coeff */
570 int predictedCoef = m_fencDctCoeff[blkPos] - signCoef; /* predicted DCT = source DCT - residual DCT*/
571
572 /* RDOQ measures distortion as the squared difference between the unquantized coded level
573 * and the original DCT coefficient. The result is shifted scaleBits to account for the
574 * FIX15 nature of the CABAC cost tables minus the forward transform scale */
575
576 /* cost of not coding this coefficient (all distortion, no signal bits) */
577 costUncoded[scanPos] = (int64_t)(signCoef * signCoef) << scaleBits;
578 if (usePsy && blkPos)
579 /* when no residual coefficient is coded, predicted coef == recon coef */
580 costUncoded[scanPos] -= PSYVALUE(predictedCoef);
581
582 totalUncodedCost += costUncoded[scanPos];
583
584 if (maxAbsLevel && lastScanPos < 0)
585 {
586 /* remember the first non-zero coef found in this reverse scan as the last pos */
587 lastScanPos = scanPos;
588 ctxSet = (scanPos < SCAN_SET_SIZE || !bIsLuma) ? 0 : 2;
589 cgLastScanPos = cgScanPos;
590 }
591
592 if (lastScanPos < 0)
593 {
594 /* coefficients after lastNZ have no distortion signal cost */
595 costCoeff[scanPos] = 0;
596 costSig[scanPos] = 0;
597
598 /* No non-zero coefficient yet found, but this does not mean
599 * there is no uncoded-cost for this coefficient. Pre-
600 * quantization the coefficient may have been non-zero */
601 totalRdCost += costUncoded[scanPos];
602 }
603 else
604 {
605 const uint32_t c1c2Idx = ((c1Idx - 8) >> (sizeof(int) * CHAR_BIT - 1)) + (((-(int)c2Idx) >> (sizeof(int) * CHAR_BIT - 1)) + 1) * 2;
606 const uint32_t baseLevel = ((uint32_t)0xD9 >> (c1c2Idx * 2)) & 3; // {1, 2, 1, 3}
607
608 X265_CHECK(!!((int)c1Idx < C1FLAG_NUMBER) == (int)((c1Idx - 8) >> (sizeof(int) * CHAR_BIT - 1)), "scan validation 1\n");
609 X265_CHECK(!!(c2Idx == 0) == ((-(int)c2Idx) >> (sizeof(int) * CHAR_BIT - 1)) + 1, "scan validation 2\n");
610 X265_CHECK((int)baseLevel == ((c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx == 0)) : 1), "scan validation 3\n");
611
612 // coefficient level estimation
613 const uint32_t oneCtx = 4 * ctxSet + c1;
614 const uint32_t absCtx = ctxSet + c2;
b53f7c52
JB
615 const int* greaterOneBits = estBitsSbac.greaterOneBits[oneCtx];
616 const int* levelAbsBits = estBitsSbac.levelAbsBits[absCtx];
72b9787e
JB
617
618 uint16_t level = 0;
619 uint32_t sigCoefBits = 0;
620 costCoeff[scanPos] = MAX_INT64;
621
622 if ((int)scanPos == lastScanPos)
623 sigRateDelta[blkPos] = 0;
624 else
625 {
626 const uint32_t ctxSig = getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codeParams.firstSignificanceMapContext);
627 if (maxAbsLevel < 3)
628 {
629 /* set default costs to uncoded costs */
630 costSig[scanPos] = SIGCOST(estBitsSbac.significantBits[ctxSig][0]);
631 costCoeff[scanPos] = costUncoded[scanPos] + costSig[scanPos];
632 }
633 sigRateDelta[blkPos] = estBitsSbac.significantBits[ctxSig][1] - estBitsSbac.significantBits[ctxSig][0];
634 sigCoefBits = estBitsSbac.significantBits[ctxSig][1];
635 }
636 if (maxAbsLevel)
637 {
638 uint16_t minAbsLevel = X265_MAX(maxAbsLevel - 1, 1);
639 for (uint16_t lvl = maxAbsLevel; lvl >= minAbsLevel; lvl--)
640 {
641 uint32_t levelBits = getICRateCost(lvl, lvl - baseLevel, greaterOneBits, levelAbsBits, goRiceParam, c1c2Idx) + IEP_RATE;
642
643 int unquantAbsLevel = UNQUANT(lvl);
644 int d = abs(signCoef) - unquantAbsLevel;
645 int64_t curCost = RDCOST(d, sigCoefBits + levelBits);
646
647 /* Psy RDOQ: bias in favor of higher AC coefficients in the reconstructed frame */
648 if (usePsy && blkPos)
649 {
650 int reconCoef = abs(unquantAbsLevel + SIGN(predictedCoef, signCoef));
651 curCost -= PSYVALUE(reconCoef);
652 }
653
654 if (curCost < costCoeff[scanPos])
655 {
656 level = lvl;
657 costCoeff[scanPos] = curCost;
658 costSig[scanPos] = SIGCOST(sigCoefBits);
659 }
660 }
661 }
662
663 dstCoeff[blkPos] = level;
664 totalRdCost += costCoeff[scanPos];
665
666 /* record costs for sign-hiding performed at the end */
667 if (level)
668 {
669 int rateNow = getICRate(level, level - baseLevel, greaterOneBits, levelAbsBits, goRiceParam, c1c2Idx);
670 rateIncUp[blkPos] = getICRate(level + 1, level + 1 - baseLevel, greaterOneBits, levelAbsBits, goRiceParam, c1c2Idx) - rateNow;
671 rateIncDown[blkPos] = getICRate(level - 1, level - 1 - baseLevel, greaterOneBits, levelAbsBits, goRiceParam, c1c2Idx) - rateNow;
672 }
673 else
674 {
675 rateIncUp[blkPos] = greaterOneBits[0];
676 rateIncDown[blkPos] = 0;
677 }
678
679 /* Update CABAC estimation state */
680 if (level >= baseLevel && goRiceParam < 4 && level > (3U << goRiceParam))
681 goRiceParam++;
682
683 c1Idx -= (-(int32_t)level) >> 31;
684
685 /* update bin model */
686 if (level > 1)
687 {
688 c1 = 0;
689 c2 += (uint32_t)(c2 - 2) >> 31;
690 c2Idx++;
691 }
692 else if ((c1 < 3) && (c1 > 0) && level)
693 c1++;
694
695 /* context set update */
696 if (!(scanPos % SCAN_SET_SIZE) && scanPos)
697 {
698 c2 = 0;
699 goRiceParam = 0;
700
701 c1Idx = 0;
702 c2Idx = 0;
703 ctxSet = (scanPos == SCAN_SET_SIZE || !bIsLuma) ? 0 : 2;
704 X265_CHECK(c1 >= 0, "c1 is negative\n");
705 ctxSet -= ((int32_t)(c1 - 1) >> 31);
706 c1 = 1;
707 }
708 }
709
710 cgRdStats.sigCost += costSig[scanPos];
711 if (!scanPosinCG)
712 cgRdStats.sigCost0 = costSig[scanPos];
713
714 if (dstCoeff[blkPos])
715 {
716 sigCoeffGroupFlag64 |= cgBlkPosMask;
717 cgRdStats.codedLevelAndDist += costCoeff[scanPos] - costSig[scanPos];
718 cgRdStats.uncodedDist += costUncoded[scanPos];
719 cgRdStats.nnzBeforePos0 += scanPosinCG;
720 }
721 } /* end for (scanPosinCG) */
722
723 costCoeffGroupSig[cgScanPos] = 0;
724
725 if (cgLastScanPos < 0)
726 {
727 /* nothing to do at this point */
728 }
729 else if (!cgScanPos || cgScanPos == cgLastScanPos)
730 {
731 /* coeff group 0 is implied to be present, no signal cost */
732 /* coeff group with last NZ is implied to be present, handled below */
733 }
734 else if (sigCoeffGroupFlag64 & cgBlkPosMask)
735 {
736 if (!cgRdStats.nnzBeforePos0)
737 {
738 /* if only coeff 0 in this CG is coded, its significant coeff bit is implied */
739 totalRdCost -= cgRdStats.sigCost0;
740 cgRdStats.sigCost -= cgRdStats.sigCost0;
741 }
742
743 /* there are coded coefficients in this group, but now we include the signaling cost
744 * of the significant coefficient group flag and evaluate whether the RD cost of the
745 * coded group is more than the RD cost of the uncoded group */
746
747 uint32_t sigCtx = getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codeParams.log2TrSizeCG);
748
749 int64_t costZeroCG = totalRdCost + SIGCOST(estBitsSbac.significantCoeffGroupBits[sigCtx][0]);
750 costZeroCG += cgRdStats.uncodedDist; /* add distortion for resetting non-zero levels to zero levels */
751 costZeroCG -= cgRdStats.codedLevelAndDist; /* remove distortion and level cost of coded coefficients */
752 costZeroCG -= cgRdStats.sigCost; /* remove signaling cost of significant coeff bitmap */
753
754 costCoeffGroupSig[cgScanPos] = SIGCOST(estBitsSbac.significantCoeffGroupBits[sigCtx][1]);
755 totalRdCost += costCoeffGroupSig[cgScanPos]; /* add the cost of 1 bit in significant CG bitmap */
756
757 if (costZeroCG < totalRdCost)
758 {
759 sigCoeffGroupFlag64 &= ~cgBlkPosMask;
760 totalRdCost = costZeroCG;
761 costCoeffGroupSig[cgScanPos] = SIGCOST(estBitsSbac.significantCoeffGroupBits[sigCtx][0]);
762
763 /* reset all coeffs to 0. UNCODE THIS COEFF GROUP! */
764 for (int scanPosinCG = cgSize - 1; scanPosinCG >= 0; scanPosinCG--)
765 {
766 scanPos = cgScanPos * cgSize + scanPosinCG;
767 uint32_t blkPos = codeParams.scan[scanPos];
768 if (dstCoeff[blkPos])
769 {
770 costCoeff[scanPos] = costUncoded[scanPos];
771 costSig[scanPos] = 0;
772 }
773 dstCoeff[blkPos] = 0;
774 }
775 }
776 }
777 else
778 {
779 /* there were no coded coefficients in this coefficient group */
780 uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codeParams.log2TrSizeCG);
781 costCoeffGroupSig[cgScanPos] = SIGCOST(estBitsSbac.significantCoeffGroupBits[ctxSig][0]);
782 totalRdCost += costCoeffGroupSig[cgScanPos]; /* add cost of 0 bit in significant CG bitmap */
783 totalRdCost -= cgRdStats.sigCost; /* remove cost of significant coefficient bitmap */
784 }
785 } /* end for (cgScanPos) */
786
787 X265_CHECK(lastScanPos >= 0, "numSig non zero, but no coded CG\n");
788
789 /* calculate RD cost of uncoded block CBF=0, and add cost of CBF=1 to total */
790 int64_t bestCost;
791 if (!cu.isIntra(absPartIdx) && bIsLuma && !cu.m_tuDepth[absPartIdx])
792 {
793 bestCost = totalUncodedCost + SIGCOST(estBitsSbac.blockRootCbpBits[0]);
794 totalRdCost += SIGCOST(estBitsSbac.blockRootCbpBits[1]);
795 }
796 else
797 {
798 int ctx = ctxCbf[ttype][cu.m_tuDepth[absPartIdx]];
799 bestCost = totalUncodedCost + SIGCOST(estBitsSbac.blockCbpBits[ctx][0]);
800 totalRdCost += SIGCOST(estBitsSbac.blockCbpBits[ctx][1]);
801 }
802
803 /* This loop starts with the last non-zero found in the first loop and then refines this last
804 * non-zero by measuring the true RD cost of the last NZ at this position, and then the RD costs
805 * at all previous coefficients until a coefficient greater than 1 is encountered or we run out
806 * of coefficients to evaluate. This will factor in the cost of coding empty groups and empty
807 * coeff prior to the last NZ. The base best cost is the RD cost of CBF=0 */
808 int bestLastIdx = 0;
809 bool foundLast = false;
810 for (int cgScanPos = cgLastScanPos; cgScanPos >= 0 && !foundLast; cgScanPos--)
811 {
812 if (!cgScanPos || cgScanPos == cgLastScanPos)
813 {
814 /* the presence of these coefficient groups are inferred, they have no bit in
815 * sigCoeffGroupFlag64 and no saved costCoeffGroupSig[] cost */
816 }
817 else if (sigCoeffGroupFlag64 & (1ULL << codeParams.scanCG[cgScanPos]))
818 {
819 /* remove cost of significant coeff group flag, the group's presence would be inferred
820 * from lastNZ if it were present in this group */
821 totalRdCost -= costCoeffGroupSig[cgScanPos];
822 }
823 else
824 {
825 /* remove cost of signaling this empty group as not present */
826 totalRdCost -= costCoeffGroupSig[cgScanPos];
827 continue;
828 }
829
830 for (int scanPosinCG = cgSize - 1; scanPosinCG >= 0; scanPosinCG--)
831 {
832 scanPos = cgScanPos * cgSize + scanPosinCG;
833 if ((int)scanPos > lastScanPos)
834 continue;
835
836 /* if the coefficient was coded, measure the RD cost of it as the last non-zero and then
837 * continue as if it were uncoded. If the coefficient was already uncoded, remove the
838 * cost of signaling it as not-significant */
839 uint32_t blkPos = codeParams.scan[scanPos];
840 if (dstCoeff[blkPos])
841 {
842 /* Swap the cost of signaling its significant coeff bit with the cost of
843 * signaling its lastNZ pos */
844 uint32_t posY = blkPos >> log2TrSize;
845 uint32_t posX = blkPos - (posY << log2TrSize);
846 uint32_t bitsLastNZ = codeParams.scanType == SCAN_VER ? getRateLast(posY, posX) : getRateLast(posX, posY);
847 int64_t costAsLast = totalRdCost - costSig[scanPos] + SIGCOST(bitsLastNZ);
848
849 if (costAsLast < bestCost)
850 {
851 bestLastIdx = scanPos + 1;
852 bestCost = costAsLast;
853 }
854 if (dstCoeff[blkPos] > 1)
855 {
856 foundLast = true;
857 break;
858 }
859
860 totalRdCost -= costCoeff[scanPos];
861 totalRdCost += costUncoded[scanPos];
862 }
863 else
864 totalRdCost -= costSig[scanPos];
865 }
866 }
867
868 /* recount non-zero coefficients and re-apply sign of DCT coef */
869 numSig = 0;
870 for (int pos = 0; pos < bestLastIdx; pos++)
871 {
872 int blkPos = codeParams.scan[pos];
873 int level = dstCoeff[blkPos];
874 numSig += (level != 0);
875
876 uint32_t mask = (int32_t)m_resiDctCoeff[blkPos] >> 31;
877 dstCoeff[blkPos] = (int16_t)((level ^ mask) - mask);
878 }
879
880 /* clean uncoded coefficients */
881 for (int pos = bestLastIdx; pos <= lastScanPos; pos++)
882 dstCoeff[codeParams.scan[pos]] = 0;
883
884 /* rate-distortion based sign-hiding */
885 if (cu.m_slice->m_pps->bSignHideEnabled && numSig >= 2)
886 {
887 int lastCG = true;
888 for (int subSet = cgLastScanPos; subSet >= 0; subSet--)
889 {
890 int subPos = subSet << LOG2_SCAN_SET_SIZE;
891 int n;
892
893 /* measure distance between first and last non-zero coef in this
894 * coding group */
895 for (n = SCAN_SET_SIZE - 1; n >= 0; --n)
896 if (dstCoeff[codeParams.scan[n + subPos]])
897 break;
898 if (n < 0)
899 continue;
900
901 int lastNZPosInCG = n;
902
903 for (n = 0;; n++)
904 if (dstCoeff[codeParams.scan[n + subPos]])
905 break;
906
907 int firstNZPosInCG = n;
908
909 if (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD)
910 {
911 uint32_t signbit = (dstCoeff[codeParams.scan[subPos + firstNZPosInCG]] > 0 ? 0 : 1);
912 int absSum = 0;
913
914 for (n = firstNZPosInCG; n <= lastNZPosInCG; n++)
915 absSum += dstCoeff[codeParams.scan[n + subPos]];
916
917 if (signbit != (absSum & 1U))
918 {
919 /* We must find a coeff to toggle up or down so the sign bit of the first non-zero coeff
920 * is properly implied. Note dstCoeff[] are signed by this point but curChange and
921 * finalChange imply absolute levels (+1 is away from zero, -1 is towards zero) */
922
923 int64_t minCostInc = MAX_INT64, curCost = MAX_INT64;
924 int minPos = -1;
925 int16_t finalChange = 0, curChange = 0;
926
927 for (n = (lastCG ? lastNZPosInCG : SCAN_SET_SIZE - 1); n >= 0; --n)
928 {
929 uint32_t blkPos = codeParams.scan[n + subPos];
930 int signCoef = m_resiDctCoeff[blkPos]; /* pre-quantization DCT coeff */
931 int absLevel = abs(dstCoeff[blkPos]);
932
933 int d = abs(signCoef) - UNQUANT(absLevel);
934 int64_t origDist = (((int64_t)d * d)) << scaleBits;
935
936#define DELTARDCOST(d, deltabits) ((((int64_t)d * d) << scaleBits) - origDist + ((lambda2 * (int64_t)(deltabits)) >> 8))
937
938 if (dstCoeff[blkPos])
939 {
940 d = abs(signCoef) - UNQUANT(absLevel + 1);
941 int64_t costUp = DELTARDCOST(d, rateIncUp[blkPos]);
942
943 /* if decrementing would make the coeff 0, we can include the
944 * significant coeff flag cost savings */
945 d = abs(signCoef) - UNQUANT(absLevel - 1);
946 bool isOne = abs(dstCoeff[blkPos]) == 1;
947 int downBits = rateIncDown[blkPos] - (isOne ? (IEP_RATE + sigRateDelta[blkPos]) : 0);
948 int64_t costDown = DELTARDCOST(d, downBits);
949
950 if (lastCG && lastNZPosInCG == n && isOne)
951 costDown -= 4 * IEP_RATE;
952
953 if (costUp < costDown)
954 {
955 curCost = costUp;
956 curChange = 1;
957 }
958 else
959 {
960 curChange = -1;
961 if (n == firstNZPosInCG && isOne)
962 curCost = MAX_INT64;
963 else
964 curCost = costDown;
965 }
966 }
967 else if (n < firstNZPosInCG && signbit != (signCoef >= 0 ? 0 : 1U))
968 {
969 /* don't try to make a new coded coeff before the first coeff if its
970 * sign would be different than the first coeff, the inferred sign would
971 * still be wrong and we'd have to do this again. */
972 curCost = MAX_INT64;
973 }
974 else
975 {
976 /* evaluate changing an uncoded coeff 0 to a coded coeff +/-1 */
977 d = abs(signCoef) - UNQUANT(1);
978 curCost = DELTARDCOST(d, rateIncUp[blkPos] + IEP_RATE + sigRateDelta[blkPos]);
979 curChange = 1;
980 }
981
982 if (curCost < minCostInc)
983 {
984 minCostInc = curCost;
985 finalChange = curChange;
986 minPos = blkPos;
987 }
988 }
989
990 if (dstCoeff[minPos] == 32767 || dstCoeff[minPos] == -32768)
991 /* don't allow sign hiding to violate the SPEC range */
992 finalChange = -1;
993
994 if (dstCoeff[minPos] == 0)
995 numSig++;
996 else if (finalChange == -1 && abs(dstCoeff[minPos]) == 1)
997 numSig--;
998
999 if (m_resiDctCoeff[minPos] >= 0)
1000 dstCoeff[minPos] += finalChange;
1001 else
1002 dstCoeff[minPos] -= finalChange;
1003 }
1004 }
1005
1006 lastCG = false;
1007 }
1008 }
1009
1010 return numSig;
1011}
1012
1013/* Pattern decision for context derivation process of significant_coeff_flag */
1014uint32_t Quant::calcPatternSigCtx(uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG)
1015{
1016 if (!log2TrSizeCG)
1017 return 0;
1018
1019 const uint32_t trSizeCG = 1 << log2TrSizeCG;
1020 X265_CHECK(trSizeCG <= 8, "transform CG is too large\n");
1021 const uint32_t sigPos = (uint32_t)(sigCoeffGroupFlag64 >> (1 + (cgPosY << log2TrSizeCG) + cgPosX));
1022 const uint32_t sigRight = ((int32_t)(cgPosX - (trSizeCG - 1)) >> 31) & (sigPos & 1);
1023 const uint32_t sigLower = ((int32_t)(cgPosY - (trSizeCG - 1)) >> 31) & (sigPos >> (trSizeCG - 2)) & 2;
1024
1025 return sigRight + sigLower;
1026}
1027
1028/* Context derivation process of coeff_abs_significant_flag */
1029uint32_t Quant::getSigCtxInc(uint32_t patternSigCtx, uint32_t log2TrSize, uint32_t trSize, uint32_t blkPos, bool bIsLuma,
1030 uint32_t firstSignificanceMapContext)
1031{
1032 static const uint8_t ctxIndMap[16] =
1033 {
1034 0, 1, 4, 5,
1035 2, 3, 4, 5,
1036 6, 6, 8, 8,
1037 7, 7, 8, 8
1038 };
1039
1040 if (!blkPos) // special case for the DC context variable
1041 return 0;
1042
1043 if (log2TrSize == 2) // 4x4
1044 return ctxIndMap[blkPos];
1045
1046 const uint32_t posY = blkPos >> log2TrSize;
1047 const uint32_t posX = blkPos & (trSize - 1);
1048 X265_CHECK((blkPos - (posY << log2TrSize)) == posX, "block pos check failed\n");
1049
1050 int posXinSubset = blkPos & 3;
1051 X265_CHECK((posX & 3) == (blkPos & 3), "pos alignment fail\n");
1052 int posYinSubset = posY & 3;
1053
1054 // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
1055 static const uint8_t table_cnt[4][4][4] =
1056 {
1057 // patternSigCtx = 0
1058 {
1059 { 2, 1, 1, 0 },
1060 { 1, 1, 0, 0 },
1061 { 1, 0, 0, 0 },
1062 { 0, 0, 0, 0 },
1063 },
1064 // patternSigCtx = 1
1065 {
1066 { 2, 1, 0, 0 },
1067 { 2, 1, 0, 0 },
1068 { 2, 1, 0, 0 },
1069 { 2, 1, 0, 0 },
1070 },
1071 // patternSigCtx = 2
1072 {
1073 { 2, 2, 2, 2 },
1074 { 1, 1, 1, 1 },
1075 { 0, 0, 0, 0 },
1076 { 0, 0, 0, 0 },
1077 },
1078 // patternSigCtx = 3
1079 {
1080 { 2, 2, 2, 2 },
1081 { 2, 2, 2, 2 },
1082 { 2, 2, 2, 2 },
1083 { 2, 2, 2, 2 },
1084 }
1085 };
1086
1087 int cnt = table_cnt[patternSigCtx][posXinSubset][posYinSubset];
1088 int offset = firstSignificanceMapContext;
1089
1090 offset += cnt;
1091
1092 return (bIsLuma && (posX | posY) >= 4) ? 3 + offset : offset;
1093}
1094
1095/* Calculates the cost of signaling the last significant coefficient in the block */
1096inline uint32_t Quant::getRateLast(uint32_t posx, uint32_t posy) const
1097{
1098 uint32_t ctxX = getGroupIdx(posx);
1099 uint32_t ctxY = getGroupIdx(posy);
1100 uint32_t cost = m_entropyCoder->m_estBitsSbac.lastXBits[ctxX] + m_entropyCoder->m_estBitsSbac.lastYBits[ctxY];
1101
1102 int32_t maskX = (int32_t)(2 - posx) >> 31;
1103 int32_t maskY = (int32_t)(2 - posy) >> 31;
1104
1105 cost += maskX & (IEP_RATE * ((ctxX - 2) >> 1));
1106 cost += maskY & (IEP_RATE * ((ctxY - 2) >> 1));
1107 return cost;
1108}
1109
1110/* Context derivation process of coeff_abs_significant_flag */
1111uint32_t Quant::getSigCoeffGroupCtxInc(uint64_t cgGroupMask, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG)
1112{
1113 const uint32_t trSizeCG = 1 << log2TrSizeCG;
1114
1115 const uint32_t sigPos = (uint32_t)(cgGroupMask >> (1 + (cgPosY << log2TrSizeCG) + cgPosX));
1116 const uint32_t sigRight = ((int32_t)(cgPosX - (trSizeCG - 1)) >> 31) & sigPos;
1117 const uint32_t sigLower = ((int32_t)(cgPosY - (trSizeCG - 1)) >> 31) & (sigPos >> (trSizeCG - 1));
1118
1119 return (sigRight | sigLower) & 1;
1120}