Imported Upstream version 1.4+222+hg5f9f7194267b
[deb_x265.git] / source / common / cudata.cpp
CommitLineData
72b9787e
JB
1/*****************************************************************************
2 * Copyright (C) 2014 x265 project
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19 *
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
23
24#include "common.h"
25#include "frame.h"
26#include "framedata.h"
27#include "picyuv.h"
28#include "mv.h"
29#include "cudata.h"
30
31using namespace x265;
32
33namespace {
34// file private namespace
35
36/* for all bcast* and copy* functions, dst and src are aligned to MIN(size, 32) */
37
/* Store 'val' into the single byte at dst. */
void bcast1(uint8_t* dst, uint8_t val)
{
    *dst = val;
}
39
/* Copy exactly 4 bytes from src to dst.
 * A fixed-size memcpy replaces the former uint32_t type-punned load/store:
 * it has defined behaviour regardless of aliasing or alignment and compilers
 * lower it to the same single word move. */
void copy4(uint8_t* dst, uint8_t* src)
{
    memcpy(dst, src, 4);
}
/* Fill exactly 4 bytes at dst with 'val'.
 * The former '0x01010101 * val' was evaluated as a signed int multiply, which
 * overflows (undefined behaviour) for every val >= 128, and the result was
 * stored through a type-punned uint32_t pointer. memset has neither problem
 * and is lowered to a single word store for a constant size. */
void bcast4(uint8_t* dst, uint8_t val)
{
    memset(dst, val, 4);
}
42
/* Copy exactly 16 bytes from src to dst.
 * Uses a fixed-size memcpy instead of two type-punned uint64_t moves so the
 * access is well defined irrespective of aliasing rules and alignment. */
void copy16(uint8_t* dst, uint8_t* src)
{
    memcpy(dst, src, 16);
}
/* Fill exactly 16 bytes at dst with 'val'.
 * memset replaces the former pair of type-punned uint64_t stores; the result
 * is identical and the access no longer depends on alignment or aliasing. */
void bcast16(uint8_t* dst, uint8_t val)
{
    memset(dst, val, 16);
}
45
/* Copy exactly 64 bytes from src to dst.
 * A fixed-size memcpy replaces eight type-punned uint64_t moves; behaviour is
 * the same for the non-overlapping buffers this is used with, without relying
 * on alignment or on aliasing uint8_t storage as uint64_t. */
void copy64(uint8_t* dst, uint8_t* src)
{
    memcpy(dst, src, 64);
}
/* Fill exactly 64 bytes at dst with 'val'.
 * memset replaces eight type-punned uint64_t stores of a replicated byte
 * pattern; the bytes written are identical and the access is well defined
 * for any alignment. */
void bcast64(uint8_t* dst, uint8_t val)
{
    memset(dst, val, 64);
}
53
54/* at 256 bytes, memset/memcpy will probably use SIMD more effectively than our uint64_t hack,
55 * but hand-written assembly would beat it. */
/* Copy exactly 256 bytes from src to dst via the C library. */
void copy256(uint8_t* dst, uint8_t* src)
{
    const size_t byteCount = 256;
    memcpy(dst, src, byteCount);
}
/* Fill exactly 256 bytes at dst with 'val' via the C library. */
void bcast256(uint8_t* dst, uint8_t val)
{
    const size_t byteCount = 256;
    memset(dst, val, byteCount);
}
58
/* True when both raster addresses have the same column index.
 * Compares the low bits selected by (numUnitsPerRow - 1); this equals
 * 'addrA % numUnitsPerRow == addrB % numUnitsPerRow' only when
 * numUnitsPerRow is a power of two. */
inline bool isEqualCol(int addrA, int addrB, int numUnitsPerRow)
{
    const int columnMask = numUnitsPerRow - 1;
    const int differingBits = addrA ^ addrB;
    return !(differingBits & columnMask);
}
65
/* True when both raster addresses have the same row index.
 * Compares the bits above the column mask; this equals
 * 'addrA / numUnitsPerRow == addrB / numUnitsPerRow' only when
 * numUnitsPerRow is a power of two. */
inline bool isEqualRow(int addrA, int addrB, int numUnitsPerRow)
{
    const int rowMask = ~(numUnitsPerRow - 1);
    const int differingBits = addrA ^ addrB;
    return !(differingBits & rowMask);
}
72
73/* Check whether 2 addresses point to the same row or column */
74inline bool isEqualRowOrCol(int addrA, int addrB, int numUnitsPerRow)
75{
76 return isEqualCol(addrA, addrB, numUnitsPerRow) | isEqualRow(addrA, addrB, numUnitsPerRow);
77}
78
/* True when the raster address lies in column 0.
 * Mask form of 'addr % numUnitsPerRow == 0' (power-of-two row length). */
inline bool isZeroCol(int addr, int numUnitsPerRow)
{
    const int columnBits = addr & (numUnitsPerRow - 1);
    return columnBits == 0;
}
85
/* True when the raster address lies in row 0.
 * Mask form of 'addr / numUnitsPerRow == 0' (power-of-two row length). */
inline bool isZeroRow(int addr, int numUnitsPerRow)
{
    const int rowBits = addr & ~(numUnitsPerRow - 1);
    return rowBits == 0;
}
92
/* True when the column index of the raster address is smaller than 'val'.
 * Mask form of 'addr % numUnitsPerRow < val' (power-of-two row length). */
inline bool lessThanCol(int addr, int val, int numUnitsPerRow)
{
    const int column = addr & (numUnitsPerRow - 1);
    return column < val;
}
99
/* True when the row index of the raster address is smaller than 'val'.
 * Evaluated as 'addr < val * numUnitsPerRow', i.e. the address lies before
 * the first unit of row 'val'. */
inline bool lessThanRow(int addr, int val, int numUnitsPerRow)
{
    const int firstAddrOfRowVal = val * numUnitsPerRow;
    return addr < firstAddrOfRowVal;
}
106
107inline MV scaleMv(MV mv, int scale)
108{
109 int mvx = Clip3(-32768, 32767, (scale * mv.x + 127 + (scale * mv.x < 0)) >> 8);
110 int mvy = Clip3(-32768, 32767, (scale * mv.y + 127 + (scale * mv.y < 0)) >> 8);
111
112 return MV((int16_t)mvx, (int16_t)mvy);
113}
114
/* Partition geometry table, indexed as partTable[partMode][partIdx][field].
 *   field 0: partition size, field 1: partition offset.
 * Each value packs two 4-bit numbers as 0xXY, where X and Y are expressed in
 * quarters of the block size. Unused partition slots are all zero. */
const uint32_t partTable[8][4][2] =
{
    /* SIZE_2Nx2N */
    { { 0x44, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 } },
    /* SIZE_2NxN */
    { { 0x42, 0x00 }, { 0x42, 0x02 }, { 0x00, 0x00 }, { 0x00, 0x00 } },
    /* SIZE_Nx2N */
    { { 0x24, 0x00 }, { 0x24, 0x20 }, { 0x00, 0x00 }, { 0x00, 0x00 } },
    /* SIZE_NxN */
    { { 0x22, 0x00 }, { 0x22, 0x20 }, { 0x22, 0x02 }, { 0x22, 0x22 } },
    /* SIZE_2NxnU */
    { { 0x41, 0x00 }, { 0x43, 0x01 }, { 0x00, 0x00 }, { 0x00, 0x00 } },
    /* SIZE_2NxnD */
    { { 0x43, 0x00 }, { 0x41, 0x03 }, { 0x00, 0x00 }, { 0x00, 0x00 } },
    /* SIZE_nLx2N */
    { { 0x14, 0x00 }, { 0x34, 0x10 }, { 0x00, 0x00 }, { 0x00, 0x00 } },
    /* SIZE_nRx2N */
    { { 0x34, 0x00 }, { 0x14, 0x30 }, { 0x00, 0x00 }, { 0x00, 0x00 } }
};
132
/* Partition address table, indexed as partAddrTable[partMode][partIdx].
 * Modes with only two partitions repeat the second partition's address in
 * the remaining slots. */
const uint32_t partAddrTable[8][4] =
{
    /* SIZE_2Nx2N */ { 0x00, 0x00, 0x00, 0x00 },
    /* SIZE_2NxN  */ { 0x00, 0x08, 0x08, 0x08 },
    /* SIZE_Nx2N  */ { 0x00, 0x04, 0x04, 0x04 },
    /* SIZE_NxN   */ { 0x00, 0x04, 0x08, 0x0C },
    /* SIZE_2NxnU */ { 0x00, 0x02, 0x02, 0x02 },
    /* SIZE_2NxnD */ { 0x00, 0x0A, 0x0A, 0x0A },
    /* SIZE_nLx2N */ { 0x00, 0x01, 0x01, 0x01 },
    /* SIZE_nRx2N */ { 0x00, 0x05, 0x05, 0x05 }
};
146
147}
148
149cubcast_t CUData::s_partSet[NUM_FULL_DEPTH] = { NULL, NULL, NULL, NULL, NULL };
150uint32_t CUData::s_numPartInCUSize;
151
152CUData::CUData()
153{
154 memset(this, 0, sizeof(*this));
155}
156
157void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth, int csp, int instance)
158{
159 m_chromaFormat = csp;
160 m_hChromaShift = CHROMA_H_SHIFT(csp);
161 m_vChromaShift = CHROMA_V_SHIFT(csp);
162 m_numPartitions = NUM_CU_PARTITIONS >> (depth * 2);
163
164 if (!s_partSet[0])
165 {
166 s_numPartInCUSize = 1 << g_maxFullDepth;
167 switch (g_maxLog2CUSize)
168 {
169 case 6:
170 s_partSet[0] = bcast256;
171 s_partSet[1] = bcast64;
172 s_partSet[2] = bcast16;
173 s_partSet[3] = bcast4;
174 s_partSet[4] = bcast1;
175 break;
176 case 5:
177 s_partSet[0] = bcast64;
178 s_partSet[1] = bcast16;
179 s_partSet[2] = bcast4;
180 s_partSet[3] = bcast1;
181 s_partSet[4] = NULL;
182 break;
183 case 4:
184 s_partSet[0] = bcast16;
185 s_partSet[1] = bcast4;
186 s_partSet[2] = bcast1;
187 s_partSet[3] = NULL;
188 s_partSet[4] = NULL;
189 break;
190 default:
191 X265_CHECK(0, "unexpected CTU size\n");
192 break;
193 }
194 }
195
196 switch (m_numPartitions)
197 {
198 case 256: // 64x64 CU
199 m_partCopy = copy256;
200 m_partSet = bcast256;
201 m_subPartCopy = copy64;
202 m_subPartSet = bcast64;
203 break;
204 case 64: // 32x32 CU
205 m_partCopy = copy64;
206 m_partSet = bcast64;
207 m_subPartCopy = copy16;
208 m_subPartSet = bcast16;
209 break;
210 case 16: // 16x16 CU
211 m_partCopy = copy16;
212 m_partSet = bcast16;
213 m_subPartCopy = copy4;
214 m_subPartSet = bcast4;
215 break;
216 case 4: // 8x8 CU
217 m_partCopy = copy4;
218 m_partSet = bcast4;
219 m_subPartCopy = NULL;
220 m_subPartSet = NULL;
221 break;
222 default:
223 X265_CHECK(0, "unexpected CU partition count\n");
224 break;
225 }
226
227 /* Each CU's data is layed out sequentially within the charMemBlock */
228 uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance;
229
b53f7c52 230 m_qp = (int8_t*)charBuf; charBuf += m_numPartitions;
72b9787e 231 m_log2CUSize = charBuf; charBuf += m_numPartitions;
72b9787e
JB
232 m_lumaIntraDir = charBuf; charBuf += m_numPartitions;
233 m_tqBypass = charBuf; charBuf += m_numPartitions;
b53f7c52
JB
234 m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
235 m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
72b9787e 236 m_cuDepth = charBuf; charBuf += m_numPartitions;
b53f7c52
JB
237 m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
238 m_partSize = charBuf; charBuf += m_numPartitions;
72b9787e
JB
239 m_mergeFlag = charBuf; charBuf += m_numPartitions;
240 m_interDir = charBuf; charBuf += m_numPartitions;
241 m_mvpIdx[0] = charBuf; charBuf += m_numPartitions;
242 m_mvpIdx[1] = charBuf; charBuf += m_numPartitions;
243 m_tuDepth = charBuf; charBuf += m_numPartitions;
244 m_transformSkip[0] = charBuf; charBuf += m_numPartitions;
245 m_transformSkip[1] = charBuf; charBuf += m_numPartitions;
246 m_transformSkip[2] = charBuf; charBuf += m_numPartitions;
247 m_cbf[0] = charBuf; charBuf += m_numPartitions;
248 m_cbf[1] = charBuf; charBuf += m_numPartitions;
249 m_cbf[2] = charBuf; charBuf += m_numPartitions;
250 m_chromaIntraDir = charBuf; charBuf += m_numPartitions;
251
252 X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
253
254 m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
255 m_mv[1] = m_mv[0] + m_numPartitions;
256 m_mvd[0] = m_mv[1] + m_numPartitions;
257 m_mvd[1] = m_mvd[0] + m_numPartitions;
258
259 uint32_t cuSize = g_maxCUSize >> depth;
260 uint32_t sizeL = cuSize * cuSize;
261 uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
262 m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2);
263 m_trCoeff[1] = m_trCoeff[0] + sizeL;
264 m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC;
265}
266
267void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp)
268{
269 m_encData = frame.m_encData;
270 m_slice = m_encData->m_slice;
271 m_cuAddr = cuAddr;
272 m_cuPelX = (cuAddr % m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize;
273 m_cuPelY = (cuAddr / m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize;
274 m_absIdxInCTU = 0;
275 m_numPartitions = NUM_CU_PARTITIONS;
276
277 /* sequential memsets */
278 m_partSet((uint8_t*)m_qp, (uint8_t)qp);
279 m_partSet(m_log2CUSize, (uint8_t)g_maxLog2CUSize);
72b9787e
JB
280 m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
281 m_partSet(m_tqBypass, (uint8_t)frame.m_encData->m_param->bLossless);
282 if (m_slice->m_sliceType != I_SLICE)
283 {
284 m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
285 m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
286 }
287
288 X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
289
290 /* initialize the remaining CU data in one memset */
b53f7c52 291 memset(m_cuDepth, 0, (BytesPerPartition - 6) * m_numPartitions);
72b9787e
JB
292
293 uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
294 m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
295 m_cuAbove = (m_cuAddr / widthInCU) ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL;
296 m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;
297 m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
298}
299
300// initialize Sub partition
301void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom)
302{
303 m_absIdxInCTU = cuGeom.encodeIdx;
304 m_encData = ctu.m_encData;
305 m_slice = ctu.m_slice;
306 m_cuAddr = ctu.m_cuAddr;
307 m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.encodeIdx];
308 m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.encodeIdx];
309 m_cuLeft = ctu.m_cuLeft;
310 m_cuAbove = ctu.m_cuAbove;
311 m_cuAboveLeft = ctu.m_cuAboveLeft;
312 m_cuAboveRight = ctu.m_cuAboveRight;
313 X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n");
314
315 /* sequential memsets */
316 m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]);
317 m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize);
72b9787e
JB
318 m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
319 m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless);
320 m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
321 m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
322 m_partSet(m_cuDepth, (uint8_t)cuGeom.depth);
323
324 /* initialize the remaining CU data in one memset */
b53f7c52 325 memset(m_predMode, 0, (BytesPerPartition - 7) * m_numPartitions);
72b9787e
JB
326}
327
328/* Copy the results of a sub-part (split) CU to the parent CU */
329void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx)
330{
331 X265_CHECK(subPartIdx < 4, "part unit should be less than 4\n");
332
333 uint32_t offset = childGeom.numPartitions * subPartIdx;
334
335 m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
336 m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
72b9787e
JB
337 m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
338 m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
339 m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
340 m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
341 m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth);
b53f7c52
JB
342 m_subPartCopy(m_predMode + offset, subCU.m_predMode);
343 m_subPartCopy(m_partSize + offset, subCU.m_partSize);
72b9787e
JB
344 m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
345 m_subPartCopy(m_interDir + offset, subCU.m_interDir);
346 m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
347 m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]);
348 m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth);
349 m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]);
350 m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]);
351 m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]);
352 m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]);
353 m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]);
354 m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]);
355 m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir);
356
357 memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV));
358 memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV));
359 memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV));
360 memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV));
361
362 uint32_t tmp = 1 << ((g_maxLog2CUSize - childGeom.depth) * 2);
363 uint32_t tmp2 = subPartIdx * tmp;
364 memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t) * tmp);
365
366 uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift);
367 uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift);
368 memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC);
369 memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC);
370}
371
372/* If a sub-CU part is not present (off the edge of the picture) its depth and
373 * log2size should still be configured */
374void CUData::setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx)
375{
376 uint32_t offset = childGeom.numPartitions * subPartIdx;
377 m_subPartSet(m_cuDepth + offset, (uint8_t)childGeom.depth);
378 m_subPartSet(m_log2CUSize + offset, (uint8_t)childGeom.log2CUSize);
379}
380
381/* Copy all CU data from one instance to the next, except set lossless flag
382 * This will only get used when --cu-lossless is enabled but --lossless is not. */
383void CUData::initLosslessCU(const CUData& cu, const CUGeom& cuGeom)
384{
385 /* Start by making an exact copy */
386 m_encData = cu.m_encData;
387 m_slice = cu.m_slice;
388 m_cuAddr = cu.m_cuAddr;
389 m_cuPelX = cu.m_cuPelX;
390 m_cuPelY = cu.m_cuPelY;
391 m_cuLeft = cu.m_cuLeft;
392 m_cuAbove = cu.m_cuAbove;
393 m_cuAboveLeft = cu.m_cuAboveLeft;
394 m_cuAboveRight = cu.m_cuAboveRight;
395 m_absIdxInCTU = cuGeom.encodeIdx;
396 m_numPartitions = cuGeom.numPartitions;
397 memcpy(m_qp, cu.m_qp, BytesPerPartition * m_numPartitions);
398 memcpy(m_mv[0], cu.m_mv[0], m_numPartitions * sizeof(MV));
399 memcpy(m_mv[1], cu.m_mv[1], m_numPartitions * sizeof(MV));
400 memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV));
401 memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV));
402
403 /* force TQBypass to true */
404 m_partSet(m_tqBypass, true);
405
406 /* clear residual coding flags */
b53f7c52 407 m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER));
72b9787e
JB
408 m_partSet(m_tuDepth, 0);
409 m_partSet(m_transformSkip[0], 0);
410 m_partSet(m_transformSkip[1], 0);
411 m_partSet(m_transformSkip[2], 0);
412 m_partSet(m_cbf[0], 0);
413 m_partSet(m_cbf[1], 0);
414 m_partSet(m_cbf[2], 0);
415}
416
417/* Copy completed predicted CU to CTU in picture */
418void CUData::copyToPic(uint32_t depth) const
419{
420 CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
421
422 m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
423 m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
72b9787e
JB
424 m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
425 m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
426 m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]);
427 m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]);
428 m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth);
b53f7c52
JB
429 m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
430 m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
72b9787e
JB
431 m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag);
432 m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir);
433 m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]);
434 m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]);
435 m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
436 m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
437 m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
438 m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
439 m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
440 m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
441 m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
442 m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
443
444 memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV));
445 memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV));
446 memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV));
447 memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV));
448
449 uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
450 uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
451 memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
452
453 uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift);
454 uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift);
455 memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC);
456 memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC);
457}
458
459/* The reverse of copyToPic, called only by encodeResidue */
460void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom)
461{
462 m_encData = ctu.m_encData;
463 m_slice = ctu.m_slice;
464 m_cuAddr = ctu.m_cuAddr;
465 m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.encodeIdx];
466 m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.encodeIdx];
467 m_absIdxInCTU = cuGeom.encodeIdx;
468 m_numPartitions = cuGeom.numPartitions;
469
470 /* copy out all prediction info for this part */
471 m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
472 m_partCopy(m_log2CUSize, ctu.m_log2CUSize + m_absIdxInCTU);
72b9787e
JB
473 m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
474 m_partCopy(m_tqBypass, ctu.m_tqBypass + m_absIdxInCTU);
475 m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU);
476 m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU);
477 m_partCopy(m_cuDepth, ctu.m_cuDepth + m_absIdxInCTU);
b53f7c52
JB
478 m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */
479 m_partCopy(m_partSize, ctu.m_partSize + m_absIdxInCTU);
72b9787e
JB
480 m_partCopy(m_mergeFlag, ctu.m_mergeFlag + m_absIdxInCTU);
481 m_partCopy(m_interDir, ctu.m_interDir + m_absIdxInCTU);
482 m_partCopy(m_mvpIdx[0], ctu.m_mvpIdx[0] + m_absIdxInCTU);
483 m_partCopy(m_mvpIdx[1], ctu.m_mvpIdx[1] + m_absIdxInCTU);
484 m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU);
485
486 memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
487 memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
488 memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
489 memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
490
491 /* clear residual coding flags */
72b9787e
JB
492 m_partSet(m_tuDepth, 0);
493 m_partSet(m_transformSkip[0], 0);
494 m_partSet(m_transformSkip[1], 0);
495 m_partSet(m_transformSkip[2], 0);
496 m_partSet(m_cbf[0], 0);
497 m_partSet(m_cbf[1], 0);
498 m_partSet(m_cbf[2], 0);
499}
500
501/* Only called by encodeResidue, these fields can be modified during inter/intra coding */
502void CUData::updatePic(uint32_t depth) const
503{
504 CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
505
506 m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
507 m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
508 m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
509 m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
b53f7c52 510 m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
72b9787e
JB
511 m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
512 m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
513 m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
514 m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
515 m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
516
517 uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
518 uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
519 memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
520 tmpY >>= m_hChromaShift + m_vChromaShift;
521 tmpY2 >>= m_hChromaShift + m_vChromaShift;
522 memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY);
523 memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY);
524}
525
526const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const
527{
528 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
529
530 if (!isZeroCol(absPartIdx, s_numPartInCUSize))
531 {
532 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU];
533 lPartUnitIdx = g_rasterToZscan[absPartIdx - 1];
534 if (isEqualCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
535 return m_encData->getPicCTU(m_cuAddr);
536 else
537 {
538 lPartUnitIdx -= m_absIdxInCTU;
539 return this;
540 }
541 }
542
543 lPartUnitIdx = g_rasterToZscan[absPartIdx + s_numPartInCUSize - 1];
544 return m_cuLeft;
545}
546
b53f7c52 547const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const
72b9787e
JB
548{
549 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
550
551 if (!isZeroRow(absPartIdx, s_numPartInCUSize))
552 {
553 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU];
554 aPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize];
555 if (isEqualRow(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
556 return m_encData->getPicCTU(m_cuAddr);
557 else
72b9787e 558 aPartUnitIdx -= m_absIdxInCTU;
b53f7c52 559 return this;
72b9787e
JB
560 }
561
72b9787e
JB
562 aPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - s_numPartInCUSize];
563 return m_cuAbove;
564}
565
566const CUData* CUData::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const
567{
568 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
569
570 if (!isZeroCol(absPartIdx, s_numPartInCUSize))
571 {
572 if (!isZeroRow(absPartIdx, s_numPartInCUSize))
573 {
574 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU];
575 alPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize - 1];
576 if (isEqualRowOrCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
577 return m_encData->getPicCTU(m_cuAddr);
578 else
579 {
580 alPartUnitIdx -= m_absIdxInCTU;
581 return this;
582 }
583 }
584 alPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - s_numPartInCUSize - 1];
585 return m_cuAbove;
586 }
587
588 if (!isZeroRow(absPartIdx, s_numPartInCUSize))
589 {
590 alPartUnitIdx = g_rasterToZscan[absPartIdx - 1];
591 return m_cuLeft;
592 }
593
594 alPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - 1];
595 return m_cuAboveLeft;
596}
597
598const CUData* CUData::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const
599{
600 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)
601 return NULL;
602
603 uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx];
604
605 if (lessThanCol(absPartIdxRT, s_numPartInCUSize - 1, s_numPartInCUSize))
606 {
607 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
608 {
609 if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1])
610 {
611 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
612 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1];
613 if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize))
614 return m_encData->getPicCTU(m_cuAddr);
615 else
616 {
617 arPartUnitIdx -= m_absIdxInCTU;
618 return this;
619 }
620 }
621 return NULL;
622 }
623 arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - s_numPartInCUSize + 1];
624 return m_cuAbove;
625 }
626
627 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
628 return NULL;
629
630 arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize];
631 return m_cuAboveRight;
632}
633
634const CUData* CUData::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const
635{
636 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
637 return NULL;
638
639 uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
640
641 if (lessThanRow(absPartIdxLB, s_numPartInCUSize - 1, s_numPartInCUSize))
642 {
643 if (!isZeroCol(absPartIdxLB, s_numPartInCUSize))
644 {
645 if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + s_numPartInCUSize - 1])
646 {
647 uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * s_numPartInCUSize;
648 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + s_numPartInCUSize - 1];
649 if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, s_numPartInCUSize))
650 return m_encData->getPicCTU(m_cuAddr);
651 else
652 {
653 blPartUnitIdx -= m_absIdxInCTU;
654 return this;
655 }
656 }
657 return NULL;
658 }
659 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + s_numPartInCUSize * 2 - 1];
660 return m_cuLeft;
661 }
662
663 return NULL;
664}
665
666const CUData* CUData::getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const
667{
668 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picHeightInLumaSamples)
669 return NULL;
670
671 uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
672
673 if (lessThanRow(absPartIdxLB, s_numPartInCUSize - partUnitOffset, s_numPartInCUSize))
674 {
675 if (!isZeroCol(absPartIdxLB, s_numPartInCUSize))
676 {
677 if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + partUnitOffset * s_numPartInCUSize - 1])
678 {
679 uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * s_numPartInCUSize;
680 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + partUnitOffset * s_numPartInCUSize - 1];
681 if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, s_numPartInCUSize))
682 return m_encData->getPicCTU(m_cuAddr);
683 else
684 {
685 blPartUnitIdx -= m_absIdxInCTU;
686 return this;
687 }
688 }
689 return NULL;
690 }
691 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + partUnitOffset) * s_numPartInCUSize - 1];
692 if (!m_cuLeft || !m_cuLeft->m_slice)
693 return NULL;
694 return m_cuLeft;
695 }
696
697 return NULL;
698}
699
700const CUData* CUData::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const
701{
702 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picWidthInLumaSamples)
703 return NULL;
704
705 uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx];
706
707 if (lessThanCol(absPartIdxRT, s_numPartInCUSize - partUnitOffset, s_numPartInCUSize))
708 {
709 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
710 {
711 if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + partUnitOffset])
712 {
713 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
714 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + partUnitOffset];
715 if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize))
716 return m_encData->getPicCTU(m_cuAddr);
717 else
718 {
719 arPartUnitIdx -= m_absIdxInCTU;
720 return this;
721 }
722 }
723 return NULL;
724 }
725 arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset];
726 if (!m_cuAbove || !m_cuAbove->m_slice)
727 return NULL;
728 return m_cuAbove;
729 }
730
731 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
732 return NULL;
733
734 arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset - 1];
735 if ((m_cuAboveRight == NULL || m_cuAboveRight->m_slice == NULL || (m_cuAboveRight->m_cuAddr) > m_cuAddr))
736 return NULL;
737 return m_cuAboveRight;
738}
739
740/* Get left QpMinCu */
741const CUData* CUData::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInCTU) const
742{
743 uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
744 uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
745
746 // check for left CTU boundary
747 if (isZeroCol(absRorderQpMinCUIdx, s_numPartInCUSize))
748 return NULL;
749
750 // get index of left-CU relative to top-left corner of current quantization group
751 lPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - 1];
752
753 // return pointer to current CTU
754 return m_encData->getPicCTU(m_cuAddr);
755}
756
757/* Get above QpMinCu */
758const CUData* CUData::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInCTU) const
759{
760 uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
761 uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
762
763 // check for top CTU boundary
764 if (isZeroRow(absRorderQpMinCUIdx, s_numPartInCUSize))
765 return NULL;
766
767 // get index of top-CU relative to top-left corner of current quantization group
768 aPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - s_numPartInCUSize];
769
770 // return pointer to current CTU
771 return m_encData->getPicCTU(m_cuAddr);
772}
773
774/* Get reference QP from left QpMinCu or latest coded QP */
b53f7c52 775int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const
72b9787e
JB
776{
777 uint32_t lPartIdx = 0, aPartIdx = 0;
778 const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
779 const CUData* cUAbove = getQpMinCuAbove(aPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
780
781 return ((cULeft ? cULeft->m_qp[lPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + (cUAbove ? cUAbove->m_qp[aPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + 1) >> 1;
782}
783
784int CUData::getLastValidPartIdx(int absPartIdx) const
785{
786 int lastValidPartIdx = absPartIdx - 1;
787
788 while (lastValidPartIdx >= 0 && m_predMode[lastValidPartIdx] == MODE_NONE)
789 {
790 uint32_t depth = m_cuDepth[lastValidPartIdx];
791 lastValidPartIdx -= m_numPartitions >> (depth << 1);
792 }
793
794 return lastValidPartIdx;
795}
796
b53f7c52 797int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const
72b9787e
JB
798{
799 uint32_t quPartIdxMask = 0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2;
800 int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask);
801
802 if (lastValidPartIdx >= 0)
803 return m_qp[lastValidPartIdx];
804 else
805 {
806 if (m_absIdxInCTU)
807 return m_encData->getPicCTU(m_cuAddr)->getLastCodedQP(m_absIdxInCTU);
808 else if (m_cuAddr > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled && !(m_cuAddr % m_slice->m_sps->numCuInWidth)))
809 return m_encData->getPicCTU(m_cuAddr - 1)->getLastCodedQP(NUM_CU_PARTITIONS);
810 else
b53f7c52 811 return (int8_t)m_slice->m_sliceQp;
72b9787e
JB
812 }
813}
814
815/* Get allowed chroma intra modes */
816void CUData::getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const
817{
818 modeList[0] = PLANAR_IDX;
819 modeList[1] = VER_IDX;
820 modeList[2] = HOR_IDX;
821 modeList[3] = DC_IDX;
822 modeList[4] = DM_CHROMA_IDX;
823
824 uint32_t lumaMode = m_lumaIntraDir[absPartIdx];
825
826 for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
827 {
828 if (lumaMode == modeList[i])
829 {
830 modeList[i] = 34; // VER+8 mode
831 break;
832 }
833 }
834}
835
836/* Get most probable intra modes */
837int CUData::getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const
838{
839 const CUData* tempCU;
840 uint32_t tempPartIdx;
841 uint32_t leftIntraDir, aboveIntraDir;
842
843 // Get intra direction of left PU
844 tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
845
846 leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
847
848 // Get intra direction of above PU
b53f7c52 849 tempCU = g_zscanToPelY[m_absIdxInCTU + absPartIdx] > 0 ? getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx) : NULL;
72b9787e
JB
850
851 aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
852
853 if (leftIntraDir == aboveIntraDir)
854 {
855 if (leftIntraDir >= 2) // angular modes
856 {
857 intraDirPred[0] = leftIntraDir;
858 intraDirPred[1] = ((leftIntraDir - 2 + 31) & 31) + 2;
859 intraDirPred[2] = ((leftIntraDir - 2 + 1) & 31) + 2;
860 }
861 else //non-angular
862 {
863 intraDirPred[0] = PLANAR_IDX;
864 intraDirPred[1] = DC_IDX;
865 intraDirPred[2] = VER_IDX;
866 }
867 return 1;
868 }
869 else
870 {
871 intraDirPred[0] = leftIntraDir;
872 intraDirPred[1] = aboveIntraDir;
873
874 if (leftIntraDir && aboveIntraDir) //both modes are non-planar
875 intraDirPred[2] = PLANAR_IDX;
876 else
877 intraDirPred[2] = (leftIntraDir + aboveIntraDir) < 2 ? VER_IDX : DC_IDX;
878 return 2;
879 }
880}
881
882uint32_t CUData::getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const
883{
884 const CUData* tempCU;
885 uint32_t tempPartIdx;
886 uint32_t ctx;
887
888 // Get left split flag
889 tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
890 ctx = (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0;
891
892 // Get above split flag
893 tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx);
894 ctx += (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0;
895
896 return ctx;
897}
898
899void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
900{
901 uint32_t log2CUSize = m_log2CUSize[absPartIdx];
b53f7c52 902 uint32_t splitFlag = m_partSize[absPartIdx] != SIZE_2Nx2N;
72b9787e
JB
903
904 tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
905 tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
906
907 tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag), tuDepthRange[1]));
908}
909
910void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
911{
912 uint32_t log2CUSize = m_log2CUSize[absPartIdx];
913 uint32_t quadtreeTUMaxDepth = m_slice->m_sps->quadtreeTUMaxDepthInter;
914 uint32_t splitFlag = quadtreeTUMaxDepth == 1 && m_partSize[absPartIdx] != SIZE_2Nx2N;
915
916 tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
917 tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
918
919 tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag), tuDepthRange[1]));
920}
921
922uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const
923{
924 const CUData* tempCU;
925 uint32_t tempPartIdx;
926 uint32_t ctx;
927
928 // Get BCBP of left PU
929 tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
930 ctx = tempCU ? tempCU->isSkipped(tempPartIdx) : 0;
931
932 // Get BCBP of above PU
933 tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx);
934 ctx += tempCU ? tempCU->isSkipped(tempPartIdx) : 0;
935
936 return ctx;
937}
938
b53f7c52 939bool CUData::setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth)
72b9787e
JB
940{
941 uint32_t curPartNumb = NUM_CU_PARTITIONS >> (depth << 1);
942 uint32_t curPartNumQ = curPartNumb >> 2;
943
944 if (m_cuDepth[absPartIdx] > depth)
945 {
946 for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
947 if (setQPSubCUs(qp, absPartIdx + subPartIdx * curPartNumQ, depth + 1))
948 return true;
949 }
950 else
951 {
952 if (getQtRootCbf(absPartIdx))
953 return true;
954 else
955 setQPSubParts(qp, absPartIdx, depth);
956 }
957
958 return false;
959}
960
961void CUData::setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx)
962{
963 uint32_t curPartNumQ = m_numPartitions >> 2;
964 X265_CHECK(puIdx < 2, "unexpected part unit index\n");
965
966 switch (m_partSize[absPartIdx])
967 {
968 case SIZE_2Nx2N:
969 memset(m_interDir + absPartIdx, dir, 4 * curPartNumQ);
970 break;
971 case SIZE_2NxN:
972 memset(m_interDir + absPartIdx, dir, 2 * curPartNumQ);
973 break;
974 case SIZE_Nx2N:
975 memset(m_interDir + absPartIdx, dir, curPartNumQ);
976 memset(m_interDir + absPartIdx + 2 * curPartNumQ, dir, curPartNumQ);
977 break;
978 case SIZE_NxN:
979 memset(m_interDir + absPartIdx, dir, curPartNumQ);
980 break;
981 case SIZE_2NxnU:
982 if (!puIdx)
983 {
984 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
985 memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
986 }
987 else
988 {
989 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
990 memset(m_interDir + absPartIdx + curPartNumQ, dir, ((curPartNumQ >> 1) + (curPartNumQ << 1)));
991 }
992 break;
993 case SIZE_2NxnD:
994 if (!puIdx)
995 {
996 memset(m_interDir + absPartIdx, dir, ((curPartNumQ << 1) + (curPartNumQ >> 1)));
997 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ, dir, (curPartNumQ >> 1));
998 }
999 else
1000 {
1001 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
1002 memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
1003 }
1004 break;
1005 case SIZE_nLx2N:
1006 if (!puIdx)
1007 {
1008 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1009 memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1010 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1011 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1012 }
1013 else
1014 {
1015 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1016 memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1017 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1018 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1019 }
1020 break;
1021 case SIZE_nRx2N:
1022 if (!puIdx)
1023 {
1024 memset(m_interDir + absPartIdx, dir, (curPartNumQ + (curPartNumQ >> 2)));
1025 memset(m_interDir + absPartIdx + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1026 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1027 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1028 }
1029 else
1030 {
1031 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1032 memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1033 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1034 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1035 }
1036 break;
1037 default:
1038 X265_CHECK(0, "unexpected part type\n");
1039 break;
1040 }
1041}
1042
1043template<typename T>
1044void CUData::setAllPU(T* p, const T& val, int absPartIdx, int puIdx)
1045{
1046 int i;
1047
1048 p += absPartIdx;
1049 int numElements = m_numPartitions;
1050
1051 switch (m_partSize[absPartIdx])
1052 {
1053 case SIZE_2Nx2N:
1054 for (i = 0; i < numElements; i++)
1055 p[i] = val;
1056 break;
1057
1058 case SIZE_2NxN:
1059 numElements >>= 1;
1060 for (i = 0; i < numElements; i++)
1061 p[i] = val;
1062 break;
1063
1064 case SIZE_Nx2N:
1065 numElements >>= 2;
1066 for (i = 0; i < numElements; i++)
1067 {
1068 p[i] = val;
1069 p[i + 2 * numElements] = val;
1070 }
1071 break;
1072
1073 case SIZE_2NxnU:
1074 {
1075 int curPartNumQ = numElements >> 2;
1076 if (!puIdx)
1077 {
1078 T *pT = p;
1079 T *pT2 = p + curPartNumQ;
1080 for (i = 0; i < (curPartNumQ >> 1); i++)
1081 {
1082 pT[i] = val;
1083 pT2[i] = val;
1084 }
1085 }
1086 else
1087 {
1088 T *pT = p;
1089 for (i = 0; i < (curPartNumQ >> 1); i++)
1090 pT[i] = val;
1091
1092 pT = p + curPartNumQ;
1093 for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
1094 pT[i] = val;
1095 }
1096 break;
1097 }
1098
1099 case SIZE_2NxnD:
1100 {
1101 int curPartNumQ = numElements >> 2;
1102 if (!puIdx)
1103 {
1104 T *pT = p;
1105 for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
1106 pT[i] = val;
1107
1108 pT = p + (numElements - curPartNumQ);
1109 for (i = 0; i < (curPartNumQ >> 1); i++)
1110 pT[i] = val;
1111 }
1112 else
1113 {
1114 T *pT = p;
1115 T *pT2 = p + curPartNumQ;
1116 for (i = 0; i < (curPartNumQ >> 1); i++)
1117 {
1118 pT[i] = val;
1119 pT2[i] = val;
1120 }
1121 }
1122 break;
1123 }
1124
1125 case SIZE_nLx2N:
1126 {
1127 int curPartNumQ = numElements >> 2;
1128 if (!puIdx)
1129 {
1130 T *pT = p;
1131 T *pT2 = p + (curPartNumQ << 1);
1132 T *pT3 = p + (curPartNumQ >> 1);
1133 T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1134
1135 for (i = 0; i < (curPartNumQ >> 2); i++)
1136 {
1137 pT[i] = val;
1138 pT2[i] = val;
1139 pT3[i] = val;
1140 pT4[i] = val;
1141 }
1142 }
1143 else
1144 {
1145 T *pT = p;
1146 T *pT2 = p + (curPartNumQ << 1);
1147 for (i = 0; i < (curPartNumQ >> 2); i++)
1148 {
1149 pT[i] = val;
1150 pT2[i] = val;
1151 }
1152
1153 pT = p + (curPartNumQ >> 1);
1154 pT2 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1155 for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
1156 {
1157 pT[i] = val;
1158 pT2[i] = val;
1159 }
1160 }
1161 break;
1162 }
1163
1164 case SIZE_nRx2N:
1165 {
1166 int curPartNumQ = numElements >> 2;
1167 if (!puIdx)
1168 {
1169 T *pT = p;
1170 T *pT2 = p + (curPartNumQ << 1);
1171 for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
1172 {
1173 pT[i] = val;
1174 pT2[i] = val;
1175 }
1176
1177 pT = p + curPartNumQ + (curPartNumQ >> 1);
1178 pT2 = p + numElements - curPartNumQ + (curPartNumQ >> 1);
1179 for (i = 0; i < (curPartNumQ >> 2); i++)
1180 {
1181 pT[i] = val;
1182 pT2[i] = val;
1183 }
1184 }
1185 else
1186 {
1187 T *pT = p;
1188 T *pT2 = p + (curPartNumQ >> 1);
1189 T *pT3 = p + (curPartNumQ << 1);
1190 T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1191 for (i = 0; i < (curPartNumQ >> 2); i++)
1192 {
1193 pT[i] = val;
1194 pT2[i] = val;
1195 pT3[i] = val;
1196 pT4[i] = val;
1197 }
1198 }
1199 break;
1200 }
1201
1202 case SIZE_NxN:
1203 default:
1204 X265_CHECK(0, "unknown partition type\n");
1205 break;
1206 }
1207}
1208
1209void CUData::setPUMv(int list, const MV& mv, int absPartIdx, int puIdx)
1210{
1211 setAllPU(m_mv[list], mv, absPartIdx, puIdx);
1212}
1213
b53f7c52 1214void CUData::setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx)
72b9787e
JB
1215{
1216 setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx);
1217}
1218
1219void CUData::getPartIndexAndSize(uint32_t partIdx, uint32_t& outPartAddr, int& outWidth, int& outHeight) const
1220{
1221 int cuSize = 1 << m_log2CUSize[0];
1222 int partType = m_partSize[0];
1223
1224 int tmp = partTable[partType][partIdx][0];
1225 outWidth = ((tmp >> 4) * cuSize) >> 2;
1226 outHeight = ((tmp & 0xF) * cuSize) >> 2;
1227 outPartAddr = (partAddrTable[partType][partIdx] * m_numPartitions) >> 4;
1228}
1229
1230void CUData::getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& outMvField) const
1231{
1232 if (cu)
1233 {
1234 outMvField.mv = cu->m_mv[picList][absPartIdx];
1235 outMvField.refIdx = cu->m_refIdx[picList][absPartIdx];
1236 }
1237 else
1238 {
1239 // OUT OF BOUNDARY
b53f7c52 1240 outMvField.mv = 0;
72b9787e
JB
1241 outMvField.refIdx = REF_NOT_VALID;
1242 }
1243}
1244
1245void CUData::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const
1246{
1247 partIdxLT = m_absIdxInCTU;
1248 partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
1249
1250 switch (m_partSize[0])
1251 {
1252 case SIZE_2Nx2N: break;
1253 case SIZE_2NxN:
1254 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 1;
1255 partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 1;
1256 break;
1257 case SIZE_Nx2N:
1258 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 2;
1259 partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 2;
1260 break;
1261 case SIZE_NxN:
1262 partIdxLT += (m_numPartitions >> 2) * partIdx;
1263 partIdxRT += (m_numPartitions >> 2) * (partIdx - 1);
1264 break;
1265 case SIZE_2NxnU:
1266 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 3;
1267 partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 3;
1268 break;
1269 case SIZE_2NxnD:
1270 partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3);
1271 partIdxRT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3);
1272 break;
1273 case SIZE_nLx2N:
1274 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 4;
1275 partIdxRT -= (partIdx == 1) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4);
1276 break;
1277 case SIZE_nRx2N:
1278 partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4);
1279 partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 4;
1280 break;
1281 default:
1282 X265_CHECK(0, "unexpected part index\n");
1283 break;
1284 }
1285}
1286
1287uint32_t CUData::deriveLeftBottomIdx(uint32_t puIdx) const
1288{
1289 uint32_t outPartIdxLB;
1290 outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * s_numPartInCUSize];
1291
1292 switch (m_partSize[0])
1293 {
1294 case SIZE_2Nx2N:
1295 outPartIdxLB += m_numPartitions >> 1;
1296 break;
1297 case SIZE_2NxN:
1298 outPartIdxLB += puIdx ? m_numPartitions >> 1 : 0;
1299 break;
1300 case SIZE_Nx2N:
1301 outPartIdxLB += puIdx ? (m_numPartitions >> 2) * 3 : m_numPartitions >> 1;
1302 break;
1303 case SIZE_NxN:
1304 outPartIdxLB += (m_numPartitions >> 2) * puIdx;
1305 break;
1306 case SIZE_2NxnU:
1307 outPartIdxLB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
1308 break;
1309 case SIZE_2NxnD:
1310 outPartIdxLB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
1311 break;
1312 case SIZE_nLx2N:
1313 outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 4) : m_numPartitions >> 1;
1314 break;
1315 case SIZE_nRx2N:
1316 outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 2) + (m_numPartitions >> 4) : m_numPartitions >> 1;
1317 break;
1318 default:
1319 X265_CHECK(0, "unexpected part index\n");
1320 break;
1321 }
1322 return outPartIdxLB;
1323}
1324
1325/* Derives the partition index of neighboring bottom right block */
1326uint32_t CUData::deriveRightBottomIdx(uint32_t puIdx) const
1327{
1328 uint32_t outPartIdxRB;
1329 outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] +
1330 ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * s_numPartInCUSize +
1331 (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
1332
1333 switch (m_partSize[0])
1334 {
1335 case SIZE_2Nx2N:
1336 outPartIdxRB += m_numPartitions >> 1;
1337 break;
1338 case SIZE_2NxN:
1339 outPartIdxRB += puIdx ? m_numPartitions >> 1 : 0;
1340 break;
1341 case SIZE_Nx2N:
1342 outPartIdxRB += puIdx ? m_numPartitions >> 1 : m_numPartitions >> 2;
1343 break;
1344 case SIZE_NxN:
1345 outPartIdxRB += (m_numPartitions >> 2) * (puIdx - 1);
1346 break;
1347 case SIZE_2NxnU:
1348 outPartIdxRB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
1349 break;
1350 case SIZE_2NxnD:
1351 outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
1352 break;
1353 case SIZE_nLx2N:
1354 outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 3) + (m_numPartitions >> 4);
1355 break;
1356 case SIZE_nRx2N:
1357 outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3) + (m_numPartitions >> 4);
1358 break;
1359 default:
1360 X265_CHECK(0, "unexpected part index\n");
1361 break;
1362 }
1363 return outPartIdxRB;
1364}
1365
1366void CUData::deriveLeftRightTopIdxAdi(uint32_t& outPartIdxLT, uint32_t& outPartIdxRT, uint32_t partOffset, uint32_t partDepth) const
1367{
1368 uint32_t numPartInWidth = 1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - partDepth);
1369
1370 outPartIdxLT = m_absIdxInCTU + partOffset;
1371 outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + numPartInWidth - 1];
1372}
1373
1374bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const
1375{
1376 if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx])
1377 return false;
1378
1379 for (uint32_t refListIdx = 0; refListIdx < 2; refListIdx++)
1380 {
1381 if (m_interDir[absPartIdx] & (1 << refListIdx))
1382 {
1383 if (m_mv[refListIdx][absPartIdx] != candCU.m_mv[refListIdx][candAbsPartIdx] ||
1384 m_refIdx[refListIdx][absPartIdx] != candCU.m_refIdx[refListIdx][candAbsPartIdx])
1385 return false;
1386 }
1387 }
1388
1389 return true;
1390}
1391
1392/* Construct list of merging candidates */
1393uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*mvFieldNeighbours)[2], uint8_t* interDirNeighbours) const
1394{
1395 uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1396 const bool isInterB = m_slice->isInterB();
1397
1398 const uint32_t maxNumMergeCand = m_slice->m_maxNumMergeCand;
1399
1400 for (uint32_t i = 0; i < maxNumMergeCand; ++i)
1401 {
b53f7c52
JB
1402 mvFieldNeighbours[i][0].mv = 0;
1403 mvFieldNeighbours[i][1].mv = 0;
72b9787e
JB
1404 mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID;
1405 mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID;
1406 }
1407
1408 /* calculate the location of upper-left corner pixel and size of the current PU */
1409 int xP, yP, nPSW, nPSH;
1410
1411 int cuSize = 1 << m_log2CUSize[0];
1412 int partMode = m_partSize[0];
1413
1414 int tmp = partTable[partMode][puIdx][0];
1415 nPSW = ((tmp >> 4) * cuSize) >> 2;
1416 nPSH = ((tmp & 0xF) * cuSize) >> 2;
1417
1418 tmp = partTable[partMode][puIdx][1];
1419 xP = ((tmp >> 4) * cuSize) >> 2;
1420 yP = ((tmp & 0xF) * cuSize) >> 2;
1421
1422 uint32_t count = 0;
1423
1424 uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
1425 PartSize curPS = (PartSize)m_partSize[absPartIdx];
1426
1427 // left
1428 uint32_t leftPartIdx = 0;
1429 const CUData* cuLeft = getPULeft(leftPartIdx, partIdxLB);
1430 bool isAvailableA1 = cuLeft &&
1431 cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) &&
1432 !(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) &&
b53f7c52 1433 cuLeft->isInter(leftPartIdx);
72b9787e
JB
1434 if (isAvailableA1)
1435 {
1436 // get Inter Dir
1437 interDirNeighbours[count] = cuLeft->m_interDir[leftPartIdx];
1438 // get Mv from Left
1439 cuLeft->getMvField(cuLeft, leftPartIdx, 0, mvFieldNeighbours[count][0]);
1440 if (isInterB)
1441 cuLeft->getMvField(cuLeft, leftPartIdx, 1, mvFieldNeighbours[count][1]);
1442
1443 count++;
1444
1445 if (count == maxNumMergeCand)
1446 return maxNumMergeCand;
1447 }
1448
1449 deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
1450
1451 // above
1452 uint32_t abovePartIdx = 0;
1453 const CUData* cuAbove = getPUAbove(abovePartIdx, partIdxRT);
1454 bool isAvailableB1 = cuAbove &&
1455 cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) &&
1456 !(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) &&
b53f7c52 1457 cuAbove->isInter(abovePartIdx);
72b9787e
JB
1458 if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx)))
1459 {
1460 // get Inter Dir
1461 interDirNeighbours[count] = cuAbove->m_interDir[abovePartIdx];
1462 // get Mv from Left
1463 cuAbove->getMvField(cuAbove, abovePartIdx, 0, mvFieldNeighbours[count][0]);
1464 if (isInterB)
1465 cuAbove->getMvField(cuAbove, abovePartIdx, 1, mvFieldNeighbours[count][1]);
1466
1467 count++;
1468
1469 if (count == maxNumMergeCand)
1470 return maxNumMergeCand;
1471 }
1472
1473 // above right
1474 uint32_t aboveRightPartIdx = 0;
1475 const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT);
1476 bool isAvailableB0 = cuAboveRight &&
1477 cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) &&
b53f7c52 1478 cuAboveRight->isInter(aboveRightPartIdx);
72b9787e
JB
1479 if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx)))
1480 {
1481 // get Inter Dir
1482 interDirNeighbours[count] = cuAboveRight->m_interDir[aboveRightPartIdx];
1483 // get Mv from Left
1484 cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, mvFieldNeighbours[count][0]);
1485 if (isInterB)
1486 cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, mvFieldNeighbours[count][1]);
1487
1488 count++;
1489
1490 if (count == maxNumMergeCand)
1491 return maxNumMergeCand;
1492 }
1493
1494 // left bottom
1495 uint32_t leftBottomPartIdx = 0;
1496 const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB);
1497 bool isAvailableA0 = cuLeftBottom &&
1498 cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) &&
b53f7c52 1499 cuLeftBottom->isInter(leftBottomPartIdx);
72b9787e
JB
1500 if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx)))
1501 {
1502 // get Inter Dir
1503 interDirNeighbours[count] = cuLeftBottom->m_interDir[leftBottomPartIdx];
1504 // get Mv from Left
1505 cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, mvFieldNeighbours[count][0]);
1506 if (isInterB)
1507 cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, mvFieldNeighbours[count][1]);
1508
1509 count++;
1510
1511 if (count == maxNumMergeCand)
1512 return maxNumMergeCand;
1513 }
1514
1515 // above left
1516 if (count < 4)
1517 {
1518 uint32_t aboveLeftPartIdx = 0;
1519 const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr);
1520 bool isAvailableB2 = cuAboveLeft &&
1521 cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) &&
b53f7c52 1522 cuAboveLeft->isInter(aboveLeftPartIdx);
72b9787e
JB
1523 if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx))
1524 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx)))
1525 {
1526 // get Inter Dir
1527 interDirNeighbours[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx];
1528 // get Mv from Left
1529 cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, mvFieldNeighbours[count][0]);
1530 if (isInterB)
1531 cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, mvFieldNeighbours[count][1]);
1532
1533 count++;
1534
1535 if (count == maxNumMergeCand)
1536 return maxNumMergeCand;
1537 }
1538 }
1539 if (m_slice->m_sps->bTemporalMVPEnabled)
1540 {
1541 uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1542 MV colmv;
1543 int ctuIdx = -1;
1544
1545 // image boundary check
1546 if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1547 m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1548 {
1549 uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1550 uint32_t numPartInCUSize = s_numPartInCUSize;
1551 bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU
1552 bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row of CTU
1553
1554 if (bNotLastCol && bNotLastRow)
1555 {
1556 absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1];
1557 ctuIdx = m_cuAddr;
1558 }
1559 else if (bNotLastCol)
1560 absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)];
1561 else if (bNotLastRow)
1562 {
1563 absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
1564 ctuIdx = m_cuAddr + 1;
1565 }
1566 else // is the right bottom corner of CTU
1567 absPartAddr = 0;
1568 }
1569
1570 int refIdx = 0;
1571 uint32_t partIdxCenter = deriveCenterIdx(puIdx);
1572 uint32_t curCTUIdx = m_cuAddr;
1573 int dir = 0;
1574 bool bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, 0, ctuIdx, absPartAddr);
1575 if (!bExistMV)
1576 bExistMV = getColMVP(colmv, refIdx, 0, curCTUIdx, partIdxCenter);
1577 if (bExistMV)
1578 {
1579 dir |= 1;
1580 mvFieldNeighbours[count][0].mv = colmv;
1581 mvFieldNeighbours[count][0].refIdx = refIdx;
1582 }
1583
1584 if (isInterB)
1585 {
1586 bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, 1, ctuIdx, absPartAddr);
1587 if (!bExistMV)
1588 bExistMV = getColMVP(colmv, refIdx, 1, curCTUIdx, partIdxCenter);
1589
1590 if (bExistMV)
1591 {
1592 dir |= 2;
1593 mvFieldNeighbours[count][1].mv = colmv;
1594 mvFieldNeighbours[count][1].refIdx = refIdx;
1595 }
1596 }
1597
1598 if (dir != 0)
1599 {
1600 interDirNeighbours[count] = (uint8_t)dir;
1601
1602 count++;
1603
1604 if (count == maxNumMergeCand)
1605 return maxNumMergeCand;
1606 }
1607 }
1608
1609 if (isInterB)
1610 {
1611 const uint32_t cutoff = count * (count - 1);
1612 uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }
1613 uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }
1614
1615 for (uint32_t idx = 0; idx < cutoff; idx++)
1616 {
1617 int i = priorityList0 & 3;
1618 int j = priorityList1 & 3;
1619 priorityList0 >>= 2;
1620 priorityList1 >>= 2;
1621
1622 if ((interDirNeighbours[i] & 0x1) && (interDirNeighbours[j] & 0x2))
1623 {
1624 // get Mv from cand[i] and cand[j]
1625 int refIdxL0 = mvFieldNeighbours[i][0].refIdx;
1626 int refIdxL1 = mvFieldNeighbours[j][1].refIdx;
1627 int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0];
1628 int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1];
1629 if (!(refPOCL0 == refPOCL1 && mvFieldNeighbours[i][0].mv == mvFieldNeighbours[j][1].mv))
1630 {
1631 mvFieldNeighbours[count][0].mv = mvFieldNeighbours[i][0].mv;
1632 mvFieldNeighbours[count][0].refIdx = refIdxL0;
1633 mvFieldNeighbours[count][1].mv = mvFieldNeighbours[j][1].mv;
1634 mvFieldNeighbours[count][1].refIdx = refIdxL1;
1635 interDirNeighbours[count] = 3;
1636
1637 count++;
1638
1639 if (count == maxNumMergeCand)
1640 return maxNumMergeCand;
1641 }
1642 }
1643 }
1644 }
1645 int numRefIdx = (isInterB) ? X265_MIN(m_slice->m_numRefIdx[0], m_slice->m_numRefIdx[1]) : m_slice->m_numRefIdx[0];
1646 int r = 0;
1647 int refcnt = 0;
1648 while (count < maxNumMergeCand)
1649 {
1650 interDirNeighbours[count] = 1;
b53f7c52 1651 mvFieldNeighbours[count][0].mv = 0;
72b9787e
JB
1652 mvFieldNeighbours[count][0].refIdx = r;
1653
1654 if (isInterB)
1655 {
1656 interDirNeighbours[count] = 3;
1657 mvFieldNeighbours[count][1].mv.word = 0;
1658 mvFieldNeighbours[count][1].refIdx = r;
1659 }
1660
1661 count++;
1662
1663 if (refcnt == numRefIdx - 1)
1664 r = 0;
1665 else
1666 {
1667 ++r;
1668 ++refcnt;
1669 }
1670 }
1671
1672 return count;
1673}
1674
1675/* Check whether the current PU and a spatial neighboring PU are in a same ME region */
1676bool CUData::isDiffMER(int xN, int yN, int xP, int yP) const
1677{
1678 uint32_t plevel = 2;
1679
1680 if ((xN >> plevel) != (xP >> plevel))
1681 return true;
1682 if ((yN >> plevel) != (yP >> plevel))
1683 return true;
1684 return false;
1685}
1686
1687/* Constructs a list of candidates for AMVP, and a larger list of motion candidates */
1688int CUData::fillMvpCand(uint32_t puIdx, uint32_t absPartIdx, int picList, int refIdx, MV* amvpCand, MV* mvc) const
1689{
1690 int num = 0;
1691
1692 // spatial MV
1693 uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
1694
1695 deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
1696
1697 MV mv[MD_ABOVE_LEFT + 1];
1698 MV mvOrder[MD_ABOVE_LEFT + 1];
1699 bool valid[MD_ABOVE_LEFT + 1];
1700 bool validOrder[MD_ABOVE_LEFT + 1];
1701
1702 valid[MD_BELOW_LEFT] = addMVPCand(mv[MD_BELOW_LEFT], picList, refIdx, partIdxLB, MD_BELOW_LEFT);
1703 valid[MD_LEFT] = addMVPCand(mv[MD_LEFT], picList, refIdx, partIdxLB, MD_LEFT);
1704 valid[MD_ABOVE_RIGHT] = addMVPCand(mv[MD_ABOVE_RIGHT], picList, refIdx, partIdxRT, MD_ABOVE_RIGHT);
1705 valid[MD_ABOVE] = addMVPCand(mv[MD_ABOVE], picList, refIdx, partIdxRT, MD_ABOVE);
1706 valid[MD_ABOVE_LEFT] = addMVPCand(mv[MD_ABOVE_LEFT], picList, refIdx, partIdxLT, MD_ABOVE_LEFT);
1707
1708 validOrder[MD_BELOW_LEFT] = addMVPCandOrder(mvOrder[MD_BELOW_LEFT], picList, refIdx, partIdxLB, MD_BELOW_LEFT);
1709 validOrder[MD_LEFT] = addMVPCandOrder(mvOrder[MD_LEFT], picList, refIdx, partIdxLB, MD_LEFT);
1710 validOrder[MD_ABOVE_RIGHT] = addMVPCandOrder(mvOrder[MD_ABOVE_RIGHT], picList, refIdx, partIdxRT, MD_ABOVE_RIGHT);
1711 validOrder[MD_ABOVE] = addMVPCandOrder(mvOrder[MD_ABOVE], picList, refIdx, partIdxRT, MD_ABOVE);
1712 validOrder[MD_ABOVE_LEFT] = addMVPCandOrder(mvOrder[MD_ABOVE_LEFT], picList, refIdx, partIdxLT, MD_ABOVE_LEFT);
1713
1714 // Left predictor search
1715 if (valid[MD_BELOW_LEFT])
1716 amvpCand[num++] = mv[MD_BELOW_LEFT];
1717 else if (valid[MD_LEFT])
1718 amvpCand[num++] = mv[MD_LEFT];
1719 else if (validOrder[MD_BELOW_LEFT])
1720 amvpCand[num++] = mvOrder[MD_BELOW_LEFT];
1721 else if (validOrder[MD_LEFT])
1722 amvpCand[num++] = mvOrder[MD_LEFT];
1723
1724 bool bAddedSmvp = num > 0;
1725
1726 // Above predictor search
1727 if (valid[MD_ABOVE_RIGHT])
1728 amvpCand[num++] = mv[MD_ABOVE_RIGHT];
1729 else if (valid[MD_ABOVE])
1730 amvpCand[num++] = mv[MD_ABOVE];
1731 else if (valid[MD_ABOVE_LEFT])
1732 amvpCand[num++] = mv[MD_ABOVE_LEFT];
1733
1734 if (!bAddedSmvp)
1735 {
1736 if (validOrder[MD_ABOVE_RIGHT])
1737 amvpCand[num++] = mvOrder[MD_ABOVE_RIGHT];
1738 else if (validOrder[MD_ABOVE])
1739 amvpCand[num++] = mvOrder[MD_ABOVE];
1740 else if (validOrder[MD_ABOVE_LEFT])
1741 amvpCand[num++] = mvOrder[MD_ABOVE_LEFT];
1742 }
1743
1744 int numMvc = 0;
1745 for (int dir = MD_LEFT; dir <= MD_ABOVE_LEFT; dir++)
1746 {
1747 if (valid[dir] && mv[dir].notZero())
1748 mvc[numMvc++] = mv[dir];
1749
1750 if (validOrder[dir] && mvOrder[dir].notZero())
1751 mvc[numMvc++] = mvOrder[dir];
1752 }
1753
1754 if (num == 2)
1755 {
1756 if (amvpCand[0] == amvpCand[1])
1757 num = 1;
1758 else
1759 /* AMVP_NUM_CANDS = 2 */
1760 return numMvc;
1761 }
1762
1763 if (m_slice->m_sps->bTemporalMVPEnabled)
1764 {
1765 uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1766 uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1767 MV colmv;
1768
1769 // co-located RightBottom temporal predictor (H)
1770 int ctuIdx = -1;
1771
1772 // image boundary check
1773 if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1774 m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1775 {
1776 uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1777 uint32_t numPartInCUSize = s_numPartInCUSize;
1778 bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU
1779 bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row of CTU
1780
1781 if (bNotLastCol && bNotLastRow)
1782 {
1783 absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1];
1784 ctuIdx = m_cuAddr;
1785 }
1786 else if (bNotLastCol)
1787 absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)];
1788 else if (bNotLastRow)
1789 {
1790 absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
1791 ctuIdx = m_cuAddr + 1;
1792 }
1793 else // is the right bottom corner of CTU
1794 absPartAddr = 0;
1795 }
1796 if (ctuIdx >= 0 && getColMVP(colmv, refIdx, picList, ctuIdx, absPartAddr))
1797 {
1798 amvpCand[num++] = colmv;
1799 mvc[numMvc++] = colmv;
1800 }
1801 else
1802 {
1803 uint32_t partIdxCenter = deriveCenterIdx(puIdx);
1804 uint32_t curCTUIdx = m_cuAddr;
1805 if (getColMVP(colmv, refIdx, picList, curCTUIdx, partIdxCenter))
1806 {
1807 amvpCand[num++] = colmv;
1808 mvc[numMvc++] = colmv;
1809 }
1810 }
1811 }
1812
1813 while (num < AMVP_NUM_CANDS)
1814 amvpCand[num++] = 0;
1815
1816 return numMvc;
1817}
1818
1819void CUData::clipMv(MV& outMV) const
1820{
1821 int mvshift = 2;
1822 int offset = 8;
1823 int xmax = (m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift;
1824 int xmin = (-(int)g_maxCUSize - offset - (int)m_cuPelX + 1) << mvshift;
1825
1826 int ymax = (m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift;
1827 int ymin = (-(int)g_maxCUSize - offset - (int)m_cuPelY + 1) << mvshift;
1828
1829 outMV.x = (int16_t)X265_MIN(xmax, X265_MAX(xmin, (int)outMV.x));
1830 outMV.y = (int16_t)X265_MIN(ymax, X265_MAX(ymin, (int)outMV.y));
1831}
1832
1833bool CUData::addMVPCand(MV& mvp, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir) const
1834{
1835 const CUData* tmpCU = NULL;
1836 uint32_t idx = 0;
1837
1838 switch (dir)
1839 {
1840 case MD_LEFT:
1841 tmpCU = getPULeft(idx, partUnitIdx);
1842 break;
1843 case MD_ABOVE:
1844 tmpCU = getPUAbove(idx, partUnitIdx);
1845 break;
1846 case MD_ABOVE_RIGHT:
1847 tmpCU = getPUAboveRight(idx, partUnitIdx);
1848 break;
1849 case MD_BELOW_LEFT:
1850 tmpCU = getPUBelowLeft(idx, partUnitIdx);
1851 break;
1852 case MD_ABOVE_LEFT:
1853 tmpCU = getPUAboveLeft(idx, partUnitIdx);
1854 break;
1855 default:
1856 return false;
1857 }
1858
1859 if (!tmpCU)
1860 return false;
1861
1862 int refPOC = m_slice->m_refPOCList[picList][refIdx];
1863 int partRefIdx = tmpCU->m_refIdx[picList][idx];
1864 if (partRefIdx >= 0 && refPOC == tmpCU->m_slice->m_refPOCList[picList][partRefIdx])
1865 {
1866 mvp = tmpCU->m_mv[picList][idx];
1867 return true;
1868 }
1869
1870 int refPicList2nd = 0;
1871 if (picList == 0)
1872 refPicList2nd = 1;
1873 else if (picList == 1)
1874 refPicList2nd = 0;
1875
1876 int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
1877 int neibRefPOC;
1878
1879 partRefIdx = tmpCU->m_refIdx[refPicList2nd][idx];
1880 if (partRefIdx >= 0)
1881 {
1882 neibRefPOC = tmpCU->m_slice->m_refPOCList[refPicList2nd][partRefIdx];
1883 if (neibRefPOC == curRefPOC)
1884 {
1885 // Same reference frame but different list
1886 mvp = tmpCU->m_mv[refPicList2nd][idx];
1887 return true;
1888 }
1889 }
1890 return false;
1891}
1892
1893bool CUData::addMVPCandOrder(MV& outMV, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir) const
1894{
1895 const CUData* tmpCU = NULL;
1896 uint32_t idx = 0;
1897
1898 switch (dir)
1899 {
1900 case MD_LEFT:
1901 tmpCU = getPULeft(idx, partUnitIdx);
1902 break;
1903 case MD_ABOVE:
1904 tmpCU = getPUAbove(idx, partUnitIdx);
1905 break;
1906 case MD_ABOVE_RIGHT:
1907 tmpCU = getPUAboveRight(idx, partUnitIdx);
1908 break;
1909 case MD_BELOW_LEFT:
1910 tmpCU = getPUBelowLeft(idx, partUnitIdx);
1911 break;
1912 case MD_ABOVE_LEFT:
1913 tmpCU = getPUAboveLeft(idx, partUnitIdx);
1914 break;
1915 default:
1916 return false;
1917 }
1918
1919 if (!tmpCU)
1920 return false;
1921
1922 int refPicList2nd = 0;
1923 if (picList == 0)
1924 refPicList2nd = 1;
1925 else if (picList == 1)
1926 refPicList2nd = 0;
1927
1928 int curPOC = m_slice->m_poc;
1929 int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
1930 int neibPOC = curPOC;
1931 int neibRefPOC;
1932
1933 int partRefIdx = tmpCU->m_refIdx[picList][idx];
1934 if (partRefIdx >= 0)
1935 {
1936 neibRefPOC = tmpCU->m_slice->m_refPOCList[picList][partRefIdx];
1937 MV mvp = tmpCU->m_mv[picList][idx];
1938
1939 scaleMvByPOCDist(outMV, mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
1940 return true;
1941 }
1942
1943 partRefIdx = tmpCU->m_refIdx[refPicList2nd][idx];
1944 if (partRefIdx >= 0)
1945 {
1946 neibRefPOC = tmpCU->m_slice->m_refPOCList[refPicList2nd][partRefIdx];
1947 MV mvp = tmpCU->m_mv[refPicList2nd][idx];
1948
1949 scaleMvByPOCDist(outMV, mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
1950 return true;
1951 }
1952
1953 return false;
1954}
1955
1956bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const
1957{
b53f7c52
JB
1958 const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
1959 const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
72b9787e 1960
b53f7c52 1961 if (colCU->m_predMode[partUnitIdx] == MODE_NONE)
72b9787e
JB
1962 return false;
1963
b53f7c52 1964 uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
72b9787e
JB
1965
1966 if (colCU->isIntra(absPartAddr))
1967 return false;
1968
b53f7c52 1969 int colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag;
72b9787e
JB
1970
1971 int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
1972
1973 if (colRefIdx < 0)
1974 {
1975 colRefPicList = 1 - colRefPicList;
1976 colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
1977
1978 if (colRefIdx < 0)
1979 return false;
1980 }
1981
1982 // Scale the vector
b53f7c52
JB
1983 int colRefPOC = colCU->m_slice->m_refPOCList[colRefPicList][colRefIdx];
1984 int colPOC = colCU->m_slice->m_poc;
1985 MV colmv = colCU->m_mv[colRefPicList][absPartAddr];
1986
1987 int curRefPOC = m_slice->m_refPOCList[picList][outRefIdx];
1988 int curPOC = m_slice->m_poc;
72b9787e
JB
1989
1990 scaleMvByPOCDist(outMV, colmv, curPOC, curRefPOC, colPOC, colRefPOC);
1991 return true;
1992}
1993
1994void CUData::scaleMvByPOCDist(MV& outMV, const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const
1995{
1996 int diffPocD = colPOC - colRefPOC;
1997 int diffPocB = curPOC - curRefPOC;
1998
1999 if (diffPocD == diffPocB)
2000 outMV = inMV;
2001 else
2002 {
2003 int tdb = Clip3(-128, 127, diffPocB);
2004 int tdd = Clip3(-128, 127, diffPocD);
2005 int x = (0x4000 + abs(tdd / 2)) / tdd;
2006 int scale = Clip3(-4096, 4095, (tdb * x + 32) >> 6);
2007 outMV = scaleMv(inMV, scale);
2008 }
2009}
2010
2011uint32_t CUData::deriveCenterIdx(uint32_t puIdx) const
2012{
2013 uint32_t absPartIdx;
2014 int puWidth, puHeight;
2015
2016 getPartIndexAndSize(puIdx, absPartIdx, puWidth, puHeight);
2017
2018 return g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU + absPartIdx]
2019 + (puHeight >> (LOG2_UNIT_SIZE + 1)) * s_numPartInCUSize
2020 + (puWidth >> (LOG2_UNIT_SIZE + 1))];
2021}
2022
2023ScanType CUData::getCoefScanIdx(uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma, bool bIsIntra) const
2024{
2025 uint32_t dirMode;
2026
2027 if (!bIsIntra)
2028 return SCAN_DIAG;
2029
2030 // check that MDCS can be used for this TU
2031 if (bIsLuma)
2032 {
2033 if (log2TrSize > MDCS_LOG2_MAX_SIZE)
2034 return SCAN_DIAG;
2035
2036 dirMode = m_lumaIntraDir[absPartIdx];
2037 }
2038 else
2039 {
2040 if (log2TrSize > (uint32_t)(MDCS_LOG2_MAX_SIZE - m_hChromaShift))
2041 return SCAN_DIAG;
2042
2043 dirMode = m_chromaIntraDir[absPartIdx];
2044 if (dirMode == DM_CHROMA_IDX)
2045 {
2046 dirMode = m_lumaIntraDir[(m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & 0xFC];
2047 dirMode = (m_chromaFormat == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[dirMode] : dirMode;
2048 }
2049 }
2050
2051 if (abs((int)dirMode - VER_IDX) <= MDCS_ANGLE_LIMIT)
2052 return SCAN_HOR;
2053 else if (abs((int)dirMode - HOR_IDX) <= MDCS_ANGLE_LIMIT)
2054 return SCAN_VER;
2055 else
2056 return SCAN_DIAG;
2057}
2058
2059void CUData::getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const
2060{
2061 // set the group layout
2062 result.log2TrSizeCG = log2TrSize - 2;
2063
2064 // set the scan orders
2065 result.scanType = getCoefScanIdx(absPartIdx, log2TrSize, bIsLuma, isIntra(absPartIdx));
2066 result.scan = g_scanOrder[result.scanType][log2TrSize - 2];
2067 result.scanCG = g_scanOrderCG[result.scanType][result.log2TrSizeCG];
2068
2069 if (log2TrSize == 2)
2070 result.firstSignificanceMapContext = 0;
2071 else if (log2TrSize == 3)
2072 {
2073 result.firstSignificanceMapContext = 9;
2074 if (result.scanType != SCAN_DIAG && bIsLuma)
2075 result.firstSignificanceMapContext += 6;
2076 }
2077 else
2078 result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
2079}
2080
/* Branch-free set/clear of the bits in 'flag' inside 'bitfield':
 *   value != 0  ->  all bits of 'flag' are set
 *   value == 0  ->  all bits of 'flag' are cleared
 * 'value' is normalised to 0/1 with !! before the mask is built, so any
 * non-zero value sets the flag (the mask trick ~(v - 1) is only all-ones /
 * all-zeros for v == 1 / v == 0).  'bitfield' and 'flag' are each evaluated
 * twice, so neither may have side effects. */
#define CU_SET_FLAG(bitfield, flag, value) \
    (bitfield) = ((bitfield) & (~(flag))) | ((~((!!(value)) - 1)) & (flag))
2082
b53f7c52 2083void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS])
72b9787e
JB
2084{
2085 // Initialize the coding blocks inside the CTB
2086 for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= MIN_LOG2_CU_SIZE; log2CUSize--)
2087 {
2088 uint32_t blockSize = 1 << log2CUSize;
2089 uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize);
2090 int32_t lastLevelFlag = log2CUSize == MIN_LOG2_CU_SIZE;
2091 for (uint32_t sbY = 0; sbY < sbWidth; sbY++)
2092 {
2093 for (uint32_t sbX = 0; sbX < sbWidth; sbX++)
2094 {
2095 uint32_t depthIdx = g_depthScanIdx[sbY][sbX];
2096 uint32_t cuIdx = rangeCUIdx + depthIdx;
2097 uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx << 2);
b53f7c52
JB
2098 uint32_t px = sbX * blockSize;
2099 uint32_t py = sbY * blockSize;
2100 int32_t presentFlag = px < ctuWidth && py < ctuHeight;
2101 int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > ctuWidth || py + blockSize > ctuHeight);
72b9787e
JB
2102
2103 /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */
2104 uint32_t xOffset = (sbX * blockSize) >> 3;
2105 uint32_t yOffset = (sbY * blockSize) >> 3;
2106 X265_CHECK(cuIdx < CUGeom::MAX_GEOMS, "CU geom index bug\n");
2107
2108 CUGeom *cu = cuDataArray + cuIdx;
2109 cu->log2CUSize = log2CUSize;
2110 cu->childOffset = childIdx - cuIdx;
2111 cu->encodeIdx = g_depthScanIdx[yOffset][xOffset] * 4;
2112 cu->numPartitions = (NUM_CU_PARTITIONS >> ((g_maxLog2CUSize - cu->log2CUSize) * 2));
2113 cu->depth = g_log2Size[maxCUSize] - log2CUSize;
2114
2115 cu->flags = 0;
2116 CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag);
2117 CU_SET_FLAG(cu->flags, CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT, splitMandatoryFlag);
2118 CU_SET_FLAG(cu->flags, CUGeom::LEAF, lastLevelFlag);
2119 }
2120 }
2121 rangeCUIdx += sbWidth * sbWidth;
2122 }
2123}