Imported Upstream version 1.4
[deb_x265.git] / source / common / cudata.cpp
CommitLineData
72b9787e
JB
1/*****************************************************************************
2 * Copyright (C) 2014 x265 project
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19 *
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
23
24#include "common.h"
25#include "frame.h"
26#include "framedata.h"
27#include "picyuv.h"
28#include "mv.h"
29#include "cudata.h"
30
31using namespace x265;
32
33namespace {
34// file private namespace
35
36/* for all bcast* and copy* functions, dst and src are aligned to MIN(size, 32) */
37
/* Write val into the single byte that dst points to. */
void bcast1(uint8_t* dst, uint8_t val)
{
    *dst = val;
}
39
/* Copy four bytes from src to dst as a single 32-bit word.
 * Both pointers must meet the alignment rule stated for this helper family. */
void copy4(uint8_t* dst, uint8_t* src)
{
    const uint32_t* in = (const uint32_t*)src;
    uint32_t* out = (uint32_t*)dst;

    out[0] = in[0];
}
/* Fill the four bytes at dst with val using a single 32-bit store.
 * dst must meet the alignment rule stated for this helper family.
 *
 * The replication constant is unsigned on purpose: val is promoted to int, and
 * a signed 0x01010101 * val overflows int (undefined behaviour) for any
 * val >= 0x80. Unsigned arithmetic yields the intended byte pattern for the
 * whole 0..255 range. */
void bcast4(uint8_t* dst, uint8_t val)
{
    const uint32_t pattern = 0x01010101u * val;

    ((uint32_t*)dst)[0] = pattern;
}
42
/* Copy sixteen bytes from src to dst as two 64-bit words, lowest word first. */
void copy16(uint8_t* dst, uint8_t* src)
{
    const uint64_t* in = (const uint64_t*)src;
    uint64_t* out = (uint64_t*)dst;

    out[0] = in[0];
    out[1] = in[1];
}
/* Fill sixteen bytes at dst with val using two 64-bit stores. */
void bcast16(uint8_t* dst, uint8_t val)
{
    /* replicate val into every byte lane of a 64-bit word */
    const uint64_t pattern = 0x0101010101010101ULL * val;
    uint64_t* out = (uint64_t*)dst;

    out[0] = pattern;
    out[1] = pattern;
}
45
/* Copy sixty-four bytes from src to dst as eight 64-bit words, in ascending order. */
void copy64(uint8_t* dst, uint8_t* src)
{
    const uint64_t* in = (const uint64_t*)src;
    uint64_t* out = (uint64_t*)dst;

    for (int word = 0; word < 8; word++)
        out[word] = in[word];
}
/* Fill sixty-four bytes at dst with val using eight 64-bit stores. */
void bcast64(uint8_t* dst, uint8_t val)
{
    /* replicate val into every byte lane of a 64-bit word */
    const uint64_t pattern = 0x0101010101010101ULL * val;
    uint64_t* out = (uint64_t*)dst;

    for (int word = 0; word < 8; word++)
        out[word] = pattern;
}
53
54/* at 256 bytes, memset/memcpy will probably use SIMD more effectively than our uint64_t hack,
55 * but hand-written assembly would beat it. */
/* Copy 256 bytes from src to dst via the C library. */
void copy256(uint8_t* dst, uint8_t* src)
{
    const size_t numBytes = 256;

    memcpy(dst, src, numBytes);
}
/* Fill 256 bytes at dst with val via the C library. */
void bcast256(uint8_t* dst, uint8_t val)
{
    const size_t numBytes = 256;

    memset(dst, val, numBytes);
}
58
/* True when both raster addresses lie in the same column.
 * Masks the low bits instead of taking a modulo, which matches
 * addrA % numUnitsPerRow == addrB % numUnitsPerRow when numUnitsPerRow is a
 * power of two. */
inline bool isEqualCol(int addrA, int addrB, int numUnitsPerRow)
{
    const int columnMask = numUnitsPerRow - 1;
    const int differingBits = addrA ^ addrB;

    return !(differingBits & columnMask);
}
65
/* True when both raster addresses lie in the same row.
 * Compares only the bits above the column field, which matches
 * addrA / numUnitsPerRow == addrB / numUnitsPerRow when numUnitsPerRow is a
 * power of two. */
inline bool isEqualRow(int addrA, int addrB, int numUnitsPerRow)
{
    const int rowMask = ~(numUnitsPerRow - 1);
    const int differingBits = addrA ^ addrB;

    return !(differingBits & rowMask);
}
72
73/* Check whether 2 addresses point to the same row or column */
74inline bool isEqualRowOrCol(int addrA, int addrB, int numUnitsPerRow)
75{
76 return isEqualCol(addrA, addrB, numUnitsPerRow) | isEqualRow(addrA, addrB, numUnitsPerRow);
77}
78
/* True when the raster address lies in the first column.
 * Matches addr % numUnitsPerRow == 0 when numUnitsPerRow is a power of two. */
inline bool isZeroCol(int addr, int numUnitsPerRow)
{
    const int columnMask = numUnitsPerRow - 1;

    return !(addr & columnMask);
}
85
/* True when the raster address lies in the first row.
 * Matches addr / numUnitsPerRow == 0 when numUnitsPerRow is a power of two. */
inline bool isZeroRow(int addr, int numUnitsPerRow)
{
    const int rowMask = ~(numUnitsPerRow - 1);

    return !(addr & rowMask);
}
92
/* True when the column index of the raster address is below val.
 * Matches addr % numUnitsPerRow < val when numUnitsPerRow is a power of two. */
inline bool lessThanCol(int addr, int val, int numUnitsPerRow)
{
    const int column = addr & (numUnitsPerRow - 1);

    return column < val;
}
99
/* True when the row index of the raster address is below val.
 * Tests addr against the first address of row val rather than dividing
 * (addr / numUnitsPerRow < val). */
inline bool lessThanRow(int addr, int val, int numUnitsPerRow)
{
    const int firstAddrOfRowVal = val * numUnitsPerRow;

    return addr < firstAddrOfRowVal;
}
106
107inline MV scaleMv(MV mv, int scale)
108{
109 int mvx = Clip3(-32768, 32767, (scale * mv.x + 127 + (scale * mv.x < 0)) >> 8);
110 int mvy = Clip3(-32768, 32767, (scale * mv.y + 127 + (scale * mv.y < 0)) >> 8);
111
112 return MV((int16_t)mvx, (int16_t)mvy);
113}
114
// Prediction-partition geometry, indexed as
//   partTable[partitioning mode][partition index][0 = size, 1 = offset].
// Every entry packs two 4-bit fields, X in the high nibble and Y in the low
// nibble, each counted in quarters of the block size (4 == the full block).
// Slots for partitions a mode does not have are zero.
const uint32_t partTable[8][4][2] =
{
    // SIZE_2Nx2N.
    { { 0x44, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 } },
    // SIZE_2NxN.
    { { 0x42, 0x00 }, { 0x42, 0x02 }, { 0x00, 0x00 }, { 0x00, 0x00 } },
    // SIZE_Nx2N.
    { { 0x24, 0x00 }, { 0x24, 0x20 }, { 0x00, 0x00 }, { 0x00, 0x00 } },
    // SIZE_NxN.
    { { 0x22, 0x00 }, { 0x22, 0x20 }, { 0x22, 0x02 }, { 0x22, 0x22 } },
    // SIZE_2NxnU.
    { { 0x41, 0x00 }, { 0x43, 0x01 }, { 0x00, 0x00 }, { 0x00, 0x00 } },
    // SIZE_2NxnD.
    { { 0x43, 0x00 }, { 0x41, 0x03 }, { 0x00, 0x00 }, { 0x00, 0x00 } },
    // SIZE_nLx2N.
    { { 0x14, 0x00 }, { 0x34, 0x10 }, { 0x00, 0x00 }, { 0x00, 0x00 } },
    // SIZE_nRx2N.
    { { 0x34, 0x00 }, { 0x14, 0x30 }, { 0x00, 0x00 }, { 0x00, 0x00 } }
};
132
// Start address of each prediction partition, indexed as
//   partAddrTable[partitioning mode][partition index].
const uint32_t partAddrTable[8][4] =
{
    // SIZE_2Nx2N.
    { 0x00, 0x00, 0x00, 0x00 },
    // SIZE_2NxN.
    { 0x00, 0x08, 0x08, 0x08 },
    // SIZE_Nx2N.
    { 0x00, 0x04, 0x04, 0x04 },
    // SIZE_NxN.
    { 0x00, 0x04, 0x08, 0x0C },
    // SIZE_2NxnU.
    { 0x00, 0x02, 0x02, 0x02 },
    // SIZE_2NxnD.
    { 0x00, 0x0A, 0x0A, 0x0A },
    // SIZE_nLx2N.
    { 0x00, 0x01, 0x01, 0x01 },
    // SIZE_nRx2N.
    { 0x00, 0x05, 0x05, 0x05 }
};
146
147}
148
149cubcast_t CUData::s_partSet[NUM_FULL_DEPTH] = { NULL, NULL, NULL, NULL, NULL };
150uint32_t CUData::s_numPartInCUSize;
151
152CUData::CUData()
153{
154 memset(this, 0, sizeof(*this));
155}
156
157void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth, int csp, int instance)
158{
159 m_chromaFormat = csp;
160 m_hChromaShift = CHROMA_H_SHIFT(csp);
161 m_vChromaShift = CHROMA_V_SHIFT(csp);
162 m_numPartitions = NUM_CU_PARTITIONS >> (depth * 2);
163
164 if (!s_partSet[0])
165 {
166 s_numPartInCUSize = 1 << g_maxFullDepth;
167 switch (g_maxLog2CUSize)
168 {
169 case 6:
170 s_partSet[0] = bcast256;
171 s_partSet[1] = bcast64;
172 s_partSet[2] = bcast16;
173 s_partSet[3] = bcast4;
174 s_partSet[4] = bcast1;
175 break;
176 case 5:
177 s_partSet[0] = bcast64;
178 s_partSet[1] = bcast16;
179 s_partSet[2] = bcast4;
180 s_partSet[3] = bcast1;
181 s_partSet[4] = NULL;
182 break;
183 case 4:
184 s_partSet[0] = bcast16;
185 s_partSet[1] = bcast4;
186 s_partSet[2] = bcast1;
187 s_partSet[3] = NULL;
188 s_partSet[4] = NULL;
189 break;
190 default:
191 X265_CHECK(0, "unexpected CTU size\n");
192 break;
193 }
194 }
195
196 switch (m_numPartitions)
197 {
198 case 256: // 64x64 CU
199 m_partCopy = copy256;
200 m_partSet = bcast256;
201 m_subPartCopy = copy64;
202 m_subPartSet = bcast64;
203 break;
204 case 64: // 32x32 CU
205 m_partCopy = copy64;
206 m_partSet = bcast64;
207 m_subPartCopy = copy16;
208 m_subPartSet = bcast16;
209 break;
210 case 16: // 16x16 CU
211 m_partCopy = copy16;
212 m_partSet = bcast16;
213 m_subPartCopy = copy4;
214 m_subPartSet = bcast4;
215 break;
216 case 4: // 8x8 CU
217 m_partCopy = copy4;
218 m_partSet = bcast4;
219 m_subPartCopy = NULL;
220 m_subPartSet = NULL;
221 break;
222 default:
223 X265_CHECK(0, "unexpected CU partition count\n");
224 break;
225 }
226
227 /* Each CU's data is layed out sequentially within the charMemBlock */
228 uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance;
229
230 m_qp = (char*)charBuf; charBuf += m_numPartitions;
231 m_log2CUSize = charBuf; charBuf += m_numPartitions;
232 m_partSize = charBuf; charBuf += m_numPartitions;
233 m_predMode = charBuf; charBuf += m_numPartitions;
234 m_lumaIntraDir = charBuf; charBuf += m_numPartitions;
235 m_tqBypass = charBuf; charBuf += m_numPartitions;
236 m_refIdx[0] = (char*)charBuf; charBuf += m_numPartitions;
237 m_refIdx[1] = (char*)charBuf; charBuf += m_numPartitions;
238 m_cuDepth = charBuf; charBuf += m_numPartitions;
239 m_skipFlag = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
240 m_mergeFlag = charBuf; charBuf += m_numPartitions;
241 m_interDir = charBuf; charBuf += m_numPartitions;
242 m_mvpIdx[0] = charBuf; charBuf += m_numPartitions;
243 m_mvpIdx[1] = charBuf; charBuf += m_numPartitions;
244 m_tuDepth = charBuf; charBuf += m_numPartitions;
245 m_transformSkip[0] = charBuf; charBuf += m_numPartitions;
246 m_transformSkip[1] = charBuf; charBuf += m_numPartitions;
247 m_transformSkip[2] = charBuf; charBuf += m_numPartitions;
248 m_cbf[0] = charBuf; charBuf += m_numPartitions;
249 m_cbf[1] = charBuf; charBuf += m_numPartitions;
250 m_cbf[2] = charBuf; charBuf += m_numPartitions;
251 m_chromaIntraDir = charBuf; charBuf += m_numPartitions;
252
253 X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
254
255 m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
256 m_mv[1] = m_mv[0] + m_numPartitions;
257 m_mvd[0] = m_mv[1] + m_numPartitions;
258 m_mvd[1] = m_mvd[0] + m_numPartitions;
259
260 uint32_t cuSize = g_maxCUSize >> depth;
261 uint32_t sizeL = cuSize * cuSize;
262 uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
263 m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2);
264 m_trCoeff[1] = m_trCoeff[0] + sizeL;
265 m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC;
266}
267
268void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp)
269{
270 m_encData = frame.m_encData;
271 m_slice = m_encData->m_slice;
272 m_cuAddr = cuAddr;
273 m_cuPelX = (cuAddr % m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize;
274 m_cuPelY = (cuAddr / m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize;
275 m_absIdxInCTU = 0;
276 m_numPartitions = NUM_CU_PARTITIONS;
277
278 /* sequential memsets */
279 m_partSet((uint8_t*)m_qp, (uint8_t)qp);
280 m_partSet(m_log2CUSize, (uint8_t)g_maxLog2CUSize);
281 m_partSet(m_partSize, (uint8_t)SIZE_NONE);
282 m_partSet(m_predMode, (uint8_t)MODE_NONE);
283 m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
284 m_partSet(m_tqBypass, (uint8_t)frame.m_encData->m_param->bLossless);
285 if (m_slice->m_sliceType != I_SLICE)
286 {
287 m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
288 m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
289 }
290
291 X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
292
293 /* initialize the remaining CU data in one memset */
294 memset(m_cuDepth, 0, (BytesPerPartition - 8) * m_numPartitions);
295
296 uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
297 m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
298 m_cuAbove = (m_cuAddr / widthInCU) ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL;
299 m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;
300 m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
301}
302
303// initialize Sub partition
304void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom)
305{
306 m_absIdxInCTU = cuGeom.encodeIdx;
307 m_encData = ctu.m_encData;
308 m_slice = ctu.m_slice;
309 m_cuAddr = ctu.m_cuAddr;
310 m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.encodeIdx];
311 m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.encodeIdx];
312 m_cuLeft = ctu.m_cuLeft;
313 m_cuAbove = ctu.m_cuAbove;
314 m_cuAboveLeft = ctu.m_cuAboveLeft;
315 m_cuAboveRight = ctu.m_cuAboveRight;
316 X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n");
317
318 /* sequential memsets */
319 m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]);
320 m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize);
321 m_partSet(m_partSize, (uint8_t)SIZE_NONE);
322 m_partSet(m_predMode, (uint8_t)MODE_NONE);
323 m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
324 m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless);
325 m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
326 m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
327 m_partSet(m_cuDepth, (uint8_t)cuGeom.depth);
328
329 /* initialize the remaining CU data in one memset */
330 memset(m_skipFlag, 0, (BytesPerPartition - 9) * m_numPartitions);
331}
332
333/* Copy the results of a sub-part (split) CU to the parent CU */
334void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx)
335{
336 X265_CHECK(subPartIdx < 4, "part unit should be less than 4\n");
337
338 uint32_t offset = childGeom.numPartitions * subPartIdx;
339
340 m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
341 m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
342 m_subPartCopy(m_partSize + offset, subCU.m_partSize);
343 m_subPartCopy(m_predMode + offset, subCU.m_predMode);
344 m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
345 m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
346 m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
347 m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
348 m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth);
349 m_subPartCopy(m_skipFlag + offset, subCU.m_skipFlag);
350 m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
351 m_subPartCopy(m_interDir + offset, subCU.m_interDir);
352 m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
353 m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]);
354 m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth);
355 m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]);
356 m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]);
357 m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]);
358 m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]);
359 m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]);
360 m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]);
361 m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir);
362
363 memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV));
364 memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV));
365 memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV));
366 memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV));
367
368 uint32_t tmp = 1 << ((g_maxLog2CUSize - childGeom.depth) * 2);
369 uint32_t tmp2 = subPartIdx * tmp;
370 memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t) * tmp);
371
372 uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift);
373 uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift);
374 memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC);
375 memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC);
376}
377
378/* If a sub-CU part is not present (off the edge of the picture) its depth and
379 * log2size should still be configured */
380void CUData::setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx)
381{
382 uint32_t offset = childGeom.numPartitions * subPartIdx;
383 m_subPartSet(m_cuDepth + offset, (uint8_t)childGeom.depth);
384 m_subPartSet(m_log2CUSize + offset, (uint8_t)childGeom.log2CUSize);
385}
386
387/* Copy all CU data from one instance to the next, except set lossless flag
388 * This will only get used when --cu-lossless is enabled but --lossless is not. */
389void CUData::initLosslessCU(const CUData& cu, const CUGeom& cuGeom)
390{
391 /* Start by making an exact copy */
392 m_encData = cu.m_encData;
393 m_slice = cu.m_slice;
394 m_cuAddr = cu.m_cuAddr;
395 m_cuPelX = cu.m_cuPelX;
396 m_cuPelY = cu.m_cuPelY;
397 m_cuLeft = cu.m_cuLeft;
398 m_cuAbove = cu.m_cuAbove;
399 m_cuAboveLeft = cu.m_cuAboveLeft;
400 m_cuAboveRight = cu.m_cuAboveRight;
401 m_absIdxInCTU = cuGeom.encodeIdx;
402 m_numPartitions = cuGeom.numPartitions;
403 memcpy(m_qp, cu.m_qp, BytesPerPartition * m_numPartitions);
404 memcpy(m_mv[0], cu.m_mv[0], m_numPartitions * sizeof(MV));
405 memcpy(m_mv[1], cu.m_mv[1], m_numPartitions * sizeof(MV));
406 memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV));
407 memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV));
408
409 /* force TQBypass to true */
410 m_partSet(m_tqBypass, true);
411
412 /* clear residual coding flags */
413 m_partSet(m_skipFlag, 0);
414 m_partSet(m_tuDepth, 0);
415 m_partSet(m_transformSkip[0], 0);
416 m_partSet(m_transformSkip[1], 0);
417 m_partSet(m_transformSkip[2], 0);
418 m_partSet(m_cbf[0], 0);
419 m_partSet(m_cbf[1], 0);
420 m_partSet(m_cbf[2], 0);
421}
422
423/* Copy completed predicted CU to CTU in picture */
424void CUData::copyToPic(uint32_t depth) const
425{
426 CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
427
428 m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
429 m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
430 m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
431 m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
432 m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
433 m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
434 m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]);
435 m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]);
436 m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth);
437 m_partCopy(ctu.m_skipFlag + m_absIdxInCTU, m_skipFlag);
438 m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag);
439 m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir);
440 m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]);
441 m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]);
442 m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
443 m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
444 m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
445 m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
446 m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
447 m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
448 m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
449 m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
450
451 memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV));
452 memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV));
453 memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV));
454 memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV));
455
456 uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
457 uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
458 memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
459
460 uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift);
461 uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift);
462 memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC);
463 memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC);
464}
465
466/* The reverse of copyToPic, called only by encodeResidue */
467void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom)
468{
469 m_encData = ctu.m_encData;
470 m_slice = ctu.m_slice;
471 m_cuAddr = ctu.m_cuAddr;
472 m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.encodeIdx];
473 m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.encodeIdx];
474 m_absIdxInCTU = cuGeom.encodeIdx;
475 m_numPartitions = cuGeom.numPartitions;
476
477 /* copy out all prediction info for this part */
478 m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
479 m_partCopy(m_log2CUSize, ctu.m_log2CUSize + m_absIdxInCTU);
480 m_partCopy(m_partSize, ctu.m_partSize + m_absIdxInCTU);
481 m_partCopy(m_predMode, ctu.m_predMode + m_absIdxInCTU);
482 m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
483 m_partCopy(m_tqBypass, ctu.m_tqBypass + m_absIdxInCTU);
484 m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU);
485 m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU);
486 m_partCopy(m_cuDepth, ctu.m_cuDepth + m_absIdxInCTU);
487 m_partCopy(m_mergeFlag, ctu.m_mergeFlag + m_absIdxInCTU);
488 m_partCopy(m_interDir, ctu.m_interDir + m_absIdxInCTU);
489 m_partCopy(m_mvpIdx[0], ctu.m_mvpIdx[0] + m_absIdxInCTU);
490 m_partCopy(m_mvpIdx[1], ctu.m_mvpIdx[1] + m_absIdxInCTU);
491 m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU);
492
493 memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
494 memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
495 memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
496 memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
497
498 /* clear residual coding flags */
499 m_partSet(m_skipFlag, 0);
500 m_partSet(m_tuDepth, 0);
501 m_partSet(m_transformSkip[0], 0);
502 m_partSet(m_transformSkip[1], 0);
503 m_partSet(m_transformSkip[2], 0);
504 m_partSet(m_cbf[0], 0);
505 m_partSet(m_cbf[1], 0);
506 m_partSet(m_cbf[2], 0);
507}
508
509/* Only called by encodeResidue, these fields can be modified during inter/intra coding */
510void CUData::updatePic(uint32_t depth) const
511{
512 CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
513
514 m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
515 m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
516 m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
517 m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
518 m_partCopy(ctu.m_skipFlag + m_absIdxInCTU, m_skipFlag);
519 m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
520 m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
521 m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
522 m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
523 m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
524
525 uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
526 uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
527 memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
528 tmpY >>= m_hChromaShift + m_vChromaShift;
529 tmpY2 >>= m_hChromaShift + m_vChromaShift;
530 memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY);
531 memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY);
532}
533
534const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const
535{
536 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
537
538 if (!isZeroCol(absPartIdx, s_numPartInCUSize))
539 {
540 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU];
541 lPartUnitIdx = g_rasterToZscan[absPartIdx - 1];
542 if (isEqualCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
543 return m_encData->getPicCTU(m_cuAddr);
544 else
545 {
546 lPartUnitIdx -= m_absIdxInCTU;
547 return this;
548 }
549 }
550
551 lPartUnitIdx = g_rasterToZscan[absPartIdx + s_numPartInCUSize - 1];
552 return m_cuLeft;
553}
554
555const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx, bool planarAtCTUBoundary) const
556{
557 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
558
559 if (!isZeroRow(absPartIdx, s_numPartInCUSize))
560 {
561 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU];
562 aPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize];
563 if (isEqualRow(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
564 return m_encData->getPicCTU(m_cuAddr);
565 else
566 {
567 aPartUnitIdx -= m_absIdxInCTU;
568 return this;
569 }
570 }
571
572 if (planarAtCTUBoundary)
573 return NULL;
574
575 aPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - s_numPartInCUSize];
576 return m_cuAbove;
577}
578
579const CUData* CUData::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const
580{
581 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
582
583 if (!isZeroCol(absPartIdx, s_numPartInCUSize))
584 {
585 if (!isZeroRow(absPartIdx, s_numPartInCUSize))
586 {
587 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU];
588 alPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize - 1];
589 if (isEqualRowOrCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
590 return m_encData->getPicCTU(m_cuAddr);
591 else
592 {
593 alPartUnitIdx -= m_absIdxInCTU;
594 return this;
595 }
596 }
597 alPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - s_numPartInCUSize - 1];
598 return m_cuAbove;
599 }
600
601 if (!isZeroRow(absPartIdx, s_numPartInCUSize))
602 {
603 alPartUnitIdx = g_rasterToZscan[absPartIdx - 1];
604 return m_cuLeft;
605 }
606
607 alPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - 1];
608 return m_cuAboveLeft;
609}
610
611const CUData* CUData::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const
612{
613 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)
614 return NULL;
615
616 uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx];
617
618 if (lessThanCol(absPartIdxRT, s_numPartInCUSize - 1, s_numPartInCUSize))
619 {
620 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
621 {
622 if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1])
623 {
624 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
625 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1];
626 if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize))
627 return m_encData->getPicCTU(m_cuAddr);
628 else
629 {
630 arPartUnitIdx -= m_absIdxInCTU;
631 return this;
632 }
633 }
634 return NULL;
635 }
636 arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - s_numPartInCUSize + 1];
637 return m_cuAbove;
638 }
639
640 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
641 return NULL;
642
643 arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize];
644 return m_cuAboveRight;
645}
646
647const CUData* CUData::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const
648{
649 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
650 return NULL;
651
652 uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
653
654 if (lessThanRow(absPartIdxLB, s_numPartInCUSize - 1, s_numPartInCUSize))
655 {
656 if (!isZeroCol(absPartIdxLB, s_numPartInCUSize))
657 {
658 if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + s_numPartInCUSize - 1])
659 {
660 uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * s_numPartInCUSize;
661 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + s_numPartInCUSize - 1];
662 if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, s_numPartInCUSize))
663 return m_encData->getPicCTU(m_cuAddr);
664 else
665 {
666 blPartUnitIdx -= m_absIdxInCTU;
667 return this;
668 }
669 }
670 return NULL;
671 }
672 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + s_numPartInCUSize * 2 - 1];
673 return m_cuLeft;
674 }
675
676 return NULL;
677}
678
679const CUData* CUData::getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const
680{
681 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picHeightInLumaSamples)
682 return NULL;
683
684 uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
685
686 if (lessThanRow(absPartIdxLB, s_numPartInCUSize - partUnitOffset, s_numPartInCUSize))
687 {
688 if (!isZeroCol(absPartIdxLB, s_numPartInCUSize))
689 {
690 if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + partUnitOffset * s_numPartInCUSize - 1])
691 {
692 uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * s_numPartInCUSize;
693 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + partUnitOffset * s_numPartInCUSize - 1];
694 if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, s_numPartInCUSize))
695 return m_encData->getPicCTU(m_cuAddr);
696 else
697 {
698 blPartUnitIdx -= m_absIdxInCTU;
699 return this;
700 }
701 }
702 return NULL;
703 }
704 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + partUnitOffset) * s_numPartInCUSize - 1];
705 if (!m_cuLeft || !m_cuLeft->m_slice)
706 return NULL;
707 return m_cuLeft;
708 }
709
710 return NULL;
711}
712
713const CUData* CUData::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const
714{
715 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picWidthInLumaSamples)
716 return NULL;
717
718 uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx];
719
720 if (lessThanCol(absPartIdxRT, s_numPartInCUSize - partUnitOffset, s_numPartInCUSize))
721 {
722 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
723 {
724 if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + partUnitOffset])
725 {
726 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
727 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + partUnitOffset];
728 if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize))
729 return m_encData->getPicCTU(m_cuAddr);
730 else
731 {
732 arPartUnitIdx -= m_absIdxInCTU;
733 return this;
734 }
735 }
736 return NULL;
737 }
738 arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset];
739 if (!m_cuAbove || !m_cuAbove->m_slice)
740 return NULL;
741 return m_cuAbove;
742 }
743
744 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
745 return NULL;
746
747 arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset - 1];
748 if ((m_cuAboveRight == NULL || m_cuAboveRight->m_slice == NULL || (m_cuAboveRight->m_cuAddr) > m_cuAddr))
749 return NULL;
750 return m_cuAboveRight;
751}
752
753/* Get left QpMinCu */
754const CUData* CUData::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInCTU) const
755{
756 uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
757 uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
758
759 // check for left CTU boundary
760 if (isZeroCol(absRorderQpMinCUIdx, s_numPartInCUSize))
761 return NULL;
762
763 // get index of left-CU relative to top-left corner of current quantization group
764 lPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - 1];
765
766 // return pointer to current CTU
767 return m_encData->getPicCTU(m_cuAddr);
768}
769
770/* Get above QpMinCu */
771const CUData* CUData::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInCTU) const
772{
773 uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
774 uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
775
776 // check for top CTU boundary
777 if (isZeroRow(absRorderQpMinCUIdx, s_numPartInCUSize))
778 return NULL;
779
780 // get index of top-CU relative to top-left corner of current quantization group
781 aPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - s_numPartInCUSize];
782
783 // return pointer to current CTU
784 return m_encData->getPicCTU(m_cuAddr);
785}
786
787/* Get reference QP from left QpMinCu or latest coded QP */
788char CUData::getRefQP(uint32_t curAbsIdxInCTU) const
789{
790 uint32_t lPartIdx = 0, aPartIdx = 0;
791 const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
792 const CUData* cUAbove = getQpMinCuAbove(aPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
793
794 return ((cULeft ? cULeft->m_qp[lPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + (cUAbove ? cUAbove->m_qp[aPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + 1) >> 1;
795}
796
797int CUData::getLastValidPartIdx(int absPartIdx) const
798{
799 int lastValidPartIdx = absPartIdx - 1;
800
801 while (lastValidPartIdx >= 0 && m_predMode[lastValidPartIdx] == MODE_NONE)
802 {
803 uint32_t depth = m_cuDepth[lastValidPartIdx];
804 lastValidPartIdx -= m_numPartitions >> (depth << 1);
805 }
806
807 return lastValidPartIdx;
808}
809
810char CUData::getLastCodedQP(uint32_t absPartIdx) const
811{
812 uint32_t quPartIdxMask = 0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2;
813 int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask);
814
815 if (lastValidPartIdx >= 0)
816 return m_qp[lastValidPartIdx];
817 else
818 {
819 if (m_absIdxInCTU)
820 return m_encData->getPicCTU(m_cuAddr)->getLastCodedQP(m_absIdxInCTU);
821 else if (m_cuAddr > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled && !(m_cuAddr % m_slice->m_sps->numCuInWidth)))
822 return m_encData->getPicCTU(m_cuAddr - 1)->getLastCodedQP(NUM_CU_PARTITIONS);
823 else
824 return (char)m_slice->m_sliceQp;
825 }
826}
827
828/* Get allowed chroma intra modes */
829void CUData::getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const
830{
831 modeList[0] = PLANAR_IDX;
832 modeList[1] = VER_IDX;
833 modeList[2] = HOR_IDX;
834 modeList[3] = DC_IDX;
835 modeList[4] = DM_CHROMA_IDX;
836
837 uint32_t lumaMode = m_lumaIntraDir[absPartIdx];
838
839 for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
840 {
841 if (lumaMode == modeList[i])
842 {
843 modeList[i] = 34; // VER+8 mode
844 break;
845 }
846 }
847}
848
849/* Get most probable intra modes */
850int CUData::getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const
851{
852 const CUData* tempCU;
853 uint32_t tempPartIdx;
854 uint32_t leftIntraDir, aboveIntraDir;
855
856 // Get intra direction of left PU
857 tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
858
859 leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
860
861 // Get intra direction of above PU
862 tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx, true);
863
864 aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
865
866 if (leftIntraDir == aboveIntraDir)
867 {
868 if (leftIntraDir >= 2) // angular modes
869 {
870 intraDirPred[0] = leftIntraDir;
871 intraDirPred[1] = ((leftIntraDir - 2 + 31) & 31) + 2;
872 intraDirPred[2] = ((leftIntraDir - 2 + 1) & 31) + 2;
873 }
874 else //non-angular
875 {
876 intraDirPred[0] = PLANAR_IDX;
877 intraDirPred[1] = DC_IDX;
878 intraDirPred[2] = VER_IDX;
879 }
880 return 1;
881 }
882 else
883 {
884 intraDirPred[0] = leftIntraDir;
885 intraDirPred[1] = aboveIntraDir;
886
887 if (leftIntraDir && aboveIntraDir) //both modes are non-planar
888 intraDirPred[2] = PLANAR_IDX;
889 else
890 intraDirPred[2] = (leftIntraDir + aboveIntraDir) < 2 ? VER_IDX : DC_IDX;
891 return 2;
892 }
893}
894
895uint32_t CUData::getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const
896{
897 const CUData* tempCU;
898 uint32_t tempPartIdx;
899 uint32_t ctx;
900
901 // Get left split flag
902 tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
903 ctx = (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0;
904
905 // Get above split flag
906 tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx);
907 ctx += (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0;
908
909 return ctx;
910}
911
912void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
913{
914 uint32_t log2CUSize = m_log2CUSize[absPartIdx];
915 uint32_t splitFlag = m_partSize[absPartIdx] == SIZE_NxN;
916
917 tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
918 tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
919
920 tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag), tuDepthRange[1]));
921}
922
923void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
924{
925 uint32_t log2CUSize = m_log2CUSize[absPartIdx];
926 uint32_t quadtreeTUMaxDepth = m_slice->m_sps->quadtreeTUMaxDepthInter;
927 uint32_t splitFlag = quadtreeTUMaxDepth == 1 && m_partSize[absPartIdx] != SIZE_2Nx2N;
928
929 tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
930 tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
931
932 tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag), tuDepthRange[1]));
933}
934
935uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const
936{
937 const CUData* tempCU;
938 uint32_t tempPartIdx;
939 uint32_t ctx;
940
941 // Get BCBP of left PU
942 tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
943 ctx = tempCU ? tempCU->isSkipped(tempPartIdx) : 0;
944
945 // Get BCBP of above PU
946 tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx);
947 ctx += tempCU ? tempCU->isSkipped(tempPartIdx) : 0;
948
949 return ctx;
950}
951
952bool CUData::setQPSubCUs(char qp, uint32_t absPartIdx, uint32_t depth)
953{
954 uint32_t curPartNumb = NUM_CU_PARTITIONS >> (depth << 1);
955 uint32_t curPartNumQ = curPartNumb >> 2;
956
957 if (m_cuDepth[absPartIdx] > depth)
958 {
959 for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
960 if (setQPSubCUs(qp, absPartIdx + subPartIdx * curPartNumQ, depth + 1))
961 return true;
962 }
963 else
964 {
965 if (getQtRootCbf(absPartIdx))
966 return true;
967 else
968 setQPSubParts(qp, absPartIdx, depth);
969 }
970
971 return false;
972}
973
974void CUData::setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx)
975{
976 uint32_t curPartNumQ = m_numPartitions >> 2;
977 X265_CHECK(puIdx < 2, "unexpected part unit index\n");
978
979 switch (m_partSize[absPartIdx])
980 {
981 case SIZE_2Nx2N:
982 memset(m_interDir + absPartIdx, dir, 4 * curPartNumQ);
983 break;
984 case SIZE_2NxN:
985 memset(m_interDir + absPartIdx, dir, 2 * curPartNumQ);
986 break;
987 case SIZE_Nx2N:
988 memset(m_interDir + absPartIdx, dir, curPartNumQ);
989 memset(m_interDir + absPartIdx + 2 * curPartNumQ, dir, curPartNumQ);
990 break;
991 case SIZE_NxN:
992 memset(m_interDir + absPartIdx, dir, curPartNumQ);
993 break;
994 case SIZE_2NxnU:
995 if (!puIdx)
996 {
997 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
998 memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
999 }
1000 else
1001 {
1002 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
1003 memset(m_interDir + absPartIdx + curPartNumQ, dir, ((curPartNumQ >> 1) + (curPartNumQ << 1)));
1004 }
1005 break;
1006 case SIZE_2NxnD:
1007 if (!puIdx)
1008 {
1009 memset(m_interDir + absPartIdx, dir, ((curPartNumQ << 1) + (curPartNumQ >> 1)));
1010 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ, dir, (curPartNumQ >> 1));
1011 }
1012 else
1013 {
1014 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
1015 memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
1016 }
1017 break;
1018 case SIZE_nLx2N:
1019 if (!puIdx)
1020 {
1021 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1022 memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1023 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1024 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1025 }
1026 else
1027 {
1028 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1029 memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1030 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1031 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1032 }
1033 break;
1034 case SIZE_nRx2N:
1035 if (!puIdx)
1036 {
1037 memset(m_interDir + absPartIdx, dir, (curPartNumQ + (curPartNumQ >> 2)));
1038 memset(m_interDir + absPartIdx + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1039 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1040 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1041 }
1042 else
1043 {
1044 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1045 memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1046 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1047 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1048 }
1049 break;
1050 default:
1051 X265_CHECK(0, "unexpected part type\n");
1052 break;
1053 }
1054}
1055
1056template<typename T>
1057void CUData::setAllPU(T* p, const T& val, int absPartIdx, int puIdx)
1058{
1059 int i;
1060
1061 p += absPartIdx;
1062 int numElements = m_numPartitions;
1063
1064 switch (m_partSize[absPartIdx])
1065 {
1066 case SIZE_2Nx2N:
1067 for (i = 0; i < numElements; i++)
1068 p[i] = val;
1069 break;
1070
1071 case SIZE_2NxN:
1072 numElements >>= 1;
1073 for (i = 0; i < numElements; i++)
1074 p[i] = val;
1075 break;
1076
1077 case SIZE_Nx2N:
1078 numElements >>= 2;
1079 for (i = 0; i < numElements; i++)
1080 {
1081 p[i] = val;
1082 p[i + 2 * numElements] = val;
1083 }
1084 break;
1085
1086 case SIZE_2NxnU:
1087 {
1088 int curPartNumQ = numElements >> 2;
1089 if (!puIdx)
1090 {
1091 T *pT = p;
1092 T *pT2 = p + curPartNumQ;
1093 for (i = 0; i < (curPartNumQ >> 1); i++)
1094 {
1095 pT[i] = val;
1096 pT2[i] = val;
1097 }
1098 }
1099 else
1100 {
1101 T *pT = p;
1102 for (i = 0; i < (curPartNumQ >> 1); i++)
1103 pT[i] = val;
1104
1105 pT = p + curPartNumQ;
1106 for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
1107 pT[i] = val;
1108 }
1109 break;
1110 }
1111
1112 case SIZE_2NxnD:
1113 {
1114 int curPartNumQ = numElements >> 2;
1115 if (!puIdx)
1116 {
1117 T *pT = p;
1118 for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
1119 pT[i] = val;
1120
1121 pT = p + (numElements - curPartNumQ);
1122 for (i = 0; i < (curPartNumQ >> 1); i++)
1123 pT[i] = val;
1124 }
1125 else
1126 {
1127 T *pT = p;
1128 T *pT2 = p + curPartNumQ;
1129 for (i = 0; i < (curPartNumQ >> 1); i++)
1130 {
1131 pT[i] = val;
1132 pT2[i] = val;
1133 }
1134 }
1135 break;
1136 }
1137
1138 case SIZE_nLx2N:
1139 {
1140 int curPartNumQ = numElements >> 2;
1141 if (!puIdx)
1142 {
1143 T *pT = p;
1144 T *pT2 = p + (curPartNumQ << 1);
1145 T *pT3 = p + (curPartNumQ >> 1);
1146 T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1147
1148 for (i = 0; i < (curPartNumQ >> 2); i++)
1149 {
1150 pT[i] = val;
1151 pT2[i] = val;
1152 pT3[i] = val;
1153 pT4[i] = val;
1154 }
1155 }
1156 else
1157 {
1158 T *pT = p;
1159 T *pT2 = p + (curPartNumQ << 1);
1160 for (i = 0; i < (curPartNumQ >> 2); i++)
1161 {
1162 pT[i] = val;
1163 pT2[i] = val;
1164 }
1165
1166 pT = p + (curPartNumQ >> 1);
1167 pT2 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1168 for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
1169 {
1170 pT[i] = val;
1171 pT2[i] = val;
1172 }
1173 }
1174 break;
1175 }
1176
1177 case SIZE_nRx2N:
1178 {
1179 int curPartNumQ = numElements >> 2;
1180 if (!puIdx)
1181 {
1182 T *pT = p;
1183 T *pT2 = p + (curPartNumQ << 1);
1184 for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
1185 {
1186 pT[i] = val;
1187 pT2[i] = val;
1188 }
1189
1190 pT = p + curPartNumQ + (curPartNumQ >> 1);
1191 pT2 = p + numElements - curPartNumQ + (curPartNumQ >> 1);
1192 for (i = 0; i < (curPartNumQ >> 2); i++)
1193 {
1194 pT[i] = val;
1195 pT2[i] = val;
1196 }
1197 }
1198 else
1199 {
1200 T *pT = p;
1201 T *pT2 = p + (curPartNumQ >> 1);
1202 T *pT3 = p + (curPartNumQ << 1);
1203 T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1204 for (i = 0; i < (curPartNumQ >> 2); i++)
1205 {
1206 pT[i] = val;
1207 pT2[i] = val;
1208 pT3[i] = val;
1209 pT4[i] = val;
1210 }
1211 }
1212 break;
1213 }
1214
1215 case SIZE_NxN:
1216 default:
1217 X265_CHECK(0, "unknown partition type\n");
1218 break;
1219 }
1220}
1221
1222void CUData::setPUMv(int list, const MV& mv, int absPartIdx, int puIdx)
1223{
1224 setAllPU(m_mv[list], mv, absPartIdx, puIdx);
1225}
1226
1227void CUData::setPURefIdx(int list, char refIdx, int absPartIdx, int puIdx)
1228{
1229 setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx);
1230}
1231
1232void CUData::getPartIndexAndSize(uint32_t partIdx, uint32_t& outPartAddr, int& outWidth, int& outHeight) const
1233{
1234 int cuSize = 1 << m_log2CUSize[0];
1235 int partType = m_partSize[0];
1236
1237 int tmp = partTable[partType][partIdx][0];
1238 outWidth = ((tmp >> 4) * cuSize) >> 2;
1239 outHeight = ((tmp & 0xF) * cuSize) >> 2;
1240 outPartAddr = (partAddrTable[partType][partIdx] * m_numPartitions) >> 4;
1241}
1242
1243void CUData::getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& outMvField) const
1244{
1245 if (cu)
1246 {
1247 outMvField.mv = cu->m_mv[picList][absPartIdx];
1248 outMvField.refIdx = cu->m_refIdx[picList][absPartIdx];
1249 }
1250 else
1251 {
1252 // OUT OF BOUNDARY
1253 outMvField.mv.word = 0;
1254 outMvField.refIdx = REF_NOT_VALID;
1255 }
1256}
1257
1258void CUData::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const
1259{
1260 partIdxLT = m_absIdxInCTU;
1261 partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
1262
1263 switch (m_partSize[0])
1264 {
1265 case SIZE_2Nx2N: break;
1266 case SIZE_2NxN:
1267 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 1;
1268 partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 1;
1269 break;
1270 case SIZE_Nx2N:
1271 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 2;
1272 partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 2;
1273 break;
1274 case SIZE_NxN:
1275 partIdxLT += (m_numPartitions >> 2) * partIdx;
1276 partIdxRT += (m_numPartitions >> 2) * (partIdx - 1);
1277 break;
1278 case SIZE_2NxnU:
1279 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 3;
1280 partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 3;
1281 break;
1282 case SIZE_2NxnD:
1283 partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3);
1284 partIdxRT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3);
1285 break;
1286 case SIZE_nLx2N:
1287 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 4;
1288 partIdxRT -= (partIdx == 1) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4);
1289 break;
1290 case SIZE_nRx2N:
1291 partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4);
1292 partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 4;
1293 break;
1294 default:
1295 X265_CHECK(0, "unexpected part index\n");
1296 break;
1297 }
1298}
1299
1300uint32_t CUData::deriveLeftBottomIdx(uint32_t puIdx) const
1301{
1302 uint32_t outPartIdxLB;
1303 outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * s_numPartInCUSize];
1304
1305 switch (m_partSize[0])
1306 {
1307 case SIZE_2Nx2N:
1308 outPartIdxLB += m_numPartitions >> 1;
1309 break;
1310 case SIZE_2NxN:
1311 outPartIdxLB += puIdx ? m_numPartitions >> 1 : 0;
1312 break;
1313 case SIZE_Nx2N:
1314 outPartIdxLB += puIdx ? (m_numPartitions >> 2) * 3 : m_numPartitions >> 1;
1315 break;
1316 case SIZE_NxN:
1317 outPartIdxLB += (m_numPartitions >> 2) * puIdx;
1318 break;
1319 case SIZE_2NxnU:
1320 outPartIdxLB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
1321 break;
1322 case SIZE_2NxnD:
1323 outPartIdxLB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
1324 break;
1325 case SIZE_nLx2N:
1326 outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 4) : m_numPartitions >> 1;
1327 break;
1328 case SIZE_nRx2N:
1329 outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 2) + (m_numPartitions >> 4) : m_numPartitions >> 1;
1330 break;
1331 default:
1332 X265_CHECK(0, "unexpected part index\n");
1333 break;
1334 }
1335 return outPartIdxLB;
1336}
1337
1338/* Derives the partition index of neighboring bottom right block */
1339uint32_t CUData::deriveRightBottomIdx(uint32_t puIdx) const
1340{
1341 uint32_t outPartIdxRB;
1342 outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] +
1343 ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * s_numPartInCUSize +
1344 (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
1345
1346 switch (m_partSize[0])
1347 {
1348 case SIZE_2Nx2N:
1349 outPartIdxRB += m_numPartitions >> 1;
1350 break;
1351 case SIZE_2NxN:
1352 outPartIdxRB += puIdx ? m_numPartitions >> 1 : 0;
1353 break;
1354 case SIZE_Nx2N:
1355 outPartIdxRB += puIdx ? m_numPartitions >> 1 : m_numPartitions >> 2;
1356 break;
1357 case SIZE_NxN:
1358 outPartIdxRB += (m_numPartitions >> 2) * (puIdx - 1);
1359 break;
1360 case SIZE_2NxnU:
1361 outPartIdxRB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
1362 break;
1363 case SIZE_2NxnD:
1364 outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
1365 break;
1366 case SIZE_nLx2N:
1367 outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 3) + (m_numPartitions >> 4);
1368 break;
1369 case SIZE_nRx2N:
1370 outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3) + (m_numPartitions >> 4);
1371 break;
1372 default:
1373 X265_CHECK(0, "unexpected part index\n");
1374 break;
1375 }
1376 return outPartIdxRB;
1377}
1378
1379void CUData::deriveLeftRightTopIdxAdi(uint32_t& outPartIdxLT, uint32_t& outPartIdxRT, uint32_t partOffset, uint32_t partDepth) const
1380{
1381 uint32_t numPartInWidth = 1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - partDepth);
1382
1383 outPartIdxLT = m_absIdxInCTU + partOffset;
1384 outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + numPartInWidth - 1];
1385}
1386
1387bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const
1388{
1389 if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx])
1390 return false;
1391
1392 for (uint32_t refListIdx = 0; refListIdx < 2; refListIdx++)
1393 {
1394 if (m_interDir[absPartIdx] & (1 << refListIdx))
1395 {
1396 if (m_mv[refListIdx][absPartIdx] != candCU.m_mv[refListIdx][candAbsPartIdx] ||
1397 m_refIdx[refListIdx][absPartIdx] != candCU.m_refIdx[refListIdx][candAbsPartIdx])
1398 return false;
1399 }
1400 }
1401
1402 return true;
1403}
1404
1405/* Construct list of merging candidates */
1406uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*mvFieldNeighbours)[2], uint8_t* interDirNeighbours) const
1407{
1408 uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1409 const bool isInterB = m_slice->isInterB();
1410
1411 const uint32_t maxNumMergeCand = m_slice->m_maxNumMergeCand;
1412
1413 for (uint32_t i = 0; i < maxNumMergeCand; ++i)
1414 {
1415 mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID;
1416 mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID;
1417 }
1418
1419 /* calculate the location of upper-left corner pixel and size of the current PU */
1420 int xP, yP, nPSW, nPSH;
1421
1422 int cuSize = 1 << m_log2CUSize[0];
1423 int partMode = m_partSize[0];
1424
1425 int tmp = partTable[partMode][puIdx][0];
1426 nPSW = ((tmp >> 4) * cuSize) >> 2;
1427 nPSH = ((tmp & 0xF) * cuSize) >> 2;
1428
1429 tmp = partTable[partMode][puIdx][1];
1430 xP = ((tmp >> 4) * cuSize) >> 2;
1431 yP = ((tmp & 0xF) * cuSize) >> 2;
1432
1433 uint32_t count = 0;
1434
1435 uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
1436 PartSize curPS = (PartSize)m_partSize[absPartIdx];
1437
1438 // left
1439 uint32_t leftPartIdx = 0;
1440 const CUData* cuLeft = getPULeft(leftPartIdx, partIdxLB);
1441 bool isAvailableA1 = cuLeft &&
1442 cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) &&
1443 !(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) &&
1444 !cuLeft->isIntra(leftPartIdx);
1445 if (isAvailableA1)
1446 {
1447 // get Inter Dir
1448 interDirNeighbours[count] = cuLeft->m_interDir[leftPartIdx];
1449 // get Mv from Left
1450 cuLeft->getMvField(cuLeft, leftPartIdx, 0, mvFieldNeighbours[count][0]);
1451 if (isInterB)
1452 cuLeft->getMvField(cuLeft, leftPartIdx, 1, mvFieldNeighbours[count][1]);
1453
1454 count++;
1455
1456 if (count == maxNumMergeCand)
1457 return maxNumMergeCand;
1458 }
1459
1460 deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
1461
1462 // above
1463 uint32_t abovePartIdx = 0;
1464 const CUData* cuAbove = getPUAbove(abovePartIdx, partIdxRT);
1465 bool isAvailableB1 = cuAbove &&
1466 cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) &&
1467 !(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) &&
1468 !cuAbove->isIntra(abovePartIdx);
1469 if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx)))
1470 {
1471 // get Inter Dir
1472 interDirNeighbours[count] = cuAbove->m_interDir[abovePartIdx];
1473 // get Mv from Left
1474 cuAbove->getMvField(cuAbove, abovePartIdx, 0, mvFieldNeighbours[count][0]);
1475 if (isInterB)
1476 cuAbove->getMvField(cuAbove, abovePartIdx, 1, mvFieldNeighbours[count][1]);
1477
1478 count++;
1479
1480 if (count == maxNumMergeCand)
1481 return maxNumMergeCand;
1482 }
1483
1484 // above right
1485 uint32_t aboveRightPartIdx = 0;
1486 const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT);
1487 bool isAvailableB0 = cuAboveRight &&
1488 cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) &&
1489 !cuAboveRight->isIntra(aboveRightPartIdx);
1490 if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx)))
1491 {
1492 // get Inter Dir
1493 interDirNeighbours[count] = cuAboveRight->m_interDir[aboveRightPartIdx];
1494 // get Mv from Left
1495 cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, mvFieldNeighbours[count][0]);
1496 if (isInterB)
1497 cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, mvFieldNeighbours[count][1]);
1498
1499 count++;
1500
1501 if (count == maxNumMergeCand)
1502 return maxNumMergeCand;
1503 }
1504
1505 // left bottom
1506 uint32_t leftBottomPartIdx = 0;
1507 const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB);
1508 bool isAvailableA0 = cuLeftBottom &&
1509 cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) &&
1510 !cuLeftBottom->isIntra(leftBottomPartIdx);
1511 if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx)))
1512 {
1513 // get Inter Dir
1514 interDirNeighbours[count] = cuLeftBottom->m_interDir[leftBottomPartIdx];
1515 // get Mv from Left
1516 cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, mvFieldNeighbours[count][0]);
1517 if (isInterB)
1518 cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, mvFieldNeighbours[count][1]);
1519
1520 count++;
1521
1522 if (count == maxNumMergeCand)
1523 return maxNumMergeCand;
1524 }
1525
1526 // above left
1527 if (count < 4)
1528 {
1529 uint32_t aboveLeftPartIdx = 0;
1530 const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr);
1531 bool isAvailableB2 = cuAboveLeft &&
1532 cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) &&
1533 !cuAboveLeft->isIntra(aboveLeftPartIdx);
1534 if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx))
1535 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx)))
1536 {
1537 // get Inter Dir
1538 interDirNeighbours[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx];
1539 // get Mv from Left
1540 cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, mvFieldNeighbours[count][0]);
1541 if (isInterB)
1542 cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, mvFieldNeighbours[count][1]);
1543
1544 count++;
1545
1546 if (count == maxNumMergeCand)
1547 return maxNumMergeCand;
1548 }
1549 }
1550 if (m_slice->m_sps->bTemporalMVPEnabled)
1551 {
1552 uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1553 MV colmv;
1554 int ctuIdx = -1;
1555
1556 // image boundary check
1557 if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1558 m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1559 {
1560 uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1561 uint32_t numPartInCUSize = s_numPartInCUSize;
1562 bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU
1563 bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row of CTU
1564
1565 if (bNotLastCol && bNotLastRow)
1566 {
1567 absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1];
1568 ctuIdx = m_cuAddr;
1569 }
1570 else if (bNotLastCol)
1571 absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)];
1572 else if (bNotLastRow)
1573 {
1574 absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
1575 ctuIdx = m_cuAddr + 1;
1576 }
1577 else // is the right bottom corner of CTU
1578 absPartAddr = 0;
1579 }
1580
1581 int refIdx = 0;
1582 uint32_t partIdxCenter = deriveCenterIdx(puIdx);
1583 uint32_t curCTUIdx = m_cuAddr;
1584 int dir = 0;
1585 bool bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, 0, ctuIdx, absPartAddr);
1586 if (!bExistMV)
1587 bExistMV = getColMVP(colmv, refIdx, 0, curCTUIdx, partIdxCenter);
1588 if (bExistMV)
1589 {
1590 dir |= 1;
1591 mvFieldNeighbours[count][0].mv = colmv;
1592 mvFieldNeighbours[count][0].refIdx = refIdx;
1593 }
1594
1595 if (isInterB)
1596 {
1597 bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, 1, ctuIdx, absPartAddr);
1598 if (!bExistMV)
1599 bExistMV = getColMVP(colmv, refIdx, 1, curCTUIdx, partIdxCenter);
1600
1601 if (bExistMV)
1602 {
1603 dir |= 2;
1604 mvFieldNeighbours[count][1].mv = colmv;
1605 mvFieldNeighbours[count][1].refIdx = refIdx;
1606 }
1607 }
1608
1609 if (dir != 0)
1610 {
1611 interDirNeighbours[count] = (uint8_t)dir;
1612
1613 count++;
1614
1615 if (count == maxNumMergeCand)
1616 return maxNumMergeCand;
1617 }
1618 }
1619
1620 if (isInterB)
1621 {
1622 const uint32_t cutoff = count * (count - 1);
1623 uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }
1624 uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }
1625
1626 for (uint32_t idx = 0; idx < cutoff; idx++)
1627 {
1628 int i = priorityList0 & 3;
1629 int j = priorityList1 & 3;
1630 priorityList0 >>= 2;
1631 priorityList1 >>= 2;
1632
1633 if ((interDirNeighbours[i] & 0x1) && (interDirNeighbours[j] & 0x2))
1634 {
1635 // get Mv from cand[i] and cand[j]
1636 int refIdxL0 = mvFieldNeighbours[i][0].refIdx;
1637 int refIdxL1 = mvFieldNeighbours[j][1].refIdx;
1638 int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0];
1639 int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1];
1640 if (!(refPOCL0 == refPOCL1 && mvFieldNeighbours[i][0].mv == mvFieldNeighbours[j][1].mv))
1641 {
1642 mvFieldNeighbours[count][0].mv = mvFieldNeighbours[i][0].mv;
1643 mvFieldNeighbours[count][0].refIdx = refIdxL0;
1644 mvFieldNeighbours[count][1].mv = mvFieldNeighbours[j][1].mv;
1645 mvFieldNeighbours[count][1].refIdx = refIdxL1;
1646 interDirNeighbours[count] = 3;
1647
1648 count++;
1649
1650 if (count == maxNumMergeCand)
1651 return maxNumMergeCand;
1652 }
1653 }
1654 }
1655 }
1656 int numRefIdx = (isInterB) ? X265_MIN(m_slice->m_numRefIdx[0], m_slice->m_numRefIdx[1]) : m_slice->m_numRefIdx[0];
1657 int r = 0;
1658 int refcnt = 0;
1659 while (count < maxNumMergeCand)
1660 {
1661 interDirNeighbours[count] = 1;
1662 mvFieldNeighbours[count][0].mv.word = 0;
1663 mvFieldNeighbours[count][0].refIdx = r;
1664
1665 if (isInterB)
1666 {
1667 interDirNeighbours[count] = 3;
1668 mvFieldNeighbours[count][1].mv.word = 0;
1669 mvFieldNeighbours[count][1].refIdx = r;
1670 }
1671
1672 count++;
1673
1674 if (refcnt == numRefIdx - 1)
1675 r = 0;
1676 else
1677 {
1678 ++r;
1679 ++refcnt;
1680 }
1681 }
1682
1683 return count;
1684}
1685
1686/* Check whether the current PU and a spatial neighboring PU are in a same ME region */
1687bool CUData::isDiffMER(int xN, int yN, int xP, int yP) const
1688{
1689 uint32_t plevel = 2;
1690
1691 if ((xN >> plevel) != (xP >> plevel))
1692 return true;
1693 if ((yN >> plevel) != (yP >> plevel))
1694 return true;
1695 return false;
1696}
1697
1698/* Constructs a list of candidates for AMVP, and a larger list of motion candidates */
1699int CUData::fillMvpCand(uint32_t puIdx, uint32_t absPartIdx, int picList, int refIdx, MV* amvpCand, MV* mvc) const
1700{
1701 int num = 0;
1702
1703 // spatial MV
1704 uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
1705
1706 deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
1707
1708 MV mv[MD_ABOVE_LEFT + 1];
1709 MV mvOrder[MD_ABOVE_LEFT + 1];
1710 bool valid[MD_ABOVE_LEFT + 1];
1711 bool validOrder[MD_ABOVE_LEFT + 1];
1712
1713 valid[MD_BELOW_LEFT] = addMVPCand(mv[MD_BELOW_LEFT], picList, refIdx, partIdxLB, MD_BELOW_LEFT);
1714 valid[MD_LEFT] = addMVPCand(mv[MD_LEFT], picList, refIdx, partIdxLB, MD_LEFT);
1715 valid[MD_ABOVE_RIGHT] = addMVPCand(mv[MD_ABOVE_RIGHT], picList, refIdx, partIdxRT, MD_ABOVE_RIGHT);
1716 valid[MD_ABOVE] = addMVPCand(mv[MD_ABOVE], picList, refIdx, partIdxRT, MD_ABOVE);
1717 valid[MD_ABOVE_LEFT] = addMVPCand(mv[MD_ABOVE_LEFT], picList, refIdx, partIdxLT, MD_ABOVE_LEFT);
1718
1719 validOrder[MD_BELOW_LEFT] = addMVPCandOrder(mvOrder[MD_BELOW_LEFT], picList, refIdx, partIdxLB, MD_BELOW_LEFT);
1720 validOrder[MD_LEFT] = addMVPCandOrder(mvOrder[MD_LEFT], picList, refIdx, partIdxLB, MD_LEFT);
1721 validOrder[MD_ABOVE_RIGHT] = addMVPCandOrder(mvOrder[MD_ABOVE_RIGHT], picList, refIdx, partIdxRT, MD_ABOVE_RIGHT);
1722 validOrder[MD_ABOVE] = addMVPCandOrder(mvOrder[MD_ABOVE], picList, refIdx, partIdxRT, MD_ABOVE);
1723 validOrder[MD_ABOVE_LEFT] = addMVPCandOrder(mvOrder[MD_ABOVE_LEFT], picList, refIdx, partIdxLT, MD_ABOVE_LEFT);
1724
1725 // Left predictor search
1726 if (valid[MD_BELOW_LEFT])
1727 amvpCand[num++] = mv[MD_BELOW_LEFT];
1728 else if (valid[MD_LEFT])
1729 amvpCand[num++] = mv[MD_LEFT];
1730 else if (validOrder[MD_BELOW_LEFT])
1731 amvpCand[num++] = mvOrder[MD_BELOW_LEFT];
1732 else if (validOrder[MD_LEFT])
1733 amvpCand[num++] = mvOrder[MD_LEFT];
1734
1735 bool bAddedSmvp = num > 0;
1736
1737 // Above predictor search
1738 if (valid[MD_ABOVE_RIGHT])
1739 amvpCand[num++] = mv[MD_ABOVE_RIGHT];
1740 else if (valid[MD_ABOVE])
1741 amvpCand[num++] = mv[MD_ABOVE];
1742 else if (valid[MD_ABOVE_LEFT])
1743 amvpCand[num++] = mv[MD_ABOVE_LEFT];
1744
1745 if (!bAddedSmvp)
1746 {
1747 if (validOrder[MD_ABOVE_RIGHT])
1748 amvpCand[num++] = mvOrder[MD_ABOVE_RIGHT];
1749 else if (validOrder[MD_ABOVE])
1750 amvpCand[num++] = mvOrder[MD_ABOVE];
1751 else if (validOrder[MD_ABOVE_LEFT])
1752 amvpCand[num++] = mvOrder[MD_ABOVE_LEFT];
1753 }
1754
1755 int numMvc = 0;
1756 for (int dir = MD_LEFT; dir <= MD_ABOVE_LEFT; dir++)
1757 {
1758 if (valid[dir] && mv[dir].notZero())
1759 mvc[numMvc++] = mv[dir];
1760
1761 if (validOrder[dir] && mvOrder[dir].notZero())
1762 mvc[numMvc++] = mvOrder[dir];
1763 }
1764
1765 if (num == 2)
1766 {
1767 if (amvpCand[0] == amvpCand[1])
1768 num = 1;
1769 else
1770 /* AMVP_NUM_CANDS = 2 */
1771 return numMvc;
1772 }
1773
1774 if (m_slice->m_sps->bTemporalMVPEnabled)
1775 {
1776 uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1777 uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1778 MV colmv;
1779
1780 // co-located RightBottom temporal predictor (H)
1781 int ctuIdx = -1;
1782
1783 // image boundary check
1784 if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1785 m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1786 {
1787 uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1788 uint32_t numPartInCUSize = s_numPartInCUSize;
1789 bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU
1790 bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row of CTU
1791
1792 if (bNotLastCol && bNotLastRow)
1793 {
1794 absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1];
1795 ctuIdx = m_cuAddr;
1796 }
1797 else if (bNotLastCol)
1798 absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)];
1799 else if (bNotLastRow)
1800 {
1801 absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
1802 ctuIdx = m_cuAddr + 1;
1803 }
1804 else // is the right bottom corner of CTU
1805 absPartAddr = 0;
1806 }
1807 if (ctuIdx >= 0 && getColMVP(colmv, refIdx, picList, ctuIdx, absPartAddr))
1808 {
1809 amvpCand[num++] = colmv;
1810 mvc[numMvc++] = colmv;
1811 }
1812 else
1813 {
1814 uint32_t partIdxCenter = deriveCenterIdx(puIdx);
1815 uint32_t curCTUIdx = m_cuAddr;
1816 if (getColMVP(colmv, refIdx, picList, curCTUIdx, partIdxCenter))
1817 {
1818 amvpCand[num++] = colmv;
1819 mvc[numMvc++] = colmv;
1820 }
1821 }
1822 }
1823
1824 while (num < AMVP_NUM_CANDS)
1825 amvpCand[num++] = 0;
1826
1827 return numMvc;
1828}
1829
1830void CUData::clipMv(MV& outMV) const
1831{
1832 int mvshift = 2;
1833 int offset = 8;
1834 int xmax = (m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift;
1835 int xmin = (-(int)g_maxCUSize - offset - (int)m_cuPelX + 1) << mvshift;
1836
1837 int ymax = (m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift;
1838 int ymin = (-(int)g_maxCUSize - offset - (int)m_cuPelY + 1) << mvshift;
1839
1840 outMV.x = (int16_t)X265_MIN(xmax, X265_MAX(xmin, (int)outMV.x));
1841 outMV.y = (int16_t)X265_MIN(ymax, X265_MAX(ymin, (int)outMV.y));
1842}
1843
1844bool CUData::addMVPCand(MV& mvp, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir) const
1845{
1846 const CUData* tmpCU = NULL;
1847 uint32_t idx = 0;
1848
1849 switch (dir)
1850 {
1851 case MD_LEFT:
1852 tmpCU = getPULeft(idx, partUnitIdx);
1853 break;
1854 case MD_ABOVE:
1855 tmpCU = getPUAbove(idx, partUnitIdx);
1856 break;
1857 case MD_ABOVE_RIGHT:
1858 tmpCU = getPUAboveRight(idx, partUnitIdx);
1859 break;
1860 case MD_BELOW_LEFT:
1861 tmpCU = getPUBelowLeft(idx, partUnitIdx);
1862 break;
1863 case MD_ABOVE_LEFT:
1864 tmpCU = getPUAboveLeft(idx, partUnitIdx);
1865 break;
1866 default:
1867 return false;
1868 }
1869
1870 if (!tmpCU)
1871 return false;
1872
1873 int refPOC = m_slice->m_refPOCList[picList][refIdx];
1874 int partRefIdx = tmpCU->m_refIdx[picList][idx];
1875 if (partRefIdx >= 0 && refPOC == tmpCU->m_slice->m_refPOCList[picList][partRefIdx])
1876 {
1877 mvp = tmpCU->m_mv[picList][idx];
1878 return true;
1879 }
1880
1881 int refPicList2nd = 0;
1882 if (picList == 0)
1883 refPicList2nd = 1;
1884 else if (picList == 1)
1885 refPicList2nd = 0;
1886
1887 int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
1888 int neibRefPOC;
1889
1890 partRefIdx = tmpCU->m_refIdx[refPicList2nd][idx];
1891 if (partRefIdx >= 0)
1892 {
1893 neibRefPOC = tmpCU->m_slice->m_refPOCList[refPicList2nd][partRefIdx];
1894 if (neibRefPOC == curRefPOC)
1895 {
1896 // Same reference frame but different list
1897 mvp = tmpCU->m_mv[refPicList2nd][idx];
1898 return true;
1899 }
1900 }
1901 return false;
1902}
1903
1904bool CUData::addMVPCandOrder(MV& outMV, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir) const
1905{
1906 const CUData* tmpCU = NULL;
1907 uint32_t idx = 0;
1908
1909 switch (dir)
1910 {
1911 case MD_LEFT:
1912 tmpCU = getPULeft(idx, partUnitIdx);
1913 break;
1914 case MD_ABOVE:
1915 tmpCU = getPUAbove(idx, partUnitIdx);
1916 break;
1917 case MD_ABOVE_RIGHT:
1918 tmpCU = getPUAboveRight(idx, partUnitIdx);
1919 break;
1920 case MD_BELOW_LEFT:
1921 tmpCU = getPUBelowLeft(idx, partUnitIdx);
1922 break;
1923 case MD_ABOVE_LEFT:
1924 tmpCU = getPUAboveLeft(idx, partUnitIdx);
1925 break;
1926 default:
1927 return false;
1928 }
1929
1930 if (!tmpCU)
1931 return false;
1932
1933 int refPicList2nd = 0;
1934 if (picList == 0)
1935 refPicList2nd = 1;
1936 else if (picList == 1)
1937 refPicList2nd = 0;
1938
1939 int curPOC = m_slice->m_poc;
1940 int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
1941 int neibPOC = curPOC;
1942 int neibRefPOC;
1943
1944 int partRefIdx = tmpCU->m_refIdx[picList][idx];
1945 if (partRefIdx >= 0)
1946 {
1947 neibRefPOC = tmpCU->m_slice->m_refPOCList[picList][partRefIdx];
1948 MV mvp = tmpCU->m_mv[picList][idx];
1949
1950 scaleMvByPOCDist(outMV, mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
1951 return true;
1952 }
1953
1954 partRefIdx = tmpCU->m_refIdx[refPicList2nd][idx];
1955 if (partRefIdx >= 0)
1956 {
1957 neibRefPOC = tmpCU->m_slice->m_refPOCList[refPicList2nd][partRefIdx];
1958 MV mvp = tmpCU->m_mv[refPicList2nd][idx];
1959
1960 scaleMvByPOCDist(outMV, mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
1961 return true;
1962 }
1963
1964 return false;
1965}
1966
1967bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const
1968{
1969 uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
1970
1971 int colRefPicList;
1972 int colPOC, colRefPOC, curPOC, curRefPOC;
1973 MV colmv;
1974
1975 // use coldir.
1976 Frame *colPic = m_slice->m_refPicList[m_slice->isInterB() ? 1 - m_slice->m_colFromL0Flag : 0][m_slice->m_colRefIdx];
1977 CUData *colCU = colPic->m_encData->getPicCTU(cuAddr);
1978
1979 if (colCU->m_partSize[partUnitIdx] == SIZE_NONE)
1980 return false;
1981
1982 curPOC = m_slice->m_poc;
1983 colPOC = colCU->m_slice->m_poc;
1984
1985 if (colCU->isIntra(absPartAddr))
1986 return false;
1987
1988 colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag;
1989
1990 int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
1991
1992 if (colRefIdx < 0)
1993 {
1994 colRefPicList = 1 - colRefPicList;
1995 colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
1996
1997 if (colRefIdx < 0)
1998 return false;
1999 }
2000
2001 // Scale the vector
2002 colRefPOC = colCU->m_slice->m_refPOCList[colRefPicList][colRefIdx];
2003 colmv = colCU->m_mv[colRefPicList][absPartAddr];
2004 curRefPOC = m_slice->m_refPOCList[picList][outRefIdx];
2005
2006 scaleMvByPOCDist(outMV, colmv, curPOC, curRefPOC, colPOC, colRefPOC);
2007 return true;
2008}
2009
2010void CUData::scaleMvByPOCDist(MV& outMV, const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const
2011{
2012 int diffPocD = colPOC - colRefPOC;
2013 int diffPocB = curPOC - curRefPOC;
2014
2015 if (diffPocD == diffPocB)
2016 outMV = inMV;
2017 else
2018 {
2019 int tdb = Clip3(-128, 127, diffPocB);
2020 int tdd = Clip3(-128, 127, diffPocD);
2021 int x = (0x4000 + abs(tdd / 2)) / tdd;
2022 int scale = Clip3(-4096, 4095, (tdb * x + 32) >> 6);
2023 outMV = scaleMv(inMV, scale);
2024 }
2025}
2026
2027uint32_t CUData::deriveCenterIdx(uint32_t puIdx) const
2028{
2029 uint32_t absPartIdx;
2030 int puWidth, puHeight;
2031
2032 getPartIndexAndSize(puIdx, absPartIdx, puWidth, puHeight);
2033
2034 return g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU + absPartIdx]
2035 + (puHeight >> (LOG2_UNIT_SIZE + 1)) * s_numPartInCUSize
2036 + (puWidth >> (LOG2_UNIT_SIZE + 1))];
2037}
2038
2039ScanType CUData::getCoefScanIdx(uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma, bool bIsIntra) const
2040{
2041 uint32_t dirMode;
2042
2043 if (!bIsIntra)
2044 return SCAN_DIAG;
2045
2046 // check that MDCS can be used for this TU
2047 if (bIsLuma)
2048 {
2049 if (log2TrSize > MDCS_LOG2_MAX_SIZE)
2050 return SCAN_DIAG;
2051
2052 dirMode = m_lumaIntraDir[absPartIdx];
2053 }
2054 else
2055 {
2056 if (log2TrSize > (uint32_t)(MDCS_LOG2_MAX_SIZE - m_hChromaShift))
2057 return SCAN_DIAG;
2058
2059 dirMode = m_chromaIntraDir[absPartIdx];
2060 if (dirMode == DM_CHROMA_IDX)
2061 {
2062 dirMode = m_lumaIntraDir[(m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & 0xFC];
2063 dirMode = (m_chromaFormat == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[dirMode] : dirMode;
2064 }
2065 }
2066
2067 if (abs((int)dirMode - VER_IDX) <= MDCS_ANGLE_LIMIT)
2068 return SCAN_HOR;
2069 else if (abs((int)dirMode - HOR_IDX) <= MDCS_ANGLE_LIMIT)
2070 return SCAN_VER;
2071 else
2072 return SCAN_DIAG;
2073}
2074
2075void CUData::getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const
2076{
2077 // set the group layout
2078 result.log2TrSizeCG = log2TrSize - 2;
2079
2080 // set the scan orders
2081 result.scanType = getCoefScanIdx(absPartIdx, log2TrSize, bIsLuma, isIntra(absPartIdx));
2082 result.scan = g_scanOrder[result.scanType][log2TrSize - 2];
2083 result.scanCG = g_scanOrderCG[result.scanType][result.log2TrSizeCG];
2084
2085 if (log2TrSize == 2)
2086 result.firstSignificanceMapContext = 0;
2087 else if (log2TrSize == 3)
2088 {
2089 result.firstSignificanceMapContext = 9;
2090 if (result.scanType != SCAN_DIAG && bIsLuma)
2091 result.firstSignificanceMapContext += 6;
2092 }
2093 else
2094 result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
2095}
2096
/* Branch-free update of the bits 'flag' inside 'bitfield'; 'value' must be exactly 0 or 1.
 * The old bits are cleared first. (value - 1) is 0 for value == 1 and -1 for value == 0, so its
 * complement is an all-ones or all-zeros mask that re-enables 'flag' only when value == 1. */
#define CU_SET_FLAG(bitfield, flag, value) \
    (bitfield) = (((bitfield) & ~(flag)) | (~((value) - 1) & (flag)))
2098
2099void CUData::calcCTUGeoms(uint32_t picWidth, uint32_t picHeight, uint32_t maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) const
2100{
2101 // Initialize the coding blocks inside the CTB
2102 for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= MIN_LOG2_CU_SIZE; log2CUSize--)
2103 {
2104 uint32_t blockSize = 1 << log2CUSize;
2105 uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize);
2106 int32_t lastLevelFlag = log2CUSize == MIN_LOG2_CU_SIZE;
2107 for (uint32_t sbY = 0; sbY < sbWidth; sbY++)
2108 {
2109 for (uint32_t sbX = 0; sbX < sbWidth; sbX++)
2110 {
2111 uint32_t depthIdx = g_depthScanIdx[sbY][sbX];
2112 uint32_t cuIdx = rangeCUIdx + depthIdx;
2113 uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx << 2);
2114 uint32_t px = m_cuPelX + sbX * blockSize;
2115 uint32_t py = m_cuPelY + sbY * blockSize;
2116 int32_t presentFlag = px < picWidth && py < picHeight;
2117 int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > picWidth || py + blockSize > picHeight);
2118
2119 /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */
2120 uint32_t xOffset = (sbX * blockSize) >> 3;
2121 uint32_t yOffset = (sbY * blockSize) >> 3;
2122 X265_CHECK(cuIdx < CUGeom::MAX_GEOMS, "CU geom index bug\n");
2123
2124 CUGeom *cu = cuDataArray + cuIdx;
2125 cu->log2CUSize = log2CUSize;
2126 cu->childOffset = childIdx - cuIdx;
2127 cu->encodeIdx = g_depthScanIdx[yOffset][xOffset] * 4;
2128 cu->numPartitions = (NUM_CU_PARTITIONS >> ((g_maxLog2CUSize - cu->log2CUSize) * 2));
2129 cu->depth = g_log2Size[maxCUSize] - log2CUSize;
2130
2131 cu->flags = 0;
2132 CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag);
2133 CU_SET_FLAG(cu->flags, CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT, splitMandatoryFlag);
2134 CU_SET_FLAG(cu->flags, CUGeom::LEAF, lastLevelFlag);
2135 }
2136 }
2137 rangeCUIdx += sbWidth * sbWidth;
2138 }
2139}