Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2014 x265 project | |
3 | * | |
4 | * Authors: Steve Borho <steve@borho.org> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com. | |
22 | *****************************************************************************/ | |
23 | ||
24 | #include "common.h" | |
25 | #include "frame.h" | |
26 | #include "framedata.h" | |
27 | #include "picyuv.h" | |
28 | #include "mv.h" | |
29 | #include "cudata.h" | |
30 | ||
31 | using namespace x265; | |
32 | ||
33 | namespace { | |
34 | // file private namespace | |
35 | ||
/* For all bcast* and copy* functions, callers keep dst and src aligned to MIN(size, 32).
 * The helpers move bytes with fixed-size memcpy()/memset() calls instead of casting the
 * uint8_t buffers to wider integer pointers, so they neither depend on that alignment
 * nor access the byte arrays through an incompatible type.  As with memcpy(), the
 * copy* helpers expect dst and src not to overlap. */

/* Store val into the single byte at dst */
void bcast1(uint8_t* dst, uint8_t val) { dst[0] = val; }

/* Copy 4 bytes from src to dst */
void copy4(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 4); }

/* Fill 4 bytes at dst with val.  Using memset() also avoids forming the int-typed
 * product 0x01010101 * val, which exceeds INT_MAX as soon as val reaches 128. */
void bcast4(uint8_t* dst, uint8_t val) { memset(dst, val, 4); }

/* Copy 16 bytes from src to dst */
void copy16(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 16); }

/* Fill 16 bytes at dst with val */
void bcast16(uint8_t* dst, uint8_t val) { memset(dst, val, 16); }

/* Copy 64 bytes from src to dst */
void copy64(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 64); }

/* Fill 64 bytes at dst with val */
void bcast64(uint8_t* dst, uint8_t val) { memset(dst, val, 64); }

/* at 256 bytes, memset/memcpy will probably use SIMD more effectively than a hand-rolled
 * word loop, but hand-written assembly would beat it. */

/* Copy 256 bytes from src to dst */
void copy256(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 256); }

/* Fill 256 bytes at dst with val */
void bcast256(uint8_t* dst, uint8_t val) { memset(dst, val, 256); }
58 | ||
/* Tell whether two addresses share a column.  The test is done with a bit mask,
 * which matches the modulo form below only when numUnitsPerRow is a power of two. */
inline bool isEqualCol(int addrA, int addrB, int numUnitsPerRow)
{
    // same as: addrA % numUnitsPerRow == addrB % numUnitsPerRow
    const int columnMask = numUnitsPerRow - 1;
    const int differingBits = addrA ^ addrB;

    return !(differingBits & columnMask);
}
65 | ||
/* Tell whether two addresses share a row.  The test is done with a bit mask,
 * which matches the division form below only when numUnitsPerRow is a power of two. */
inline bool isEqualRow(int addrA, int addrB, int numUnitsPerRow)
{
    // same as: addrA / numUnitsPerRow == addrB / numUnitsPerRow
    const int rowMask = ~(numUnitsPerRow - 1);
    const int differingBits = addrA ^ addrB;

    return !(differingBits & rowMask);
}
72 | ||
/* Tell whether two addresses share a row or a column (or both).  Both comparisons
 * are always evaluated and combined with a bitwise OR, so there is no short-circuit. */
inline bool isEqualRowOrCol(int addrA, int addrB, int numUnitsPerRow)
{
    const int columnMask = numUnitsPerRow - 1;
    const int differingBits = addrA ^ addrB;

    const bool sameCol = (differingBits & columnMask) == 0;
    const bool sameRow = (differingBits & ~columnMask) == 0;

    return sameCol | sameRow;
}
78 | ||
/* Tell whether an address lies in the first column (mask form of addr % numUnitsPerRow == 0) */
inline bool isZeroCol(int addr, int numUnitsPerRow)
{
    const int columnBits = addr & (numUnitsPerRow - 1);

    return !columnBits;
}
85 | ||
/* Tell whether an address lies in the first row (mask form of addr / numUnitsPerRow == 0) */
inline bool isZeroRow(int addr, int numUnitsPerRow)
{
    const int rowBits = addr & ~(numUnitsPerRow - 1);

    return !rowBits;
}
92 | ||
/* Tell whether the column index of addr is below val (mask form of addr % numUnitsPerRow < val) */
inline bool lessThanCol(int addr, int val, int numUnitsPerRow)
{
    const int column = addr & (numUnitsPerRow - 1);

    return val > column;
}
99 | ||
/* Tell whether the row index of addr is below val (addr / numUnitsPerRow < val),
 * evaluated by comparing addr against the first address of row val. */
inline bool lessThanRow(int addr, int val, int numUnitsPerRow)
{
    const int firstAddrOfRowVal = val * numUnitsPerRow;

    return firstAddrOfRowVal > addr;
}
106 | ||
107 | inline MV scaleMv(MV mv, int scale) | |
108 | { | |
109 | int mvx = Clip3(-32768, 32767, (scale * mv.x + 127 + (scale * mv.x < 0)) >> 8); | |
110 | int mvy = Clip3(-32768, 32767, (scale * mv.y + 127 + (scale * mv.y < 0)) >> 8); | |
111 | ||
112 | return MV((int16_t)mvx, (int16_t)mvy); | |
113 | } | |
114 | ||
// Partition table, indexed as partTable[mode][partition][field]:
//   mode      - partitioning mode (one row group per SIZE_* value below)
//   partition - partition index within that mode
//   field     - 0 selects the partition size, 1 selects the partition offset
// Every entry packs two 4-bit values as 0xXY, where X and Y are expressed in
// quarters of the block size.
const uint32_t partTable[8][4][2] =
{
    // SIZE_2Nx2N
    {
        { 0x44, 0x00 },
        { 0x00, 0x00 },
        { 0x00, 0x00 },
        { 0x00, 0x00 }
    },
    // SIZE_2NxN
    {
        { 0x42, 0x00 },
        { 0x42, 0x02 },
        { 0x00, 0x00 },
        { 0x00, 0x00 }
    },
    // SIZE_Nx2N
    {
        { 0x24, 0x00 },
        { 0x24, 0x20 },
        { 0x00, 0x00 },
        { 0x00, 0x00 }
    },
    // SIZE_NxN
    {
        { 0x22, 0x00 },
        { 0x22, 0x20 },
        { 0x22, 0x02 },
        { 0x22, 0x22 }
    },
    // SIZE_2NxnU
    {
        { 0x41, 0x00 },
        { 0x43, 0x01 },
        { 0x00, 0x00 },
        { 0x00, 0x00 }
    },
    // SIZE_2NxnD
    {
        { 0x43, 0x00 },
        { 0x41, 0x03 },
        { 0x00, 0x00 },
        { 0x00, 0x00 }
    },
    // SIZE_nLx2N
    {
        { 0x14, 0x00 },
        { 0x34, 0x10 },
        { 0x00, 0x00 },
        { 0x00, 0x00 }
    },
    // SIZE_nRx2N
    {
        { 0x34, 0x00 },
        { 0x14, 0x30 },
        { 0x00, 0x00 },
        { 0x00, 0x00 }
    }
};
132 | ||
// Partition address table, indexed as partAddrTable[mode][partition]:
//   mode      - partitioning mode (one row per SIZE_* value below)
//   partition - partition index within that mode
const uint32_t partAddrTable[8][4] =
{
    /* SIZE_2Nx2N */ { 0x00, 0x00, 0x00, 0x00 },
    /* SIZE_2NxN  */ { 0x00, 0x08, 0x08, 0x08 },
    /* SIZE_Nx2N  */ { 0x00, 0x04, 0x04, 0x04 },
    /* SIZE_NxN   */ { 0x00, 0x04, 0x08, 0x0C },
    /* SIZE_2NxnU */ { 0x00, 0x02, 0x02, 0x02 },
    /* SIZE_2NxnD */ { 0x00, 0x0A, 0x0A, 0x0A },
    /* SIZE_nLx2N */ { 0x00, 0x01, 0x01, 0x01 },
    /* SIZE_nRx2N */ { 0x00, 0x05, 0x05, 0x05 }
};
146 | ||
147 | } | |
148 | ||
149 | cubcast_t CUData::s_partSet[NUM_FULL_DEPTH] = { NULL, NULL, NULL, NULL, NULL }; | |
150 | uint32_t CUData::s_numPartInCUSize; | |
151 | ||
152 | CUData::CUData() | |
153 | { | |
154 | memset(this, 0, sizeof(*this)); | |
155 | } | |
156 | ||
157 | void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth, int csp, int instance) | |
158 | { | |
159 | m_chromaFormat = csp; | |
160 | m_hChromaShift = CHROMA_H_SHIFT(csp); | |
161 | m_vChromaShift = CHROMA_V_SHIFT(csp); | |
162 | m_numPartitions = NUM_CU_PARTITIONS >> (depth * 2); | |
163 | ||
164 | if (!s_partSet[0]) | |
165 | { | |
166 | s_numPartInCUSize = 1 << g_maxFullDepth; | |
167 | switch (g_maxLog2CUSize) | |
168 | { | |
169 | case 6: | |
170 | s_partSet[0] = bcast256; | |
171 | s_partSet[1] = bcast64; | |
172 | s_partSet[2] = bcast16; | |
173 | s_partSet[3] = bcast4; | |
174 | s_partSet[4] = bcast1; | |
175 | break; | |
176 | case 5: | |
177 | s_partSet[0] = bcast64; | |
178 | s_partSet[1] = bcast16; | |
179 | s_partSet[2] = bcast4; | |
180 | s_partSet[3] = bcast1; | |
181 | s_partSet[4] = NULL; | |
182 | break; | |
183 | case 4: | |
184 | s_partSet[0] = bcast16; | |
185 | s_partSet[1] = bcast4; | |
186 | s_partSet[2] = bcast1; | |
187 | s_partSet[3] = NULL; | |
188 | s_partSet[4] = NULL; | |
189 | break; | |
190 | default: | |
191 | X265_CHECK(0, "unexpected CTU size\n"); | |
192 | break; | |
193 | } | |
194 | } | |
195 | ||
196 | switch (m_numPartitions) | |
197 | { | |
198 | case 256: // 64x64 CU | |
199 | m_partCopy = copy256; | |
200 | m_partSet = bcast256; | |
201 | m_subPartCopy = copy64; | |
202 | m_subPartSet = bcast64; | |
203 | break; | |
204 | case 64: // 32x32 CU | |
205 | m_partCopy = copy64; | |
206 | m_partSet = bcast64; | |
207 | m_subPartCopy = copy16; | |
208 | m_subPartSet = bcast16; | |
209 | break; | |
210 | case 16: // 16x16 CU | |
211 | m_partCopy = copy16; | |
212 | m_partSet = bcast16; | |
213 | m_subPartCopy = copy4; | |
214 | m_subPartSet = bcast4; | |
215 | break; | |
216 | case 4: // 8x8 CU | |
217 | m_partCopy = copy4; | |
218 | m_partSet = bcast4; | |
219 | m_subPartCopy = NULL; | |
220 | m_subPartSet = NULL; | |
221 | break; | |
222 | default: | |
223 | X265_CHECK(0, "unexpected CU partition count\n"); | |
224 | break; | |
225 | } | |
226 | ||
227 | /* Each CU's data is layed out sequentially within the charMemBlock */ | |
228 | uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance; | |
229 | ||
230 | m_qp = (char*)charBuf; charBuf += m_numPartitions; | |
231 | m_log2CUSize = charBuf; charBuf += m_numPartitions; | |
232 | m_partSize = charBuf; charBuf += m_numPartitions; | |
233 | m_predMode = charBuf; charBuf += m_numPartitions; | |
234 | m_lumaIntraDir = charBuf; charBuf += m_numPartitions; | |
235 | m_tqBypass = charBuf; charBuf += m_numPartitions; | |
236 | m_refIdx[0] = (char*)charBuf; charBuf += m_numPartitions; | |
237 | m_refIdx[1] = (char*)charBuf; charBuf += m_numPartitions; | |
238 | m_cuDepth = charBuf; charBuf += m_numPartitions; | |
239 | m_skipFlag = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ | |
240 | m_mergeFlag = charBuf; charBuf += m_numPartitions; | |
241 | m_interDir = charBuf; charBuf += m_numPartitions; | |
242 | m_mvpIdx[0] = charBuf; charBuf += m_numPartitions; | |
243 | m_mvpIdx[1] = charBuf; charBuf += m_numPartitions; | |
244 | m_tuDepth = charBuf; charBuf += m_numPartitions; | |
245 | m_transformSkip[0] = charBuf; charBuf += m_numPartitions; | |
246 | m_transformSkip[1] = charBuf; charBuf += m_numPartitions; | |
247 | m_transformSkip[2] = charBuf; charBuf += m_numPartitions; | |
248 | m_cbf[0] = charBuf; charBuf += m_numPartitions; | |
249 | m_cbf[1] = charBuf; charBuf += m_numPartitions; | |
250 | m_cbf[2] = charBuf; charBuf += m_numPartitions; | |
251 | m_chromaIntraDir = charBuf; charBuf += m_numPartitions; | |
252 | ||
253 | X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n"); | |
254 | ||
255 | m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions; | |
256 | m_mv[1] = m_mv[0] + m_numPartitions; | |
257 | m_mvd[0] = m_mv[1] + m_numPartitions; | |
258 | m_mvd[1] = m_mvd[0] + m_numPartitions; | |
259 | ||
260 | uint32_t cuSize = g_maxCUSize >> depth; | |
261 | uint32_t sizeL = cuSize * cuSize; | |
262 | uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); | |
263 | m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2); | |
264 | m_trCoeff[1] = m_trCoeff[0] + sizeL; | |
265 | m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC; | |
266 | } | |
267 | ||
268 | void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp) | |
269 | { | |
270 | m_encData = frame.m_encData; | |
271 | m_slice = m_encData->m_slice; | |
272 | m_cuAddr = cuAddr; | |
273 | m_cuPelX = (cuAddr % m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize; | |
274 | m_cuPelY = (cuAddr / m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize; | |
275 | m_absIdxInCTU = 0; | |
276 | m_numPartitions = NUM_CU_PARTITIONS; | |
277 | ||
278 | /* sequential memsets */ | |
279 | m_partSet((uint8_t*)m_qp, (uint8_t)qp); | |
280 | m_partSet(m_log2CUSize, (uint8_t)g_maxLog2CUSize); | |
281 | m_partSet(m_partSize, (uint8_t)SIZE_NONE); | |
282 | m_partSet(m_predMode, (uint8_t)MODE_NONE); | |
283 | m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX); | |
284 | m_partSet(m_tqBypass, (uint8_t)frame.m_encData->m_param->bLossless); | |
285 | if (m_slice->m_sliceType != I_SLICE) | |
286 | { | |
287 | m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID); | |
288 | m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID); | |
289 | } | |
290 | ||
291 | X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n"); | |
292 | ||
293 | /* initialize the remaining CU data in one memset */ | |
294 | memset(m_cuDepth, 0, (BytesPerPartition - 8) * m_numPartitions); | |
295 | ||
296 | uint32_t widthInCU = m_slice->m_sps->numCuInWidth; | |
297 | m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL; | |
298 | m_cuAbove = (m_cuAddr / widthInCU) ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL; | |
299 | m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL; | |
300 | m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL; | |
301 | } | |
302 | ||
303 | // initialize Sub partition | |
304 | void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom) | |
305 | { | |
306 | m_absIdxInCTU = cuGeom.encodeIdx; | |
307 | m_encData = ctu.m_encData; | |
308 | m_slice = ctu.m_slice; | |
309 | m_cuAddr = ctu.m_cuAddr; | |
310 | m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.encodeIdx]; | |
311 | m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.encodeIdx]; | |
312 | m_cuLeft = ctu.m_cuLeft; | |
313 | m_cuAbove = ctu.m_cuAbove; | |
314 | m_cuAboveLeft = ctu.m_cuAboveLeft; | |
315 | m_cuAboveRight = ctu.m_cuAboveRight; | |
316 | X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n"); | |
317 | ||
318 | /* sequential memsets */ | |
319 | m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]); | |
320 | m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize); | |
321 | m_partSet(m_partSize, (uint8_t)SIZE_NONE); | |
322 | m_partSet(m_predMode, (uint8_t)MODE_NONE); | |
323 | m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX); | |
324 | m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless); | |
325 | m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID); | |
326 | m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID); | |
327 | m_partSet(m_cuDepth, (uint8_t)cuGeom.depth); | |
328 | ||
329 | /* initialize the remaining CU data in one memset */ | |
330 | memset(m_skipFlag, 0, (BytesPerPartition - 9) * m_numPartitions); | |
331 | } | |
332 | ||
333 | /* Copy the results of a sub-part (split) CU to the parent CU */ | |
334 | void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx) | |
335 | { | |
336 | X265_CHECK(subPartIdx < 4, "part unit should be less than 4\n"); | |
337 | ||
338 | uint32_t offset = childGeom.numPartitions * subPartIdx; | |
339 | ||
340 | m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp); | |
341 | m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize); | |
342 | m_subPartCopy(m_partSize + offset, subCU.m_partSize); | |
343 | m_subPartCopy(m_predMode + offset, subCU.m_predMode); | |
344 | m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir); | |
345 | m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass); | |
346 | m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]); | |
347 | m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]); | |
348 | m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth); | |
349 | m_subPartCopy(m_skipFlag + offset, subCU.m_skipFlag); | |
350 | m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag); | |
351 | m_subPartCopy(m_interDir + offset, subCU.m_interDir); | |
352 | m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]); | |
353 | m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]); | |
354 | m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth); | |
355 | m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]); | |
356 | m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]); | |
357 | m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]); | |
358 | m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]); | |
359 | m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]); | |
360 | m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]); | |
361 | m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir); | |
362 | ||
363 | memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV)); | |
364 | memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV)); | |
365 | memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV)); | |
366 | memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV)); | |
367 | ||
368 | uint32_t tmp = 1 << ((g_maxLog2CUSize - childGeom.depth) * 2); | |
369 | uint32_t tmp2 = subPartIdx * tmp; | |
370 | memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t) * tmp); | |
371 | ||
372 | uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift); | |
373 | uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift); | |
374 | memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC); | |
375 | memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC); | |
376 | } | |
377 | ||
378 | /* If a sub-CU part is not present (off the edge of the picture) its depth and | |
379 | * log2size should still be configured */ | |
380 | void CUData::setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx) | |
381 | { | |
382 | uint32_t offset = childGeom.numPartitions * subPartIdx; | |
383 | m_subPartSet(m_cuDepth + offset, (uint8_t)childGeom.depth); | |
384 | m_subPartSet(m_log2CUSize + offset, (uint8_t)childGeom.log2CUSize); | |
385 | } | |
386 | ||
387 | /* Copy all CU data from one instance to the next, except set lossless flag | |
388 | * This will only get used when --cu-lossless is enabled but --lossless is not. */ | |
389 | void CUData::initLosslessCU(const CUData& cu, const CUGeom& cuGeom) | |
390 | { | |
391 | /* Start by making an exact copy */ | |
392 | m_encData = cu.m_encData; | |
393 | m_slice = cu.m_slice; | |
394 | m_cuAddr = cu.m_cuAddr; | |
395 | m_cuPelX = cu.m_cuPelX; | |
396 | m_cuPelY = cu.m_cuPelY; | |
397 | m_cuLeft = cu.m_cuLeft; | |
398 | m_cuAbove = cu.m_cuAbove; | |
399 | m_cuAboveLeft = cu.m_cuAboveLeft; | |
400 | m_cuAboveRight = cu.m_cuAboveRight; | |
401 | m_absIdxInCTU = cuGeom.encodeIdx; | |
402 | m_numPartitions = cuGeom.numPartitions; | |
403 | memcpy(m_qp, cu.m_qp, BytesPerPartition * m_numPartitions); | |
404 | memcpy(m_mv[0], cu.m_mv[0], m_numPartitions * sizeof(MV)); | |
405 | memcpy(m_mv[1], cu.m_mv[1], m_numPartitions * sizeof(MV)); | |
406 | memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV)); | |
407 | memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV)); | |
408 | ||
409 | /* force TQBypass to true */ | |
410 | m_partSet(m_tqBypass, true); | |
411 | ||
412 | /* clear residual coding flags */ | |
413 | m_partSet(m_skipFlag, 0); | |
414 | m_partSet(m_tuDepth, 0); | |
415 | m_partSet(m_transformSkip[0], 0); | |
416 | m_partSet(m_transformSkip[1], 0); | |
417 | m_partSet(m_transformSkip[2], 0); | |
418 | m_partSet(m_cbf[0], 0); | |
419 | m_partSet(m_cbf[1], 0); | |
420 | m_partSet(m_cbf[2], 0); | |
421 | } | |
422 | ||
423 | /* Copy completed predicted CU to CTU in picture */ | |
424 | void CUData::copyToPic(uint32_t depth) const | |
425 | { | |
426 | CUData& ctu = *m_encData->getPicCTU(m_cuAddr); | |
427 | ||
428 | m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp); | |
429 | m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize); | |
430 | m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize); | |
431 | m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode); | |
432 | m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir); | |
433 | m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass); | |
434 | m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]); | |
435 | m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]); | |
436 | m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth); | |
437 | m_partCopy(ctu.m_skipFlag + m_absIdxInCTU, m_skipFlag); | |
438 | m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag); | |
439 | m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir); | |
440 | m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]); | |
441 | m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]); | |
442 | m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth); | |
443 | m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]); | |
444 | m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]); | |
445 | m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]); | |
446 | m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]); | |
447 | m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]); | |
448 | m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]); | |
449 | m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir); | |
450 | ||
451 | memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV)); | |
452 | memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV)); | |
453 | memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV)); | |
454 | memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV)); | |
455 | ||
456 | uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2); | |
457 | uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2); | |
458 | memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY); | |
459 | ||
460 | uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift); | |
461 | uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift); | |
462 | memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC); | |
463 | memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC); | |
464 | } | |
465 | ||
466 | /* The reverse of copyToPic, called only by encodeResidue */ | |
467 | void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom) | |
468 | { | |
469 | m_encData = ctu.m_encData; | |
470 | m_slice = ctu.m_slice; | |
471 | m_cuAddr = ctu.m_cuAddr; | |
472 | m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.encodeIdx]; | |
473 | m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.encodeIdx]; | |
474 | m_absIdxInCTU = cuGeom.encodeIdx; | |
475 | m_numPartitions = cuGeom.numPartitions; | |
476 | ||
477 | /* copy out all prediction info for this part */ | |
478 | m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU); | |
479 | m_partCopy(m_log2CUSize, ctu.m_log2CUSize + m_absIdxInCTU); | |
480 | m_partCopy(m_partSize, ctu.m_partSize + m_absIdxInCTU); | |
481 | m_partCopy(m_predMode, ctu.m_predMode + m_absIdxInCTU); | |
482 | m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU); | |
483 | m_partCopy(m_tqBypass, ctu.m_tqBypass + m_absIdxInCTU); | |
484 | m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU); | |
485 | m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU); | |
486 | m_partCopy(m_cuDepth, ctu.m_cuDepth + m_absIdxInCTU); | |
487 | m_partCopy(m_mergeFlag, ctu.m_mergeFlag + m_absIdxInCTU); | |
488 | m_partCopy(m_interDir, ctu.m_interDir + m_absIdxInCTU); | |
489 | m_partCopy(m_mvpIdx[0], ctu.m_mvpIdx[0] + m_absIdxInCTU); | |
490 | m_partCopy(m_mvpIdx[1], ctu.m_mvpIdx[1] + m_absIdxInCTU); | |
491 | m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU); | |
492 | ||
493 | memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); | |
494 | memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); | |
495 | memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); | |
496 | memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); | |
497 | ||
498 | /* clear residual coding flags */ | |
499 | m_partSet(m_skipFlag, 0); | |
500 | m_partSet(m_tuDepth, 0); | |
501 | m_partSet(m_transformSkip[0], 0); | |
502 | m_partSet(m_transformSkip[1], 0); | |
503 | m_partSet(m_transformSkip[2], 0); | |
504 | m_partSet(m_cbf[0], 0); | |
505 | m_partSet(m_cbf[1], 0); | |
506 | m_partSet(m_cbf[2], 0); | |
507 | } | |
508 | ||
509 | /* Only called by encodeResidue, these fields can be modified during inter/intra coding */ | |
510 | void CUData::updatePic(uint32_t depth) const | |
511 | { | |
512 | CUData& ctu = *m_encData->getPicCTU(m_cuAddr); | |
513 | ||
514 | m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp); | |
515 | m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]); | |
516 | m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]); | |
517 | m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]); | |
518 | m_partCopy(ctu.m_skipFlag + m_absIdxInCTU, m_skipFlag); | |
519 | m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth); | |
520 | m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]); | |
521 | m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]); | |
522 | m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]); | |
523 | m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir); | |
524 | ||
525 | uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2); | |
526 | uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2); | |
527 | memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY); | |
528 | tmpY >>= m_hChromaShift + m_vChromaShift; | |
529 | tmpY2 >>= m_hChromaShift + m_vChromaShift; | |
530 | memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY); | |
531 | memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY); | |
532 | } | |
533 | ||
534 | const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const | |
535 | { | |
536 | uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; | |
537 | ||
538 | if (!isZeroCol(absPartIdx, s_numPartInCUSize)) | |
539 | { | |
540 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; | |
541 | lPartUnitIdx = g_rasterToZscan[absPartIdx - 1]; | |
542 | if (isEqualCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize)) | |
543 | return m_encData->getPicCTU(m_cuAddr); | |
544 | else | |
545 | { | |
546 | lPartUnitIdx -= m_absIdxInCTU; | |
547 | return this; | |
548 | } | |
549 | } | |
550 | ||
551 | lPartUnitIdx = g_rasterToZscan[absPartIdx + s_numPartInCUSize - 1]; | |
552 | return m_cuLeft; | |
553 | } | |
554 | ||
555 | const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx, bool planarAtCTUBoundary) const | |
556 | { | |
557 | uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; | |
558 | ||
559 | if (!isZeroRow(absPartIdx, s_numPartInCUSize)) | |
560 | { | |
561 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; | |
562 | aPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize]; | |
563 | if (isEqualRow(absPartIdx, absZorderCUIdx, s_numPartInCUSize)) | |
564 | return m_encData->getPicCTU(m_cuAddr); | |
565 | else | |
566 | { | |
567 | aPartUnitIdx -= m_absIdxInCTU; | |
568 | return this; | |
569 | } | |
570 | } | |
571 | ||
572 | if (planarAtCTUBoundary) | |
573 | return NULL; | |
574 | ||
575 | aPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - s_numPartInCUSize]; | |
576 | return m_cuAbove; | |
577 | } | |
578 | ||
579 | const CUData* CUData::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const | |
580 | { | |
581 | uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; | |
582 | ||
583 | if (!isZeroCol(absPartIdx, s_numPartInCUSize)) | |
584 | { | |
585 | if (!isZeroRow(absPartIdx, s_numPartInCUSize)) | |
586 | { | |
587 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; | |
588 | alPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize - 1]; | |
589 | if (isEqualRowOrCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize)) | |
590 | return m_encData->getPicCTU(m_cuAddr); | |
591 | else | |
592 | { | |
593 | alPartUnitIdx -= m_absIdxInCTU; | |
594 | return this; | |
595 | } | |
596 | } | |
597 | alPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - s_numPartInCUSize - 1]; | |
598 | return m_cuAbove; | |
599 | } | |
600 | ||
601 | if (!isZeroRow(absPartIdx, s_numPartInCUSize)) | |
602 | { | |
603 | alPartUnitIdx = g_rasterToZscan[absPartIdx - 1]; | |
604 | return m_cuLeft; | |
605 | } | |
606 | ||
607 | alPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - 1]; | |
608 | return m_cuAboveLeft; | |
609 | } | |
610 | ||
611 | const CUData* CUData::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const | |
612 | { | |
613 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples) | |
614 | return NULL; | |
615 | ||
616 | uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; | |
617 | ||
618 | if (lessThanCol(absPartIdxRT, s_numPartInCUSize - 1, s_numPartInCUSize)) | |
619 | { | |
620 | if (!isZeroRow(absPartIdxRT, s_numPartInCUSize)) | |
621 | { | |
622 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1]) | |
623 | { | |
624 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; | |
625 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1]; | |
626 | if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize)) | |
627 | return m_encData->getPicCTU(m_cuAddr); | |
628 | else | |
629 | { | |
630 | arPartUnitIdx -= m_absIdxInCTU; | |
631 | return this; | |
632 | } | |
633 | } | |
634 | return NULL; | |
635 | } | |
636 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - s_numPartInCUSize + 1]; | |
637 | return m_cuAbove; | |
638 | } | |
639 | ||
640 | if (!isZeroRow(absPartIdxRT, s_numPartInCUSize)) | |
641 | return NULL; | |
642 | ||
643 | arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize]; | |
644 | return m_cuAboveRight; | |
645 | } | |
646 | ||
647 | const CUData* CUData::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const | |
648 | { | |
649 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples) | |
650 | return NULL; | |
651 | ||
652 | uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; | |
653 | ||
654 | if (lessThanRow(absPartIdxLB, s_numPartInCUSize - 1, s_numPartInCUSize)) | |
655 | { | |
656 | if (!isZeroCol(absPartIdxLB, s_numPartInCUSize)) | |
657 | { | |
658 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + s_numPartInCUSize - 1]) | |
659 | { | |
660 | uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * s_numPartInCUSize; | |
661 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + s_numPartInCUSize - 1]; | |
662 | if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, s_numPartInCUSize)) | |
663 | return m_encData->getPicCTU(m_cuAddr); | |
664 | else | |
665 | { | |
666 | blPartUnitIdx -= m_absIdxInCTU; | |
667 | return this; | |
668 | } | |
669 | } | |
670 | return NULL; | |
671 | } | |
672 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + s_numPartInCUSize * 2 - 1]; | |
673 | return m_cuLeft; | |
674 | } | |
675 | ||
676 | return NULL; | |
677 | } | |
678 | ||
679 | const CUData* CUData::getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const | |
680 | { | |
681 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picHeightInLumaSamples) | |
682 | return NULL; | |
683 | ||
684 | uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; | |
685 | ||
686 | if (lessThanRow(absPartIdxLB, s_numPartInCUSize - partUnitOffset, s_numPartInCUSize)) | |
687 | { | |
688 | if (!isZeroCol(absPartIdxLB, s_numPartInCUSize)) | |
689 | { | |
690 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + partUnitOffset * s_numPartInCUSize - 1]) | |
691 | { | |
692 | uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * s_numPartInCUSize; | |
693 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + partUnitOffset * s_numPartInCUSize - 1]; | |
694 | if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, s_numPartInCUSize)) | |
695 | return m_encData->getPicCTU(m_cuAddr); | |
696 | else | |
697 | { | |
698 | blPartUnitIdx -= m_absIdxInCTU; | |
699 | return this; | |
700 | } | |
701 | } | |
702 | return NULL; | |
703 | } | |
704 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + partUnitOffset) * s_numPartInCUSize - 1]; | |
705 | if (!m_cuLeft || !m_cuLeft->m_slice) | |
706 | return NULL; | |
707 | return m_cuLeft; | |
708 | } | |
709 | ||
710 | return NULL; | |
711 | } | |
712 | ||
713 | const CUData* CUData::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const | |
714 | { | |
715 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picWidthInLumaSamples) | |
716 | return NULL; | |
717 | ||
718 | uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; | |
719 | ||
720 | if (lessThanCol(absPartIdxRT, s_numPartInCUSize - partUnitOffset, s_numPartInCUSize)) | |
721 | { | |
722 | if (!isZeroRow(absPartIdxRT, s_numPartInCUSize)) | |
723 | { | |
724 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + partUnitOffset]) | |
725 | { | |
726 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; | |
727 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + partUnitOffset]; | |
728 | if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize)) | |
729 | return m_encData->getPicCTU(m_cuAddr); | |
730 | else | |
731 | { | |
732 | arPartUnitIdx -= m_absIdxInCTU; | |
733 | return this; | |
734 | } | |
735 | } | |
736 | return NULL; | |
737 | } | |
738 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset]; | |
739 | if (!m_cuAbove || !m_cuAbove->m_slice) | |
740 | return NULL; | |
741 | return m_cuAbove; | |
742 | } | |
743 | ||
744 | if (!isZeroRow(absPartIdxRT, s_numPartInCUSize)) | |
745 | return NULL; | |
746 | ||
747 | arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset - 1]; | |
748 | if ((m_cuAboveRight == NULL || m_cuAboveRight->m_slice == NULL || (m_cuAboveRight->m_cuAddr) > m_cuAddr)) | |
749 | return NULL; | |
750 | return m_cuAboveRight; | |
751 | } | |
752 | ||
753 | /* Get left QpMinCu */ | |
754 | const CUData* CUData::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInCTU) const | |
755 | { | |
756 | uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2); | |
757 | uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx]; | |
758 | ||
759 | // check for left CTU boundary | |
760 | if (isZeroCol(absRorderQpMinCUIdx, s_numPartInCUSize)) | |
761 | return NULL; | |
762 | ||
763 | // get index of left-CU relative to top-left corner of current quantization group | |
764 | lPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - 1]; | |
765 | ||
766 | // return pointer to current CTU | |
767 | return m_encData->getPicCTU(m_cuAddr); | |
768 | } | |
769 | ||
770 | /* Get above QpMinCu */ | |
771 | const CUData* CUData::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInCTU) const | |
772 | { | |
773 | uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2); | |
774 | uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx]; | |
775 | ||
776 | // check for top CTU boundary | |
777 | if (isZeroRow(absRorderQpMinCUIdx, s_numPartInCUSize)) | |
778 | return NULL; | |
779 | ||
780 | // get index of top-CU relative to top-left corner of current quantization group | |
781 | aPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - s_numPartInCUSize]; | |
782 | ||
783 | // return pointer to current CTU | |
784 | return m_encData->getPicCTU(m_cuAddr); | |
785 | } | |
786 | ||
787 | /* Get reference QP from left QpMinCu or latest coded QP */ | |
788 | char CUData::getRefQP(uint32_t curAbsIdxInCTU) const | |
789 | { | |
790 | uint32_t lPartIdx = 0, aPartIdx = 0; | |
791 | const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU); | |
792 | const CUData* cUAbove = getQpMinCuAbove(aPartIdx, m_absIdxInCTU + curAbsIdxInCTU); | |
793 | ||
794 | return ((cULeft ? cULeft->m_qp[lPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + (cUAbove ? cUAbove->m_qp[aPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + 1) >> 1; | |
795 | } | |
796 | ||
797 | int CUData::getLastValidPartIdx(int absPartIdx) const | |
798 | { | |
799 | int lastValidPartIdx = absPartIdx - 1; | |
800 | ||
801 | while (lastValidPartIdx >= 0 && m_predMode[lastValidPartIdx] == MODE_NONE) | |
802 | { | |
803 | uint32_t depth = m_cuDepth[lastValidPartIdx]; | |
804 | lastValidPartIdx -= m_numPartitions >> (depth << 1); | |
805 | } | |
806 | ||
807 | return lastValidPartIdx; | |
808 | } | |
809 | ||
810 | char CUData::getLastCodedQP(uint32_t absPartIdx) const | |
811 | { | |
812 | uint32_t quPartIdxMask = 0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2; | |
813 | int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask); | |
814 | ||
815 | if (lastValidPartIdx >= 0) | |
816 | return m_qp[lastValidPartIdx]; | |
817 | else | |
818 | { | |
819 | if (m_absIdxInCTU) | |
820 | return m_encData->getPicCTU(m_cuAddr)->getLastCodedQP(m_absIdxInCTU); | |
821 | else if (m_cuAddr > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled && !(m_cuAddr % m_slice->m_sps->numCuInWidth))) | |
822 | return m_encData->getPicCTU(m_cuAddr - 1)->getLastCodedQP(NUM_CU_PARTITIONS); | |
823 | else | |
824 | return (char)m_slice->m_sliceQp; | |
825 | } | |
826 | } | |
827 | ||
828 | /* Get allowed chroma intra modes */ | |
829 | void CUData::getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const | |
830 | { | |
831 | modeList[0] = PLANAR_IDX; | |
832 | modeList[1] = VER_IDX; | |
833 | modeList[2] = HOR_IDX; | |
834 | modeList[3] = DC_IDX; | |
835 | modeList[4] = DM_CHROMA_IDX; | |
836 | ||
837 | uint32_t lumaMode = m_lumaIntraDir[absPartIdx]; | |
838 | ||
839 | for (int i = 0; i < NUM_CHROMA_MODE - 1; i++) | |
840 | { | |
841 | if (lumaMode == modeList[i]) | |
842 | { | |
843 | modeList[i] = 34; // VER+8 mode | |
844 | break; | |
845 | } | |
846 | } | |
847 | } | |
848 | ||
849 | /* Get most probable intra modes */ | |
850 | int CUData::getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const | |
851 | { | |
852 | const CUData* tempCU; | |
853 | uint32_t tempPartIdx; | |
854 | uint32_t leftIntraDir, aboveIntraDir; | |
855 | ||
856 | // Get intra direction of left PU | |
857 | tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
858 | ||
859 | leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX; | |
860 | ||
861 | // Get intra direction of above PU | |
862 | tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx, true); | |
863 | ||
864 | aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX; | |
865 | ||
866 | if (leftIntraDir == aboveIntraDir) | |
867 | { | |
868 | if (leftIntraDir >= 2) // angular modes | |
869 | { | |
870 | intraDirPred[0] = leftIntraDir; | |
871 | intraDirPred[1] = ((leftIntraDir - 2 + 31) & 31) + 2; | |
872 | intraDirPred[2] = ((leftIntraDir - 2 + 1) & 31) + 2; | |
873 | } | |
874 | else //non-angular | |
875 | { | |
876 | intraDirPred[0] = PLANAR_IDX; | |
877 | intraDirPred[1] = DC_IDX; | |
878 | intraDirPred[2] = VER_IDX; | |
879 | } | |
880 | return 1; | |
881 | } | |
882 | else | |
883 | { | |
884 | intraDirPred[0] = leftIntraDir; | |
885 | intraDirPred[1] = aboveIntraDir; | |
886 | ||
887 | if (leftIntraDir && aboveIntraDir) //both modes are non-planar | |
888 | intraDirPred[2] = PLANAR_IDX; | |
889 | else | |
890 | intraDirPred[2] = (leftIntraDir + aboveIntraDir) < 2 ? VER_IDX : DC_IDX; | |
891 | return 2; | |
892 | } | |
893 | } | |
894 | ||
895 | uint32_t CUData::getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const | |
896 | { | |
897 | const CUData* tempCU; | |
898 | uint32_t tempPartIdx; | |
899 | uint32_t ctx; | |
900 | ||
901 | // Get left split flag | |
902 | tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
903 | ctx = (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0; | |
904 | ||
905 | // Get above split flag | |
906 | tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
907 | ctx += (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0; | |
908 | ||
909 | return ctx; | |
910 | } | |
911 | ||
912 | void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const | |
913 | { | |
914 | uint32_t log2CUSize = m_log2CUSize[absPartIdx]; | |
915 | uint32_t splitFlag = m_partSize[absPartIdx] == SIZE_NxN; | |
916 | ||
917 | tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize; | |
918 | tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize; | |
919 | ||
920 | tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag), tuDepthRange[1])); | |
921 | } | |
922 | ||
923 | void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const | |
924 | { | |
925 | uint32_t log2CUSize = m_log2CUSize[absPartIdx]; | |
926 | uint32_t quadtreeTUMaxDepth = m_slice->m_sps->quadtreeTUMaxDepthInter; | |
927 | uint32_t splitFlag = quadtreeTUMaxDepth == 1 && m_partSize[absPartIdx] != SIZE_2Nx2N; | |
928 | ||
929 | tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize; | |
930 | tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize; | |
931 | ||
932 | tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag), tuDepthRange[1])); | |
933 | } | |
934 | ||
935 | uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const | |
936 | { | |
937 | const CUData* tempCU; | |
938 | uint32_t tempPartIdx; | |
939 | uint32_t ctx; | |
940 | ||
941 | // Get BCBP of left PU | |
942 | tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
943 | ctx = tempCU ? tempCU->isSkipped(tempPartIdx) : 0; | |
944 | ||
945 | // Get BCBP of above PU | |
946 | tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
947 | ctx += tempCU ? tempCU->isSkipped(tempPartIdx) : 0; | |
948 | ||
949 | return ctx; | |
950 | } | |
951 | ||
952 | bool CUData::setQPSubCUs(char qp, uint32_t absPartIdx, uint32_t depth) | |
953 | { | |
954 | uint32_t curPartNumb = NUM_CU_PARTITIONS >> (depth << 1); | |
955 | uint32_t curPartNumQ = curPartNumb >> 2; | |
956 | ||
957 | if (m_cuDepth[absPartIdx] > depth) | |
958 | { | |
959 | for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++) | |
960 | if (setQPSubCUs(qp, absPartIdx + subPartIdx * curPartNumQ, depth + 1)) | |
961 | return true; | |
962 | } | |
963 | else | |
964 | { | |
965 | if (getQtRootCbf(absPartIdx)) | |
966 | return true; | |
967 | else | |
968 | setQPSubParts(qp, absPartIdx, depth); | |
969 | } | |
970 | ||
971 | return false; | |
972 | } | |
973 | ||
974 | void CUData::setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx) | |
975 | { | |
976 | uint32_t curPartNumQ = m_numPartitions >> 2; | |
977 | X265_CHECK(puIdx < 2, "unexpected part unit index\n"); | |
978 | ||
979 | switch (m_partSize[absPartIdx]) | |
980 | { | |
981 | case SIZE_2Nx2N: | |
982 | memset(m_interDir + absPartIdx, dir, 4 * curPartNumQ); | |
983 | break; | |
984 | case SIZE_2NxN: | |
985 | memset(m_interDir + absPartIdx, dir, 2 * curPartNumQ); | |
986 | break; | |
987 | case SIZE_Nx2N: | |
988 | memset(m_interDir + absPartIdx, dir, curPartNumQ); | |
989 | memset(m_interDir + absPartIdx + 2 * curPartNumQ, dir, curPartNumQ); | |
990 | break; | |
991 | case SIZE_NxN: | |
992 | memset(m_interDir + absPartIdx, dir, curPartNumQ); | |
993 | break; | |
994 | case SIZE_2NxnU: | |
995 | if (!puIdx) | |
996 | { | |
997 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1)); | |
998 | memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1)); | |
999 | } | |
1000 | else | |
1001 | { | |
1002 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1)); | |
1003 | memset(m_interDir + absPartIdx + curPartNumQ, dir, ((curPartNumQ >> 1) + (curPartNumQ << 1))); | |
1004 | } | |
1005 | break; | |
1006 | case SIZE_2NxnD: | |
1007 | if (!puIdx) | |
1008 | { | |
1009 | memset(m_interDir + absPartIdx, dir, ((curPartNumQ << 1) + (curPartNumQ >> 1))); | |
1010 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ, dir, (curPartNumQ >> 1)); | |
1011 | } | |
1012 | else | |
1013 | { | |
1014 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1)); | |
1015 | memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1)); | |
1016 | } | |
1017 | break; | |
1018 | case SIZE_nLx2N: | |
1019 | if (!puIdx) | |
1020 | { | |
1021 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2)); | |
1022 | memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
1023 | memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2)); | |
1024 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
1025 | } | |
1026 | else | |
1027 | { | |
1028 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2)); | |
1029 | memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2))); | |
1030 | memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2)); | |
1031 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2))); | |
1032 | } | |
1033 | break; | |
1034 | case SIZE_nRx2N: | |
1035 | if (!puIdx) | |
1036 | { | |
1037 | memset(m_interDir + absPartIdx, dir, (curPartNumQ + (curPartNumQ >> 2))); | |
1038 | memset(m_interDir + absPartIdx + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
1039 | memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ + (curPartNumQ >> 2))); | |
1040 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
1041 | } | |
1042 | else | |
1043 | { | |
1044 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2)); | |
1045 | memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
1046 | memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2)); | |
1047 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
1048 | } | |
1049 | break; | |
1050 | default: | |
1051 | X265_CHECK(0, "unexpected part type\n"); | |
1052 | break; | |
1053 | } | |
1054 | } | |
1055 | ||
1056 | template<typename T> | |
1057 | void CUData::setAllPU(T* p, const T& val, int absPartIdx, int puIdx) | |
1058 | { | |
1059 | int i; | |
1060 | ||
1061 | p += absPartIdx; | |
1062 | int numElements = m_numPartitions; | |
1063 | ||
1064 | switch (m_partSize[absPartIdx]) | |
1065 | { | |
1066 | case SIZE_2Nx2N: | |
1067 | for (i = 0; i < numElements; i++) | |
1068 | p[i] = val; | |
1069 | break; | |
1070 | ||
1071 | case SIZE_2NxN: | |
1072 | numElements >>= 1; | |
1073 | for (i = 0; i < numElements; i++) | |
1074 | p[i] = val; | |
1075 | break; | |
1076 | ||
1077 | case SIZE_Nx2N: | |
1078 | numElements >>= 2; | |
1079 | for (i = 0; i < numElements; i++) | |
1080 | { | |
1081 | p[i] = val; | |
1082 | p[i + 2 * numElements] = val; | |
1083 | } | |
1084 | break; | |
1085 | ||
1086 | case SIZE_2NxnU: | |
1087 | { | |
1088 | int curPartNumQ = numElements >> 2; | |
1089 | if (!puIdx) | |
1090 | { | |
1091 | T *pT = p; | |
1092 | T *pT2 = p + curPartNumQ; | |
1093 | for (i = 0; i < (curPartNumQ >> 1); i++) | |
1094 | { | |
1095 | pT[i] = val; | |
1096 | pT2[i] = val; | |
1097 | } | |
1098 | } | |
1099 | else | |
1100 | { | |
1101 | T *pT = p; | |
1102 | for (i = 0; i < (curPartNumQ >> 1); i++) | |
1103 | pT[i] = val; | |
1104 | ||
1105 | pT = p + curPartNumQ; | |
1106 | for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++) | |
1107 | pT[i] = val; | |
1108 | } | |
1109 | break; | |
1110 | } | |
1111 | ||
1112 | case SIZE_2NxnD: | |
1113 | { | |
1114 | int curPartNumQ = numElements >> 2; | |
1115 | if (!puIdx) | |
1116 | { | |
1117 | T *pT = p; | |
1118 | for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++) | |
1119 | pT[i] = val; | |
1120 | ||
1121 | pT = p + (numElements - curPartNumQ); | |
1122 | for (i = 0; i < (curPartNumQ >> 1); i++) | |
1123 | pT[i] = val; | |
1124 | } | |
1125 | else | |
1126 | { | |
1127 | T *pT = p; | |
1128 | T *pT2 = p + curPartNumQ; | |
1129 | for (i = 0; i < (curPartNumQ >> 1); i++) | |
1130 | { | |
1131 | pT[i] = val; | |
1132 | pT2[i] = val; | |
1133 | } | |
1134 | } | |
1135 | break; | |
1136 | } | |
1137 | ||
1138 | case SIZE_nLx2N: | |
1139 | { | |
1140 | int curPartNumQ = numElements >> 2; | |
1141 | if (!puIdx) | |
1142 | { | |
1143 | T *pT = p; | |
1144 | T *pT2 = p + (curPartNumQ << 1); | |
1145 | T *pT3 = p + (curPartNumQ >> 1); | |
1146 | T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1); | |
1147 | ||
1148 | for (i = 0; i < (curPartNumQ >> 2); i++) | |
1149 | { | |
1150 | pT[i] = val; | |
1151 | pT2[i] = val; | |
1152 | pT3[i] = val; | |
1153 | pT4[i] = val; | |
1154 | } | |
1155 | } | |
1156 | else | |
1157 | { | |
1158 | T *pT = p; | |
1159 | T *pT2 = p + (curPartNumQ << 1); | |
1160 | for (i = 0; i < (curPartNumQ >> 2); i++) | |
1161 | { | |
1162 | pT[i] = val; | |
1163 | pT2[i] = val; | |
1164 | } | |
1165 | ||
1166 | pT = p + (curPartNumQ >> 1); | |
1167 | pT2 = p + (curPartNumQ << 1) + (curPartNumQ >> 1); | |
1168 | for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++) | |
1169 | { | |
1170 | pT[i] = val; | |
1171 | pT2[i] = val; | |
1172 | } | |
1173 | } | |
1174 | break; | |
1175 | } | |
1176 | ||
1177 | case SIZE_nRx2N: | |
1178 | { | |
1179 | int curPartNumQ = numElements >> 2; | |
1180 | if (!puIdx) | |
1181 | { | |
1182 | T *pT = p; | |
1183 | T *pT2 = p + (curPartNumQ << 1); | |
1184 | for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++) | |
1185 | { | |
1186 | pT[i] = val; | |
1187 | pT2[i] = val; | |
1188 | } | |
1189 | ||
1190 | pT = p + curPartNumQ + (curPartNumQ >> 1); | |
1191 | pT2 = p + numElements - curPartNumQ + (curPartNumQ >> 1); | |
1192 | for (i = 0; i < (curPartNumQ >> 2); i++) | |
1193 | { | |
1194 | pT[i] = val; | |
1195 | pT2[i] = val; | |
1196 | } | |
1197 | } | |
1198 | else | |
1199 | { | |
1200 | T *pT = p; | |
1201 | T *pT2 = p + (curPartNumQ >> 1); | |
1202 | T *pT3 = p + (curPartNumQ << 1); | |
1203 | T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1); | |
1204 | for (i = 0; i < (curPartNumQ >> 2); i++) | |
1205 | { | |
1206 | pT[i] = val; | |
1207 | pT2[i] = val; | |
1208 | pT3[i] = val; | |
1209 | pT4[i] = val; | |
1210 | } | |
1211 | } | |
1212 | break; | |
1213 | } | |
1214 | ||
1215 | case SIZE_NxN: | |
1216 | default: | |
1217 | X265_CHECK(0, "unknown partition type\n"); | |
1218 | break; | |
1219 | } | |
1220 | } | |
1221 | ||
1222 | void CUData::setPUMv(int list, const MV& mv, int absPartIdx, int puIdx) | |
1223 | { | |
1224 | setAllPU(m_mv[list], mv, absPartIdx, puIdx); | |
1225 | } | |
1226 | ||
1227 | void CUData::setPURefIdx(int list, char refIdx, int absPartIdx, int puIdx) | |
1228 | { | |
1229 | setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx); | |
1230 | } | |
1231 | ||
1232 | void CUData::getPartIndexAndSize(uint32_t partIdx, uint32_t& outPartAddr, int& outWidth, int& outHeight) const | |
1233 | { | |
1234 | int cuSize = 1 << m_log2CUSize[0]; | |
1235 | int partType = m_partSize[0]; | |
1236 | ||
1237 | int tmp = partTable[partType][partIdx][0]; | |
1238 | outWidth = ((tmp >> 4) * cuSize) >> 2; | |
1239 | outHeight = ((tmp & 0xF) * cuSize) >> 2; | |
1240 | outPartAddr = (partAddrTable[partType][partIdx] * m_numPartitions) >> 4; | |
1241 | } | |
1242 | ||
1243 | void CUData::getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& outMvField) const | |
1244 | { | |
1245 | if (cu) | |
1246 | { | |
1247 | outMvField.mv = cu->m_mv[picList][absPartIdx]; | |
1248 | outMvField.refIdx = cu->m_refIdx[picList][absPartIdx]; | |
1249 | } | |
1250 | else | |
1251 | { | |
1252 | // OUT OF BOUNDARY | |
1253 | outMvField.mv.word = 0; | |
1254 | outMvField.refIdx = REF_NOT_VALID; | |
1255 | } | |
1256 | } | |
1257 | ||
1258 | void CUData::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const | |
1259 | { | |
1260 | partIdxLT = m_absIdxInCTU; | |
1261 | partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1]; | |
1262 | ||
1263 | switch (m_partSize[0]) | |
1264 | { | |
1265 | case SIZE_2Nx2N: break; | |
1266 | case SIZE_2NxN: | |
1267 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 1; | |
1268 | partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 1; | |
1269 | break; | |
1270 | case SIZE_Nx2N: | |
1271 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 2; | |
1272 | partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 2; | |
1273 | break; | |
1274 | case SIZE_NxN: | |
1275 | partIdxLT += (m_numPartitions >> 2) * partIdx; | |
1276 | partIdxRT += (m_numPartitions >> 2) * (partIdx - 1); | |
1277 | break; | |
1278 | case SIZE_2NxnU: | |
1279 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 3; | |
1280 | partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 3; | |
1281 | break; | |
1282 | case SIZE_2NxnD: | |
1283 | partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3); | |
1284 | partIdxRT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3); | |
1285 | break; | |
1286 | case SIZE_nLx2N: | |
1287 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 4; | |
1288 | partIdxRT -= (partIdx == 1) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4); | |
1289 | break; | |
1290 | case SIZE_nRx2N: | |
1291 | partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4); | |
1292 | partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 4; | |
1293 | break; | |
1294 | default: | |
1295 | X265_CHECK(0, "unexpected part index\n"); | |
1296 | break; | |
1297 | } | |
1298 | } | |
1299 | ||
1300 | uint32_t CUData::deriveLeftBottomIdx(uint32_t puIdx) const | |
1301 | { | |
1302 | uint32_t outPartIdxLB; | |
1303 | outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * s_numPartInCUSize]; | |
1304 | ||
1305 | switch (m_partSize[0]) | |
1306 | { | |
1307 | case SIZE_2Nx2N: | |
1308 | outPartIdxLB += m_numPartitions >> 1; | |
1309 | break; | |
1310 | case SIZE_2NxN: | |
1311 | outPartIdxLB += puIdx ? m_numPartitions >> 1 : 0; | |
1312 | break; | |
1313 | case SIZE_Nx2N: | |
1314 | outPartIdxLB += puIdx ? (m_numPartitions >> 2) * 3 : m_numPartitions >> 1; | |
1315 | break; | |
1316 | case SIZE_NxN: | |
1317 | outPartIdxLB += (m_numPartitions >> 2) * puIdx; | |
1318 | break; | |
1319 | case SIZE_2NxnU: | |
1320 | outPartIdxLB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3); | |
1321 | break; | |
1322 | case SIZE_2NxnD: | |
1323 | outPartIdxLB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3); | |
1324 | break; | |
1325 | case SIZE_nLx2N: | |
1326 | outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 4) : m_numPartitions >> 1; | |
1327 | break; | |
1328 | case SIZE_nRx2N: | |
1329 | outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 2) + (m_numPartitions >> 4) : m_numPartitions >> 1; | |
1330 | break; | |
1331 | default: | |
1332 | X265_CHECK(0, "unexpected part index\n"); | |
1333 | break; | |
1334 | } | |
1335 | return outPartIdxLB; | |
1336 | } | |
1337 | ||
1338 | /* Derives the partition index of neighboring bottom right block */ | |
1339 | uint32_t CUData::deriveRightBottomIdx(uint32_t puIdx) const | |
1340 | { | |
1341 | uint32_t outPartIdxRB; | |
1342 | outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + | |
1343 | ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * s_numPartInCUSize + | |
1344 | (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1]; | |
1345 | ||
1346 | switch (m_partSize[0]) | |
1347 | { | |
1348 | case SIZE_2Nx2N: | |
1349 | outPartIdxRB += m_numPartitions >> 1; | |
1350 | break; | |
1351 | case SIZE_2NxN: | |
1352 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : 0; | |
1353 | break; | |
1354 | case SIZE_Nx2N: | |
1355 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : m_numPartitions >> 2; | |
1356 | break; | |
1357 | case SIZE_NxN: | |
1358 | outPartIdxRB += (m_numPartitions >> 2) * (puIdx - 1); | |
1359 | break; | |
1360 | case SIZE_2NxnU: | |
1361 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3); | |
1362 | break; | |
1363 | case SIZE_2NxnD: | |
1364 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3); | |
1365 | break; | |
1366 | case SIZE_nLx2N: | |
1367 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 3) + (m_numPartitions >> 4); | |
1368 | break; | |
1369 | case SIZE_nRx2N: | |
1370 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3) + (m_numPartitions >> 4); | |
1371 | break; | |
1372 | default: | |
1373 | X265_CHECK(0, "unexpected part index\n"); | |
1374 | break; | |
1375 | } | |
1376 | return outPartIdxRB; | |
1377 | } | |
1378 | ||
1379 | void CUData::deriveLeftRightTopIdxAdi(uint32_t& outPartIdxLT, uint32_t& outPartIdxRT, uint32_t partOffset, uint32_t partDepth) const | |
1380 | { | |
1381 | uint32_t numPartInWidth = 1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - partDepth); | |
1382 | ||
1383 | outPartIdxLT = m_absIdxInCTU + partOffset; | |
1384 | outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + numPartInWidth - 1]; | |
1385 | } | |
1386 | ||
1387 | bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const | |
1388 | { | |
1389 | if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx]) | |
1390 | return false; | |
1391 | ||
1392 | for (uint32_t refListIdx = 0; refListIdx < 2; refListIdx++) | |
1393 | { | |
1394 | if (m_interDir[absPartIdx] & (1 << refListIdx)) | |
1395 | { | |
1396 | if (m_mv[refListIdx][absPartIdx] != candCU.m_mv[refListIdx][candAbsPartIdx] || | |
1397 | m_refIdx[refListIdx][absPartIdx] != candCU.m_refIdx[refListIdx][candAbsPartIdx]) | |
1398 | return false; | |
1399 | } | |
1400 | } | |
1401 | ||
1402 | return true; | |
1403 | } | |
1404 | ||
1405 | /* Construct list of merging candidates */ | |
1406 | uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*mvFieldNeighbours)[2], uint8_t* interDirNeighbours) const | |
1407 | { | |
1408 | uint32_t absPartAddr = m_absIdxInCTU + absPartIdx; | |
1409 | const bool isInterB = m_slice->isInterB(); | |
1410 | ||
1411 | const uint32_t maxNumMergeCand = m_slice->m_maxNumMergeCand; | |
1412 | ||
1413 | for (uint32_t i = 0; i < maxNumMergeCand; ++i) | |
1414 | { | |
1415 | mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID; | |
1416 | mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID; | |
1417 | } | |
1418 | ||
1419 | /* calculate the location of upper-left corner pixel and size of the current PU */ | |
1420 | int xP, yP, nPSW, nPSH; | |
1421 | ||
1422 | int cuSize = 1 << m_log2CUSize[0]; | |
1423 | int partMode = m_partSize[0]; | |
1424 | ||
1425 | int tmp = partTable[partMode][puIdx][0]; | |
1426 | nPSW = ((tmp >> 4) * cuSize) >> 2; | |
1427 | nPSH = ((tmp & 0xF) * cuSize) >> 2; | |
1428 | ||
1429 | tmp = partTable[partMode][puIdx][1]; | |
1430 | xP = ((tmp >> 4) * cuSize) >> 2; | |
1431 | yP = ((tmp & 0xF) * cuSize) >> 2; | |
1432 | ||
1433 | uint32_t count = 0; | |
1434 | ||
1435 | uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx); | |
1436 | PartSize curPS = (PartSize)m_partSize[absPartIdx]; | |
1437 | ||
1438 | // left | |
1439 | uint32_t leftPartIdx = 0; | |
1440 | const CUData* cuLeft = getPULeft(leftPartIdx, partIdxLB); | |
1441 | bool isAvailableA1 = cuLeft && | |
1442 | cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) && | |
1443 | !(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) && | |
1444 | !cuLeft->isIntra(leftPartIdx); | |
1445 | if (isAvailableA1) | |
1446 | { | |
1447 | // get Inter Dir | |
1448 | interDirNeighbours[count] = cuLeft->m_interDir[leftPartIdx]; | |
1449 | // get Mv from Left | |
1450 | cuLeft->getMvField(cuLeft, leftPartIdx, 0, mvFieldNeighbours[count][0]); | |
1451 | if (isInterB) | |
1452 | cuLeft->getMvField(cuLeft, leftPartIdx, 1, mvFieldNeighbours[count][1]); | |
1453 | ||
1454 | count++; | |
1455 | ||
1456 | if (count == maxNumMergeCand) | |
1457 | return maxNumMergeCand; | |
1458 | } | |
1459 | ||
1460 | deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT); | |
1461 | ||
1462 | // above | |
1463 | uint32_t abovePartIdx = 0; | |
1464 | const CUData* cuAbove = getPUAbove(abovePartIdx, partIdxRT); | |
1465 | bool isAvailableB1 = cuAbove && | |
1466 | cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) && | |
1467 | !(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) && | |
1468 | !cuAbove->isIntra(abovePartIdx); | |
1469 | if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx))) | |
1470 | { | |
1471 | // get Inter Dir | |
1472 | interDirNeighbours[count] = cuAbove->m_interDir[abovePartIdx]; | |
1473 | // get Mv from Left | |
1474 | cuAbove->getMvField(cuAbove, abovePartIdx, 0, mvFieldNeighbours[count][0]); | |
1475 | if (isInterB) | |
1476 | cuAbove->getMvField(cuAbove, abovePartIdx, 1, mvFieldNeighbours[count][1]); | |
1477 | ||
1478 | count++; | |
1479 | ||
1480 | if (count == maxNumMergeCand) | |
1481 | return maxNumMergeCand; | |
1482 | } | |
1483 | ||
1484 | // above right | |
1485 | uint32_t aboveRightPartIdx = 0; | |
1486 | const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT); | |
1487 | bool isAvailableB0 = cuAboveRight && | |
1488 | cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) && | |
1489 | !cuAboveRight->isIntra(aboveRightPartIdx); | |
1490 | if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx))) | |
1491 | { | |
1492 | // get Inter Dir | |
1493 | interDirNeighbours[count] = cuAboveRight->m_interDir[aboveRightPartIdx]; | |
1494 | // get Mv from Left | |
1495 | cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, mvFieldNeighbours[count][0]); | |
1496 | if (isInterB) | |
1497 | cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, mvFieldNeighbours[count][1]); | |
1498 | ||
1499 | count++; | |
1500 | ||
1501 | if (count == maxNumMergeCand) | |
1502 | return maxNumMergeCand; | |
1503 | } | |
1504 | ||
1505 | // left bottom | |
1506 | uint32_t leftBottomPartIdx = 0; | |
1507 | const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB); | |
1508 | bool isAvailableA0 = cuLeftBottom && | |
1509 | cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) && | |
1510 | !cuLeftBottom->isIntra(leftBottomPartIdx); | |
1511 | if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx))) | |
1512 | { | |
1513 | // get Inter Dir | |
1514 | interDirNeighbours[count] = cuLeftBottom->m_interDir[leftBottomPartIdx]; | |
1515 | // get Mv from Left | |
1516 | cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, mvFieldNeighbours[count][0]); | |
1517 | if (isInterB) | |
1518 | cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, mvFieldNeighbours[count][1]); | |
1519 | ||
1520 | count++; | |
1521 | ||
1522 | if (count == maxNumMergeCand) | |
1523 | return maxNumMergeCand; | |
1524 | } | |
1525 | ||
1526 | // above left | |
1527 | if (count < 4) | |
1528 | { | |
1529 | uint32_t aboveLeftPartIdx = 0; | |
1530 | const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr); | |
1531 | bool isAvailableB2 = cuAboveLeft && | |
1532 | cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) && | |
1533 | !cuAboveLeft->isIntra(aboveLeftPartIdx); | |
1534 | if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx)) | |
1535 | && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx))) | |
1536 | { | |
1537 | // get Inter Dir | |
1538 | interDirNeighbours[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx]; | |
1539 | // get Mv from Left | |
1540 | cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, mvFieldNeighbours[count][0]); | |
1541 | if (isInterB) | |
1542 | cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, mvFieldNeighbours[count][1]); | |
1543 | ||
1544 | count++; | |
1545 | ||
1546 | if (count == maxNumMergeCand) | |
1547 | return maxNumMergeCand; | |
1548 | } | |
1549 | } | |
1550 | if (m_slice->m_sps->bTemporalMVPEnabled) | |
1551 | { | |
1552 | uint32_t partIdxRB = deriveRightBottomIdx(puIdx); | |
1553 | MV colmv; | |
1554 | int ctuIdx = -1; | |
1555 | ||
1556 | // image boundary check | |
1557 | if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples && | |
1558 | m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples) | |
1559 | { | |
1560 | uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; | |
1561 | uint32_t numPartInCUSize = s_numPartInCUSize; | |
1562 | bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU | |
1563 | bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row of CTU | |
1564 | ||
1565 | if (bNotLastCol && bNotLastRow) | |
1566 | { | |
1567 | absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1]; | |
1568 | ctuIdx = m_cuAddr; | |
1569 | } | |
1570 | else if (bNotLastCol) | |
1571 | absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)]; | |
1572 | else if (bNotLastRow) | |
1573 | { | |
1574 | absPartAddr = g_rasterToZscan[absPartIdxRB + 1]; | |
1575 | ctuIdx = m_cuAddr + 1; | |
1576 | } | |
1577 | else // is the right bottom corner of CTU | |
1578 | absPartAddr = 0; | |
1579 | } | |
1580 | ||
1581 | int refIdx = 0; | |
1582 | uint32_t partIdxCenter = deriveCenterIdx(puIdx); | |
1583 | uint32_t curCTUIdx = m_cuAddr; | |
1584 | int dir = 0; | |
1585 | bool bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, 0, ctuIdx, absPartAddr); | |
1586 | if (!bExistMV) | |
1587 | bExistMV = getColMVP(colmv, refIdx, 0, curCTUIdx, partIdxCenter); | |
1588 | if (bExistMV) | |
1589 | { | |
1590 | dir |= 1; | |
1591 | mvFieldNeighbours[count][0].mv = colmv; | |
1592 | mvFieldNeighbours[count][0].refIdx = refIdx; | |
1593 | } | |
1594 | ||
1595 | if (isInterB) | |
1596 | { | |
1597 | bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, 1, ctuIdx, absPartAddr); | |
1598 | if (!bExistMV) | |
1599 | bExistMV = getColMVP(colmv, refIdx, 1, curCTUIdx, partIdxCenter); | |
1600 | ||
1601 | if (bExistMV) | |
1602 | { | |
1603 | dir |= 2; | |
1604 | mvFieldNeighbours[count][1].mv = colmv; | |
1605 | mvFieldNeighbours[count][1].refIdx = refIdx; | |
1606 | } | |
1607 | } | |
1608 | ||
1609 | if (dir != 0) | |
1610 | { | |
1611 | interDirNeighbours[count] = (uint8_t)dir; | |
1612 | ||
1613 | count++; | |
1614 | ||
1615 | if (count == maxNumMergeCand) | |
1616 | return maxNumMergeCand; | |
1617 | } | |
1618 | } | |
1619 | ||
1620 | if (isInterB) | |
1621 | { | |
1622 | const uint32_t cutoff = count * (count - 1); | |
1623 | uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 } | |
1624 | uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 } | |
1625 | ||
1626 | for (uint32_t idx = 0; idx < cutoff; idx++) | |
1627 | { | |
1628 | int i = priorityList0 & 3; | |
1629 | int j = priorityList1 & 3; | |
1630 | priorityList0 >>= 2; | |
1631 | priorityList1 >>= 2; | |
1632 | ||
1633 | if ((interDirNeighbours[i] & 0x1) && (interDirNeighbours[j] & 0x2)) | |
1634 | { | |
1635 | // get Mv from cand[i] and cand[j] | |
1636 | int refIdxL0 = mvFieldNeighbours[i][0].refIdx; | |
1637 | int refIdxL1 = mvFieldNeighbours[j][1].refIdx; | |
1638 | int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0]; | |
1639 | int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1]; | |
1640 | if (!(refPOCL0 == refPOCL1 && mvFieldNeighbours[i][0].mv == mvFieldNeighbours[j][1].mv)) | |
1641 | { | |
1642 | mvFieldNeighbours[count][0].mv = mvFieldNeighbours[i][0].mv; | |
1643 | mvFieldNeighbours[count][0].refIdx = refIdxL0; | |
1644 | mvFieldNeighbours[count][1].mv = mvFieldNeighbours[j][1].mv; | |
1645 | mvFieldNeighbours[count][1].refIdx = refIdxL1; | |
1646 | interDirNeighbours[count] = 3; | |
1647 | ||
1648 | count++; | |
1649 | ||
1650 | if (count == maxNumMergeCand) | |
1651 | return maxNumMergeCand; | |
1652 | } | |
1653 | } | |
1654 | } | |
1655 | } | |
1656 | int numRefIdx = (isInterB) ? X265_MIN(m_slice->m_numRefIdx[0], m_slice->m_numRefIdx[1]) : m_slice->m_numRefIdx[0]; | |
1657 | int r = 0; | |
1658 | int refcnt = 0; | |
1659 | while (count < maxNumMergeCand) | |
1660 | { | |
1661 | interDirNeighbours[count] = 1; | |
1662 | mvFieldNeighbours[count][0].mv.word = 0; | |
1663 | mvFieldNeighbours[count][0].refIdx = r; | |
1664 | ||
1665 | if (isInterB) | |
1666 | { | |
1667 | interDirNeighbours[count] = 3; | |
1668 | mvFieldNeighbours[count][1].mv.word = 0; | |
1669 | mvFieldNeighbours[count][1].refIdx = r; | |
1670 | } | |
1671 | ||
1672 | count++; | |
1673 | ||
1674 | if (refcnt == numRefIdx - 1) | |
1675 | r = 0; | |
1676 | else | |
1677 | { | |
1678 | ++r; | |
1679 | ++refcnt; | |
1680 | } | |
1681 | } | |
1682 | ||
1683 | return count; | |
1684 | } | |
1685 | ||
1686 | /* Check whether the current PU and a spatial neighboring PU are in a same ME region */ | |
1687 | bool CUData::isDiffMER(int xN, int yN, int xP, int yP) const | |
1688 | { | |
1689 | uint32_t plevel = 2; | |
1690 | ||
1691 | if ((xN >> plevel) != (xP >> plevel)) | |
1692 | return true; | |
1693 | if ((yN >> plevel) != (yP >> plevel)) | |
1694 | return true; | |
1695 | return false; | |
1696 | } | |
1697 | ||
1698 | /* Constructs a list of candidates for AMVP, and a larger list of motion candidates */ | |
1699 | int CUData::fillMvpCand(uint32_t puIdx, uint32_t absPartIdx, int picList, int refIdx, MV* amvpCand, MV* mvc) const | |
1700 | { | |
1701 | int num = 0; | |
1702 | ||
1703 | // spatial MV | |
1704 | uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx); | |
1705 | ||
1706 | deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT); | |
1707 | ||
1708 | MV mv[MD_ABOVE_LEFT + 1]; | |
1709 | MV mvOrder[MD_ABOVE_LEFT + 1]; | |
1710 | bool valid[MD_ABOVE_LEFT + 1]; | |
1711 | bool validOrder[MD_ABOVE_LEFT + 1]; | |
1712 | ||
1713 | valid[MD_BELOW_LEFT] = addMVPCand(mv[MD_BELOW_LEFT], picList, refIdx, partIdxLB, MD_BELOW_LEFT); | |
1714 | valid[MD_LEFT] = addMVPCand(mv[MD_LEFT], picList, refIdx, partIdxLB, MD_LEFT); | |
1715 | valid[MD_ABOVE_RIGHT] = addMVPCand(mv[MD_ABOVE_RIGHT], picList, refIdx, partIdxRT, MD_ABOVE_RIGHT); | |
1716 | valid[MD_ABOVE] = addMVPCand(mv[MD_ABOVE], picList, refIdx, partIdxRT, MD_ABOVE); | |
1717 | valid[MD_ABOVE_LEFT] = addMVPCand(mv[MD_ABOVE_LEFT], picList, refIdx, partIdxLT, MD_ABOVE_LEFT); | |
1718 | ||
1719 | validOrder[MD_BELOW_LEFT] = addMVPCandOrder(mvOrder[MD_BELOW_LEFT], picList, refIdx, partIdxLB, MD_BELOW_LEFT); | |
1720 | validOrder[MD_LEFT] = addMVPCandOrder(mvOrder[MD_LEFT], picList, refIdx, partIdxLB, MD_LEFT); | |
1721 | validOrder[MD_ABOVE_RIGHT] = addMVPCandOrder(mvOrder[MD_ABOVE_RIGHT], picList, refIdx, partIdxRT, MD_ABOVE_RIGHT); | |
1722 | validOrder[MD_ABOVE] = addMVPCandOrder(mvOrder[MD_ABOVE], picList, refIdx, partIdxRT, MD_ABOVE); | |
1723 | validOrder[MD_ABOVE_LEFT] = addMVPCandOrder(mvOrder[MD_ABOVE_LEFT], picList, refIdx, partIdxLT, MD_ABOVE_LEFT); | |
1724 | ||
1725 | // Left predictor search | |
1726 | if (valid[MD_BELOW_LEFT]) | |
1727 | amvpCand[num++] = mv[MD_BELOW_LEFT]; | |
1728 | else if (valid[MD_LEFT]) | |
1729 | amvpCand[num++] = mv[MD_LEFT]; | |
1730 | else if (validOrder[MD_BELOW_LEFT]) | |
1731 | amvpCand[num++] = mvOrder[MD_BELOW_LEFT]; | |
1732 | else if (validOrder[MD_LEFT]) | |
1733 | amvpCand[num++] = mvOrder[MD_LEFT]; | |
1734 | ||
1735 | bool bAddedSmvp = num > 0; | |
1736 | ||
1737 | // Above predictor search | |
1738 | if (valid[MD_ABOVE_RIGHT]) | |
1739 | amvpCand[num++] = mv[MD_ABOVE_RIGHT]; | |
1740 | else if (valid[MD_ABOVE]) | |
1741 | amvpCand[num++] = mv[MD_ABOVE]; | |
1742 | else if (valid[MD_ABOVE_LEFT]) | |
1743 | amvpCand[num++] = mv[MD_ABOVE_LEFT]; | |
1744 | ||
1745 | if (!bAddedSmvp) | |
1746 | { | |
1747 | if (validOrder[MD_ABOVE_RIGHT]) | |
1748 | amvpCand[num++] = mvOrder[MD_ABOVE_RIGHT]; | |
1749 | else if (validOrder[MD_ABOVE]) | |
1750 | amvpCand[num++] = mvOrder[MD_ABOVE]; | |
1751 | else if (validOrder[MD_ABOVE_LEFT]) | |
1752 | amvpCand[num++] = mvOrder[MD_ABOVE_LEFT]; | |
1753 | } | |
1754 | ||
1755 | int numMvc = 0; | |
1756 | for (int dir = MD_LEFT; dir <= MD_ABOVE_LEFT; dir++) | |
1757 | { | |
1758 | if (valid[dir] && mv[dir].notZero()) | |
1759 | mvc[numMvc++] = mv[dir]; | |
1760 | ||
1761 | if (validOrder[dir] && mvOrder[dir].notZero()) | |
1762 | mvc[numMvc++] = mvOrder[dir]; | |
1763 | } | |
1764 | ||
1765 | if (num == 2) | |
1766 | { | |
1767 | if (amvpCand[0] == amvpCand[1]) | |
1768 | num = 1; | |
1769 | else | |
1770 | /* AMVP_NUM_CANDS = 2 */ | |
1771 | return numMvc; | |
1772 | } | |
1773 | ||
1774 | if (m_slice->m_sps->bTemporalMVPEnabled) | |
1775 | { | |
1776 | uint32_t absPartAddr = m_absIdxInCTU + absPartIdx; | |
1777 | uint32_t partIdxRB = deriveRightBottomIdx(puIdx); | |
1778 | MV colmv; | |
1779 | ||
1780 | // co-located RightBottom temporal predictor (H) | |
1781 | int ctuIdx = -1; | |
1782 | ||
1783 | // image boundary check | |
1784 | if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples && | |
1785 | m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples) | |
1786 | { | |
1787 | uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; | |
1788 | uint32_t numPartInCUSize = s_numPartInCUSize; | |
1789 | bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU | |
1790 | bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row of CTU | |
1791 | ||
1792 | if (bNotLastCol && bNotLastRow) | |
1793 | { | |
1794 | absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1]; | |
1795 | ctuIdx = m_cuAddr; | |
1796 | } | |
1797 | else if (bNotLastCol) | |
1798 | absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)]; | |
1799 | else if (bNotLastRow) | |
1800 | { | |
1801 | absPartAddr = g_rasterToZscan[absPartIdxRB + 1]; | |
1802 | ctuIdx = m_cuAddr + 1; | |
1803 | } | |
1804 | else // is the right bottom corner of CTU | |
1805 | absPartAddr = 0; | |
1806 | } | |
1807 | if (ctuIdx >= 0 && getColMVP(colmv, refIdx, picList, ctuIdx, absPartAddr)) | |
1808 | { | |
1809 | amvpCand[num++] = colmv; | |
1810 | mvc[numMvc++] = colmv; | |
1811 | } | |
1812 | else | |
1813 | { | |
1814 | uint32_t partIdxCenter = deriveCenterIdx(puIdx); | |
1815 | uint32_t curCTUIdx = m_cuAddr; | |
1816 | if (getColMVP(colmv, refIdx, picList, curCTUIdx, partIdxCenter)) | |
1817 | { | |
1818 | amvpCand[num++] = colmv; | |
1819 | mvc[numMvc++] = colmv; | |
1820 | } | |
1821 | } | |
1822 | } | |
1823 | ||
1824 | while (num < AMVP_NUM_CANDS) | |
1825 | amvpCand[num++] = 0; | |
1826 | ||
1827 | return numMvc; | |
1828 | } | |
1829 | ||
1830 | void CUData::clipMv(MV& outMV) const | |
1831 | { | |
1832 | int mvshift = 2; | |
1833 | int offset = 8; | |
1834 | int xmax = (m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift; | |
1835 | int xmin = (-(int)g_maxCUSize - offset - (int)m_cuPelX + 1) << mvshift; | |
1836 | ||
1837 | int ymax = (m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift; | |
1838 | int ymin = (-(int)g_maxCUSize - offset - (int)m_cuPelY + 1) << mvshift; | |
1839 | ||
1840 | outMV.x = (int16_t)X265_MIN(xmax, X265_MAX(xmin, (int)outMV.x)); | |
1841 | outMV.y = (int16_t)X265_MIN(ymax, X265_MAX(ymin, (int)outMV.y)); | |
1842 | } | |
1843 | ||
1844 | bool CUData::addMVPCand(MV& mvp, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir) const | |
1845 | { | |
1846 | const CUData* tmpCU = NULL; | |
1847 | uint32_t idx = 0; | |
1848 | ||
1849 | switch (dir) | |
1850 | { | |
1851 | case MD_LEFT: | |
1852 | tmpCU = getPULeft(idx, partUnitIdx); | |
1853 | break; | |
1854 | case MD_ABOVE: | |
1855 | tmpCU = getPUAbove(idx, partUnitIdx); | |
1856 | break; | |
1857 | case MD_ABOVE_RIGHT: | |
1858 | tmpCU = getPUAboveRight(idx, partUnitIdx); | |
1859 | break; | |
1860 | case MD_BELOW_LEFT: | |
1861 | tmpCU = getPUBelowLeft(idx, partUnitIdx); | |
1862 | break; | |
1863 | case MD_ABOVE_LEFT: | |
1864 | tmpCU = getPUAboveLeft(idx, partUnitIdx); | |
1865 | break; | |
1866 | default: | |
1867 | return false; | |
1868 | } | |
1869 | ||
1870 | if (!tmpCU) | |
1871 | return false; | |
1872 | ||
1873 | int refPOC = m_slice->m_refPOCList[picList][refIdx]; | |
1874 | int partRefIdx = tmpCU->m_refIdx[picList][idx]; | |
1875 | if (partRefIdx >= 0 && refPOC == tmpCU->m_slice->m_refPOCList[picList][partRefIdx]) | |
1876 | { | |
1877 | mvp = tmpCU->m_mv[picList][idx]; | |
1878 | return true; | |
1879 | } | |
1880 | ||
1881 | int refPicList2nd = 0; | |
1882 | if (picList == 0) | |
1883 | refPicList2nd = 1; | |
1884 | else if (picList == 1) | |
1885 | refPicList2nd = 0; | |
1886 | ||
1887 | int curRefPOC = m_slice->m_refPOCList[picList][refIdx]; | |
1888 | int neibRefPOC; | |
1889 | ||
1890 | partRefIdx = tmpCU->m_refIdx[refPicList2nd][idx]; | |
1891 | if (partRefIdx >= 0) | |
1892 | { | |
1893 | neibRefPOC = tmpCU->m_slice->m_refPOCList[refPicList2nd][partRefIdx]; | |
1894 | if (neibRefPOC == curRefPOC) | |
1895 | { | |
1896 | // Same reference frame but different list | |
1897 | mvp = tmpCU->m_mv[refPicList2nd][idx]; | |
1898 | return true; | |
1899 | } | |
1900 | } | |
1901 | return false; | |
1902 | } | |
1903 | ||
1904 | bool CUData::addMVPCandOrder(MV& outMV, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir) const | |
1905 | { | |
1906 | const CUData* tmpCU = NULL; | |
1907 | uint32_t idx = 0; | |
1908 | ||
1909 | switch (dir) | |
1910 | { | |
1911 | case MD_LEFT: | |
1912 | tmpCU = getPULeft(idx, partUnitIdx); | |
1913 | break; | |
1914 | case MD_ABOVE: | |
1915 | tmpCU = getPUAbove(idx, partUnitIdx); | |
1916 | break; | |
1917 | case MD_ABOVE_RIGHT: | |
1918 | tmpCU = getPUAboveRight(idx, partUnitIdx); | |
1919 | break; | |
1920 | case MD_BELOW_LEFT: | |
1921 | tmpCU = getPUBelowLeft(idx, partUnitIdx); | |
1922 | break; | |
1923 | case MD_ABOVE_LEFT: | |
1924 | tmpCU = getPUAboveLeft(idx, partUnitIdx); | |
1925 | break; | |
1926 | default: | |
1927 | return false; | |
1928 | } | |
1929 | ||
1930 | if (!tmpCU) | |
1931 | return false; | |
1932 | ||
1933 | int refPicList2nd = 0; | |
1934 | if (picList == 0) | |
1935 | refPicList2nd = 1; | |
1936 | else if (picList == 1) | |
1937 | refPicList2nd = 0; | |
1938 | ||
1939 | int curPOC = m_slice->m_poc; | |
1940 | int curRefPOC = m_slice->m_refPOCList[picList][refIdx]; | |
1941 | int neibPOC = curPOC; | |
1942 | int neibRefPOC; | |
1943 | ||
1944 | int partRefIdx = tmpCU->m_refIdx[picList][idx]; | |
1945 | if (partRefIdx >= 0) | |
1946 | { | |
1947 | neibRefPOC = tmpCU->m_slice->m_refPOCList[picList][partRefIdx]; | |
1948 | MV mvp = tmpCU->m_mv[picList][idx]; | |
1949 | ||
1950 | scaleMvByPOCDist(outMV, mvp, curPOC, curRefPOC, neibPOC, neibRefPOC); | |
1951 | return true; | |
1952 | } | |
1953 | ||
1954 | partRefIdx = tmpCU->m_refIdx[refPicList2nd][idx]; | |
1955 | if (partRefIdx >= 0) | |
1956 | { | |
1957 | neibRefPOC = tmpCU->m_slice->m_refPOCList[refPicList2nd][partRefIdx]; | |
1958 | MV mvp = tmpCU->m_mv[refPicList2nd][idx]; | |
1959 | ||
1960 | scaleMvByPOCDist(outMV, mvp, curPOC, curRefPOC, neibPOC, neibRefPOC); | |
1961 | return true; | |
1962 | } | |
1963 | ||
1964 | return false; | |
1965 | } | |
1966 | ||
1967 | bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const | |
1968 | { | |
1969 | uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK; | |
1970 | ||
1971 | int colRefPicList; | |
1972 | int colPOC, colRefPOC, curPOC, curRefPOC; | |
1973 | MV colmv; | |
1974 | ||
1975 | // use coldir. | |
1976 | Frame *colPic = m_slice->m_refPicList[m_slice->isInterB() ? 1 - m_slice->m_colFromL0Flag : 0][m_slice->m_colRefIdx]; | |
1977 | CUData *colCU = colPic->m_encData->getPicCTU(cuAddr); | |
1978 | ||
1979 | if (colCU->m_partSize[partUnitIdx] == SIZE_NONE) | |
1980 | return false; | |
1981 | ||
1982 | curPOC = m_slice->m_poc; | |
1983 | colPOC = colCU->m_slice->m_poc; | |
1984 | ||
1985 | if (colCU->isIntra(absPartAddr)) | |
1986 | return false; | |
1987 | ||
1988 | colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag; | |
1989 | ||
1990 | int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; | |
1991 | ||
1992 | if (colRefIdx < 0) | |
1993 | { | |
1994 | colRefPicList = 1 - colRefPicList; | |
1995 | colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; | |
1996 | ||
1997 | if (colRefIdx < 0) | |
1998 | return false; | |
1999 | } | |
2000 | ||
2001 | // Scale the vector | |
2002 | colRefPOC = colCU->m_slice->m_refPOCList[colRefPicList][colRefIdx]; | |
2003 | colmv = colCU->m_mv[colRefPicList][absPartAddr]; | |
2004 | curRefPOC = m_slice->m_refPOCList[picList][outRefIdx]; | |
2005 | ||
2006 | scaleMvByPOCDist(outMV, colmv, curPOC, curRefPOC, colPOC, colRefPOC); | |
2007 | return true; | |
2008 | } | |
2009 | ||
2010 | void CUData::scaleMvByPOCDist(MV& outMV, const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const | |
2011 | { | |
2012 | int diffPocD = colPOC - colRefPOC; | |
2013 | int diffPocB = curPOC - curRefPOC; | |
2014 | ||
2015 | if (diffPocD == diffPocB) | |
2016 | outMV = inMV; | |
2017 | else | |
2018 | { | |
2019 | int tdb = Clip3(-128, 127, diffPocB); | |
2020 | int tdd = Clip3(-128, 127, diffPocD); | |
2021 | int x = (0x4000 + abs(tdd / 2)) / tdd; | |
2022 | int scale = Clip3(-4096, 4095, (tdb * x + 32) >> 6); | |
2023 | outMV = scaleMv(inMV, scale); | |
2024 | } | |
2025 | } | |
2026 | ||
2027 | uint32_t CUData::deriveCenterIdx(uint32_t puIdx) const | |
2028 | { | |
2029 | uint32_t absPartIdx; | |
2030 | int puWidth, puHeight; | |
2031 | ||
2032 | getPartIndexAndSize(puIdx, absPartIdx, puWidth, puHeight); | |
2033 | ||
2034 | return g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU + absPartIdx] | |
2035 | + (puHeight >> (LOG2_UNIT_SIZE + 1)) * s_numPartInCUSize | |
2036 | + (puWidth >> (LOG2_UNIT_SIZE + 1))]; | |
2037 | } | |
2038 | ||
2039 | ScanType CUData::getCoefScanIdx(uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma, bool bIsIntra) const | |
2040 | { | |
2041 | uint32_t dirMode; | |
2042 | ||
2043 | if (!bIsIntra) | |
2044 | return SCAN_DIAG; | |
2045 | ||
2046 | // check that MDCS can be used for this TU | |
2047 | if (bIsLuma) | |
2048 | { | |
2049 | if (log2TrSize > MDCS_LOG2_MAX_SIZE) | |
2050 | return SCAN_DIAG; | |
2051 | ||
2052 | dirMode = m_lumaIntraDir[absPartIdx]; | |
2053 | } | |
2054 | else | |
2055 | { | |
2056 | if (log2TrSize > (uint32_t)(MDCS_LOG2_MAX_SIZE - m_hChromaShift)) | |
2057 | return SCAN_DIAG; | |
2058 | ||
2059 | dirMode = m_chromaIntraDir[absPartIdx]; | |
2060 | if (dirMode == DM_CHROMA_IDX) | |
2061 | { | |
2062 | dirMode = m_lumaIntraDir[(m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & 0xFC]; | |
2063 | dirMode = (m_chromaFormat == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[dirMode] : dirMode; | |
2064 | } | |
2065 | } | |
2066 | ||
2067 | if (abs((int)dirMode - VER_IDX) <= MDCS_ANGLE_LIMIT) | |
2068 | return SCAN_HOR; | |
2069 | else if (abs((int)dirMode - HOR_IDX) <= MDCS_ANGLE_LIMIT) | |
2070 | return SCAN_VER; | |
2071 | else | |
2072 | return SCAN_DIAG; | |
2073 | } | |
2074 | ||
2075 | void CUData::getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const | |
2076 | { | |
2077 | // set the group layout | |
2078 | result.log2TrSizeCG = log2TrSize - 2; | |
2079 | ||
2080 | // set the scan orders | |
2081 | result.scanType = getCoefScanIdx(absPartIdx, log2TrSize, bIsLuma, isIntra(absPartIdx)); | |
2082 | result.scan = g_scanOrder[result.scanType][log2TrSize - 2]; | |
2083 | result.scanCG = g_scanOrderCG[result.scanType][result.log2TrSizeCG]; | |
2084 | ||
2085 | if (log2TrSize == 2) | |
2086 | result.firstSignificanceMapContext = 0; | |
2087 | else if (log2TrSize == 3) | |
2088 | { | |
2089 | result.firstSignificanceMapContext = 9; | |
2090 | if (result.scanType != SCAN_DIAG && bIsLuma) | |
2091 | result.firstSignificanceMapContext += 6; | |
2092 | } | |
2093 | else | |
2094 | result.firstSignificanceMapContext = bIsLuma ? 21 : 12; | |
2095 | } | |
2096 | ||
2097 | #define CU_SET_FLAG(bitfield, flag, value) (bitfield) = ((bitfield) & (~(flag))) | ((~((value) - 1)) & (flag)) | |
2098 | ||
2099 | void CUData::calcCTUGeoms(uint32_t picWidth, uint32_t picHeight, uint32_t maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) const | |
2100 | { | |
2101 | // Initialize the coding blocks inside the CTB | |
2102 | for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= MIN_LOG2_CU_SIZE; log2CUSize--) | |
2103 | { | |
2104 | uint32_t blockSize = 1 << log2CUSize; | |
2105 | uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize); | |
2106 | int32_t lastLevelFlag = log2CUSize == MIN_LOG2_CU_SIZE; | |
2107 | for (uint32_t sbY = 0; sbY < sbWidth; sbY++) | |
2108 | { | |
2109 | for (uint32_t sbX = 0; sbX < sbWidth; sbX++) | |
2110 | { | |
2111 | uint32_t depthIdx = g_depthScanIdx[sbY][sbX]; | |
2112 | uint32_t cuIdx = rangeCUIdx + depthIdx; | |
2113 | uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx << 2); | |
2114 | uint32_t px = m_cuPelX + sbX * blockSize; | |
2115 | uint32_t py = m_cuPelY + sbY * blockSize; | |
2116 | int32_t presentFlag = px < picWidth && py < picHeight; | |
2117 | int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > picWidth || py + blockSize > picHeight); | |
2118 | ||
2119 | /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */ | |
2120 | uint32_t xOffset = (sbX * blockSize) >> 3; | |
2121 | uint32_t yOffset = (sbY * blockSize) >> 3; | |
2122 | X265_CHECK(cuIdx < CUGeom::MAX_GEOMS, "CU geom index bug\n"); | |
2123 | ||
2124 | CUGeom *cu = cuDataArray + cuIdx; | |
2125 | cu->log2CUSize = log2CUSize; | |
2126 | cu->childOffset = childIdx - cuIdx; | |
2127 | cu->encodeIdx = g_depthScanIdx[yOffset][xOffset] * 4; | |
2128 | cu->numPartitions = (NUM_CU_PARTITIONS >> ((g_maxLog2CUSize - cu->log2CUSize) * 2)); | |
2129 | cu->depth = g_log2Size[maxCUSize] - log2CUSize; | |
2130 | ||
2131 | cu->flags = 0; | |
2132 | CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag); | |
2133 | CU_SET_FLAG(cu->flags, CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT, splitMandatoryFlag); | |
2134 | CU_SET_FLAG(cu->flags, CUGeom::LEAF, lastLevelFlag); | |
2135 | } | |
2136 | } | |
2137 | rangeCUIdx += sbWidth * sbWidth; | |
2138 | } | |
2139 | } |