Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2014 x265 project | |
3 | * | |
4 | * Authors: Steve Borho <steve@borho.org> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com. | |
22 | *****************************************************************************/ | |
23 | ||
24 | #include "common.h" | |
25 | #include "frame.h" | |
26 | #include "framedata.h" | |
27 | #include "picyuv.h" | |
28 | #include "mv.h" | |
29 | #include "cudata.h" | |
30 | ||
31 | using namespace x265; | |
32 | ||
33 | namespace { | |
34 | // file private namespace | |
35 | ||
36 | /* for all bcast* and copy* functions, dst and src are aligned to MIN(size, 32) */ | |
37 | ||
/* Store val into the single byte at dst. */
void bcast1(uint8_t* dst, uint8_t val)
{
    *dst = val;
}
39 | ||
/* Copy 4 bytes from src to dst.  A fixed-size memcpy is used instead of a
 * uint32_t* cast so the access does not violate strict aliasing. */
void copy4(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 4); }

/* Fill 4 bytes at dst with val.  The replication multiply is done in unsigned
 * arithmetic: with a signed 0x01010101 literal the product overflows int
 * (undefined behaviour) for any val >= 0x80. */
void bcast4(uint8_t* dst, uint8_t val)
{
    const uint32_t bval = 0x01010101u * val;
    memcpy(dst, &bval, sizeof(bval));
}
42 | ||
/* Copy 16 bytes from src to dst (fixed-size memcpy avoids the strict-aliasing
 * violation of accessing uint8_t storage through uint64_t pointers). */
void copy16(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 16); }

/* Fill 16 bytes at dst with val, written as two replicated 64-bit words. */
void bcast16(uint8_t* dst, uint8_t val)
{
    const uint64_t bval = 0x0101010101010101ULL * val;
    memcpy(dst, &bval, sizeof(bval));
    memcpy(dst + sizeof(bval), &bval, sizeof(bval));
}
45 | ||
/* Copy 64 bytes from src to dst (fixed-size memcpy avoids the strict-aliasing
 * violation of accessing uint8_t storage through uint64_t pointers). */
void copy64(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 64); }

/* Fill 64 bytes at dst with val, written as eight replicated 64-bit words. */
void bcast64(uint8_t* dst, uint8_t val)
{
    const uint64_t bval = 0x0101010101010101ULL * val;
    for (int i = 0; i < 8; i++)
        memcpy(dst + i * sizeof(bval), &bval, sizeof(bval));
}
53 | ||
/* For 256-byte blocks the library memcpy/memset are used directly; they will
 * probably use SIMD more effectively than the uint64_t approach of the smaller
 * helpers, although hand-written assembly would beat them. */
void copy256(uint8_t* dst, uint8_t* src)
{
    const size_t numBytes = 256;
    memcpy(dst, src, numBytes);
}

void bcast256(uint8_t* dst, uint8_t val)
{
    const size_t numBytes = 256;
    memset(dst, val, numBytes);
}
58 | ||
/* True when both addresses fall in the same column, i.e.
 * addrA % numUnitsPerRow == addrB % numUnitsPerRow.  The mask form relies on
 * numUnitsPerRow being a power of two. */
inline bool isEqualCol(int addrA, int addrB, int numUnitsPerRow)
{
    const int columnMask = numUnitsPerRow - 1;
    const int differingBits = addrA ^ addrB;
    return (differingBits & columnMask) == 0;
}
65 | ||
/* True when both addresses fall in the same row, i.e.
 * addrA / numUnitsPerRow == addrB / numUnitsPerRow.  The mask form relies on
 * numUnitsPerRow being a power of two. */
inline bool isEqualRow(int addrA, int addrB, int numUnitsPerRow)
{
    const int rowMask = ~(numUnitsPerRow - 1);
    const int differingBits = addrA ^ addrB;
    return (differingBits & rowMask) == 0;
}
72 | ||
/* True when the two addresses share a row or a column (or both).  The two
 * tests are combined with a bitwise OR, so both are always evaluated. */
inline bool isEqualRowOrCol(int addrA, int addrB, int numUnitsPerRow)
{
    const int columnMask = numUnitsPerRow - 1;
    const int differingBits = addrA ^ addrB;
    const bool sameCol = (differingBits & columnMask) == 0;
    const bool sameRow = (differingBits & ~columnMask) == 0;
    return sameCol | sameRow;
}
78 | ||
/* True when addr lies in the first column, i.e. addr % numUnitsPerRow == 0
 * (mask form; relies on numUnitsPerRow being a power of two). */
inline bool isZeroCol(int addr, int numUnitsPerRow)
{
    const int columnMask = numUnitsPerRow - 1;
    return (addr & columnMask) == 0;
}
85 | ||
/* True when addr lies in the first row, i.e. addr / numUnitsPerRow == 0
 * (mask form; relies on numUnitsPerRow being a power of two). */
inline bool isZeroRow(int addr, int numUnitsPerRow)
{
    const int rowMask = ~(numUnitsPerRow - 1);
    return (addr & rowMask) == 0;
}
92 | ||
/* True when the column index of addr (addr % numUnitsPerRow) is below val
 * (mask form; relies on numUnitsPerRow being a power of two). */
inline bool lessThanCol(int addr, int val, int numUnitsPerRow)
{
    const int column = addr & (numUnitsPerRow - 1);
    return column < val;
}
99 | ||
/* True when the row index of addr (addr / numUnitsPerRow) is below val; this
 * is tested by comparing addr with the first address of row val. */
inline bool lessThanRow(int addr, int val, int numUnitsPerRow)
{
    const int firstAddrOfRow = val * numUnitsPerRow;
    return addr < firstAddrOfRow;
}
106 | ||
107 | inline MV scaleMv(MV mv, int scale) | |
108 | { | |
109 | int mvx = Clip3(-32768, 32767, (scale * mv.x + 127 + (scale * mv.x < 0)) >> 8); | |
110 | int mvy = Clip3(-32768, 32767, (scale * mv.y + 127 + (scale * mv.y < 0)) >> 8); | |
111 | ||
112 | return MV((int16_t)mvx, (int16_t)mvy); | |
113 | } | |
114 | ||
// Partition geometry table, indexed as [partitioning mode][partition index][field].
// Field 0 is the partition size, field 1 is the partition offset.  Every entry
// packs two 4-bit values (X in the high nibble, Y in the low nibble), each
// expressed in quarters of the block size.
const uint32_t partTable[8][4][2] =
{
    //  size  offset      size  offset      size  offset      size  offset
    { { 0x44, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2Nx2N
    { { 0x42, 0x00 }, { 0x42, 0x02 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxN
    { { 0x24, 0x00 }, { 0x24, 0x20 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_Nx2N
    { { 0x22, 0x00 }, { 0x22, 0x20 }, { 0x22, 0x02 }, { 0x22, 0x22 } }, // SIZE_NxN
    { { 0x41, 0x00 }, { 0x43, 0x01 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxnU
    { { 0x43, 0x00 }, { 0x41, 0x03 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxnD
    { { 0x14, 0x00 }, { 0x34, 0x10 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_nLx2N
    { { 0x34, 0x00 }, { 0x14, 0x30 }, { 0x00, 0x00 }, { 0x00, 0x00 } }  // SIZE_nRx2N
};
132 | ||
// Partition address table, indexed as [partitioning mode][partition index];
// each entry is the address of that partition.
const uint32_t partAddrTable[8][4] =
{
    { 0x00, 0x00, 0x00, 0x00 }, // SIZE_2Nx2N
    { 0x00, 0x08, 0x08, 0x08 }, // SIZE_2NxN
    { 0x00, 0x04, 0x04, 0x04 }, // SIZE_Nx2N
    { 0x00, 0x04, 0x08, 0x0C }, // SIZE_NxN
    { 0x00, 0x02, 0x02, 0x02 }, // SIZE_2NxnU
    { 0x00, 0x0A, 0x0A, 0x0A }, // SIZE_2NxnD
    { 0x00, 0x01, 0x01, 0x01 }, // SIZE_nLx2N
    { 0x00, 0x05, 0x05, 0x05 }  // SIZE_nRx2N
};
146 | ||
147 | } | |
148 | ||
149 | cubcast_t CUData::s_partSet[NUM_FULL_DEPTH] = { NULL, NULL, NULL, NULL, NULL }; | |
150 | uint32_t CUData::s_numPartInCUSize; | |
151 | ||
152 | CUData::CUData() | |
153 | { | |
154 | memset(this, 0, sizeof(*this)); | |
155 | } | |
156 | ||
157 | void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth, int csp, int instance) | |
158 | { | |
159 | m_chromaFormat = csp; | |
160 | m_hChromaShift = CHROMA_H_SHIFT(csp); | |
161 | m_vChromaShift = CHROMA_V_SHIFT(csp); | |
162 | m_numPartitions = NUM_CU_PARTITIONS >> (depth * 2); | |
163 | ||
164 | if (!s_partSet[0]) | |
165 | { | |
166 | s_numPartInCUSize = 1 << g_maxFullDepth; | |
167 | switch (g_maxLog2CUSize) | |
168 | { | |
169 | case 6: | |
170 | s_partSet[0] = bcast256; | |
171 | s_partSet[1] = bcast64; | |
172 | s_partSet[2] = bcast16; | |
173 | s_partSet[3] = bcast4; | |
174 | s_partSet[4] = bcast1; | |
175 | break; | |
176 | case 5: | |
177 | s_partSet[0] = bcast64; | |
178 | s_partSet[1] = bcast16; | |
179 | s_partSet[2] = bcast4; | |
180 | s_partSet[3] = bcast1; | |
181 | s_partSet[4] = NULL; | |
182 | break; | |
183 | case 4: | |
184 | s_partSet[0] = bcast16; | |
185 | s_partSet[1] = bcast4; | |
186 | s_partSet[2] = bcast1; | |
187 | s_partSet[3] = NULL; | |
188 | s_partSet[4] = NULL; | |
189 | break; | |
190 | default: | |
191 | X265_CHECK(0, "unexpected CTU size\n"); | |
192 | break; | |
193 | } | |
194 | } | |
195 | ||
196 | switch (m_numPartitions) | |
197 | { | |
198 | case 256: // 64x64 CU | |
199 | m_partCopy = copy256; | |
200 | m_partSet = bcast256; | |
201 | m_subPartCopy = copy64; | |
202 | m_subPartSet = bcast64; | |
203 | break; | |
204 | case 64: // 32x32 CU | |
205 | m_partCopy = copy64; | |
206 | m_partSet = bcast64; | |
207 | m_subPartCopy = copy16; | |
208 | m_subPartSet = bcast16; | |
209 | break; | |
210 | case 16: // 16x16 CU | |
211 | m_partCopy = copy16; | |
212 | m_partSet = bcast16; | |
213 | m_subPartCopy = copy4; | |
214 | m_subPartSet = bcast4; | |
215 | break; | |
216 | case 4: // 8x8 CU | |
217 | m_partCopy = copy4; | |
218 | m_partSet = bcast4; | |
219 | m_subPartCopy = NULL; | |
220 | m_subPartSet = NULL; | |
221 | break; | |
222 | default: | |
223 | X265_CHECK(0, "unexpected CU partition count\n"); | |
224 | break; | |
225 | } | |
226 | ||
227 | /* Each CU's data is layed out sequentially within the charMemBlock */ | |
228 | uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance; | |
229 | ||
b53f7c52 | 230 | m_qp = (int8_t*)charBuf; charBuf += m_numPartitions; |
72b9787e | 231 | m_log2CUSize = charBuf; charBuf += m_numPartitions; |
72b9787e JB |
232 | m_lumaIntraDir = charBuf; charBuf += m_numPartitions; |
233 | m_tqBypass = charBuf; charBuf += m_numPartitions; | |
b53f7c52 JB |
234 | m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions; |
235 | m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions; | |
72b9787e | 236 | m_cuDepth = charBuf; charBuf += m_numPartitions; |
b53f7c52 JB |
237 | m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ |
238 | m_partSize = charBuf; charBuf += m_numPartitions; | |
72b9787e JB |
239 | m_mergeFlag = charBuf; charBuf += m_numPartitions; |
240 | m_interDir = charBuf; charBuf += m_numPartitions; | |
241 | m_mvpIdx[0] = charBuf; charBuf += m_numPartitions; | |
242 | m_mvpIdx[1] = charBuf; charBuf += m_numPartitions; | |
243 | m_tuDepth = charBuf; charBuf += m_numPartitions; | |
244 | m_transformSkip[0] = charBuf; charBuf += m_numPartitions; | |
245 | m_transformSkip[1] = charBuf; charBuf += m_numPartitions; | |
246 | m_transformSkip[2] = charBuf; charBuf += m_numPartitions; | |
247 | m_cbf[0] = charBuf; charBuf += m_numPartitions; | |
248 | m_cbf[1] = charBuf; charBuf += m_numPartitions; | |
249 | m_cbf[2] = charBuf; charBuf += m_numPartitions; | |
250 | m_chromaIntraDir = charBuf; charBuf += m_numPartitions; | |
251 | ||
252 | X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n"); | |
253 | ||
254 | m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions; | |
255 | m_mv[1] = m_mv[0] + m_numPartitions; | |
256 | m_mvd[0] = m_mv[1] + m_numPartitions; | |
257 | m_mvd[1] = m_mvd[0] + m_numPartitions; | |
258 | ||
259 | uint32_t cuSize = g_maxCUSize >> depth; | |
260 | uint32_t sizeL = cuSize * cuSize; | |
261 | uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); | |
262 | m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2); | |
263 | m_trCoeff[1] = m_trCoeff[0] + sizeL; | |
264 | m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC; | |
265 | } | |
266 | ||
267 | void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp) | |
268 | { | |
269 | m_encData = frame.m_encData; | |
270 | m_slice = m_encData->m_slice; | |
271 | m_cuAddr = cuAddr; | |
272 | m_cuPelX = (cuAddr % m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize; | |
273 | m_cuPelY = (cuAddr / m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize; | |
274 | m_absIdxInCTU = 0; | |
275 | m_numPartitions = NUM_CU_PARTITIONS; | |
276 | ||
277 | /* sequential memsets */ | |
278 | m_partSet((uint8_t*)m_qp, (uint8_t)qp); | |
279 | m_partSet(m_log2CUSize, (uint8_t)g_maxLog2CUSize); | |
72b9787e JB |
280 | m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX); |
281 | m_partSet(m_tqBypass, (uint8_t)frame.m_encData->m_param->bLossless); | |
282 | if (m_slice->m_sliceType != I_SLICE) | |
283 | { | |
284 | m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID); | |
285 | m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID); | |
286 | } | |
287 | ||
288 | X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n"); | |
289 | ||
290 | /* initialize the remaining CU data in one memset */ | |
b53f7c52 | 291 | memset(m_cuDepth, 0, (BytesPerPartition - 6) * m_numPartitions); |
72b9787e JB |
292 | |
293 | uint32_t widthInCU = m_slice->m_sps->numCuInWidth; | |
294 | m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL; | |
295 | m_cuAbove = (m_cuAddr / widthInCU) ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL; | |
296 | m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL; | |
297 | m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL; | |
298 | } | |
299 | ||
300 | // initialize Sub partition | |
301 | void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom) | |
302 | { | |
303 | m_absIdxInCTU = cuGeom.encodeIdx; | |
304 | m_encData = ctu.m_encData; | |
305 | m_slice = ctu.m_slice; | |
306 | m_cuAddr = ctu.m_cuAddr; | |
307 | m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.encodeIdx]; | |
308 | m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.encodeIdx]; | |
309 | m_cuLeft = ctu.m_cuLeft; | |
310 | m_cuAbove = ctu.m_cuAbove; | |
311 | m_cuAboveLeft = ctu.m_cuAboveLeft; | |
312 | m_cuAboveRight = ctu.m_cuAboveRight; | |
313 | X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n"); | |
314 | ||
315 | /* sequential memsets */ | |
316 | m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]); | |
317 | m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize); | |
72b9787e JB |
318 | m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX); |
319 | m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless); | |
320 | m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID); | |
321 | m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID); | |
322 | m_partSet(m_cuDepth, (uint8_t)cuGeom.depth); | |
323 | ||
324 | /* initialize the remaining CU data in one memset */ | |
b53f7c52 | 325 | memset(m_predMode, 0, (BytesPerPartition - 7) * m_numPartitions); |
72b9787e JB |
326 | } |
327 | ||
328 | /* Copy the results of a sub-part (split) CU to the parent CU */ | |
329 | void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx) | |
330 | { | |
331 | X265_CHECK(subPartIdx < 4, "part unit should be less than 4\n"); | |
332 | ||
333 | uint32_t offset = childGeom.numPartitions * subPartIdx; | |
334 | ||
335 | m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp); | |
336 | m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize); | |
72b9787e JB |
337 | m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir); |
338 | m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass); | |
339 | m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]); | |
340 | m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]); | |
341 | m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth); | |
b53f7c52 JB |
342 | m_subPartCopy(m_predMode + offset, subCU.m_predMode); |
343 | m_subPartCopy(m_partSize + offset, subCU.m_partSize); | |
72b9787e JB |
344 | m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag); |
345 | m_subPartCopy(m_interDir + offset, subCU.m_interDir); | |
346 | m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]); | |
347 | m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]); | |
348 | m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth); | |
349 | m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]); | |
350 | m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]); | |
351 | m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]); | |
352 | m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]); | |
353 | m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]); | |
354 | m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]); | |
355 | m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir); | |
356 | ||
357 | memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV)); | |
358 | memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV)); | |
359 | memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV)); | |
360 | memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV)); | |
361 | ||
362 | uint32_t tmp = 1 << ((g_maxLog2CUSize - childGeom.depth) * 2); | |
363 | uint32_t tmp2 = subPartIdx * tmp; | |
364 | memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t) * tmp); | |
365 | ||
366 | uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift); | |
367 | uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift); | |
368 | memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC); | |
369 | memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC); | |
370 | } | |
371 | ||
372 | /* If a sub-CU part is not present (off the edge of the picture) its depth and | |
373 | * log2size should still be configured */ | |
374 | void CUData::setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx) | |
375 | { | |
376 | uint32_t offset = childGeom.numPartitions * subPartIdx; | |
377 | m_subPartSet(m_cuDepth + offset, (uint8_t)childGeom.depth); | |
378 | m_subPartSet(m_log2CUSize + offset, (uint8_t)childGeom.log2CUSize); | |
379 | } | |
380 | ||
381 | /* Copy all CU data from one instance to the next, except set lossless flag | |
382 | * This will only get used when --cu-lossless is enabled but --lossless is not. */ | |
383 | void CUData::initLosslessCU(const CUData& cu, const CUGeom& cuGeom) | |
384 | { | |
385 | /* Start by making an exact copy */ | |
386 | m_encData = cu.m_encData; | |
387 | m_slice = cu.m_slice; | |
388 | m_cuAddr = cu.m_cuAddr; | |
389 | m_cuPelX = cu.m_cuPelX; | |
390 | m_cuPelY = cu.m_cuPelY; | |
391 | m_cuLeft = cu.m_cuLeft; | |
392 | m_cuAbove = cu.m_cuAbove; | |
393 | m_cuAboveLeft = cu.m_cuAboveLeft; | |
394 | m_cuAboveRight = cu.m_cuAboveRight; | |
395 | m_absIdxInCTU = cuGeom.encodeIdx; | |
396 | m_numPartitions = cuGeom.numPartitions; | |
397 | memcpy(m_qp, cu.m_qp, BytesPerPartition * m_numPartitions); | |
398 | memcpy(m_mv[0], cu.m_mv[0], m_numPartitions * sizeof(MV)); | |
399 | memcpy(m_mv[1], cu.m_mv[1], m_numPartitions * sizeof(MV)); | |
400 | memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV)); | |
401 | memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV)); | |
402 | ||
403 | /* force TQBypass to true */ | |
404 | m_partSet(m_tqBypass, true); | |
405 | ||
406 | /* clear residual coding flags */ | |
b53f7c52 | 407 | m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER)); |
72b9787e JB |
408 | m_partSet(m_tuDepth, 0); |
409 | m_partSet(m_transformSkip[0], 0); | |
410 | m_partSet(m_transformSkip[1], 0); | |
411 | m_partSet(m_transformSkip[2], 0); | |
412 | m_partSet(m_cbf[0], 0); | |
413 | m_partSet(m_cbf[1], 0); | |
414 | m_partSet(m_cbf[2], 0); | |
415 | } | |
416 | ||
417 | /* Copy completed predicted CU to CTU in picture */ | |
418 | void CUData::copyToPic(uint32_t depth) const | |
419 | { | |
420 | CUData& ctu = *m_encData->getPicCTU(m_cuAddr); | |
421 | ||
422 | m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp); | |
423 | m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize); | |
72b9787e JB |
424 | m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir); |
425 | m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass); | |
426 | m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]); | |
427 | m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]); | |
428 | m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth); | |
b53f7c52 JB |
429 | m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode); |
430 | m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize); | |
72b9787e JB |
431 | m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag); |
432 | m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir); | |
433 | m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]); | |
434 | m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]); | |
435 | m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth); | |
436 | m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]); | |
437 | m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]); | |
438 | m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]); | |
439 | m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]); | |
440 | m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]); | |
441 | m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]); | |
442 | m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir); | |
443 | ||
444 | memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV)); | |
445 | memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV)); | |
446 | memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV)); | |
447 | memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV)); | |
448 | ||
449 | uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2); | |
450 | uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2); | |
451 | memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY); | |
452 | ||
453 | uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift); | |
454 | uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift); | |
455 | memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC); | |
456 | memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC); | |
457 | } | |
458 | ||
459 | /* The reverse of copyToPic, called only by encodeResidue */ | |
460 | void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom) | |
461 | { | |
462 | m_encData = ctu.m_encData; | |
463 | m_slice = ctu.m_slice; | |
464 | m_cuAddr = ctu.m_cuAddr; | |
465 | m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.encodeIdx]; | |
466 | m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.encodeIdx]; | |
467 | m_absIdxInCTU = cuGeom.encodeIdx; | |
468 | m_numPartitions = cuGeom.numPartitions; | |
469 | ||
470 | /* copy out all prediction info for this part */ | |
471 | m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU); | |
472 | m_partCopy(m_log2CUSize, ctu.m_log2CUSize + m_absIdxInCTU); | |
72b9787e JB |
473 | m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU); |
474 | m_partCopy(m_tqBypass, ctu.m_tqBypass + m_absIdxInCTU); | |
475 | m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU); | |
476 | m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU); | |
477 | m_partCopy(m_cuDepth, ctu.m_cuDepth + m_absIdxInCTU); | |
b53f7c52 JB |
478 | m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */ |
479 | m_partCopy(m_partSize, ctu.m_partSize + m_absIdxInCTU); | |
72b9787e JB |
480 | m_partCopy(m_mergeFlag, ctu.m_mergeFlag + m_absIdxInCTU); |
481 | m_partCopy(m_interDir, ctu.m_interDir + m_absIdxInCTU); | |
482 | m_partCopy(m_mvpIdx[0], ctu.m_mvpIdx[0] + m_absIdxInCTU); | |
483 | m_partCopy(m_mvpIdx[1], ctu.m_mvpIdx[1] + m_absIdxInCTU); | |
484 | m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU); | |
485 | ||
486 | memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); | |
487 | memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); | |
488 | memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); | |
489 | memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); | |
490 | ||
491 | /* clear residual coding flags */ | |
72b9787e JB |
492 | m_partSet(m_tuDepth, 0); |
493 | m_partSet(m_transformSkip[0], 0); | |
494 | m_partSet(m_transformSkip[1], 0); | |
495 | m_partSet(m_transformSkip[2], 0); | |
496 | m_partSet(m_cbf[0], 0); | |
497 | m_partSet(m_cbf[1], 0); | |
498 | m_partSet(m_cbf[2], 0); | |
499 | } | |
500 | ||
501 | /* Only called by encodeResidue, these fields can be modified during inter/intra coding */ | |
502 | void CUData::updatePic(uint32_t depth) const | |
503 | { | |
504 | CUData& ctu = *m_encData->getPicCTU(m_cuAddr); | |
505 | ||
506 | m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp); | |
507 | m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]); | |
508 | m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]); | |
509 | m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]); | |
b53f7c52 | 510 | m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode); |
72b9787e JB |
511 | m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth); |
512 | m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]); | |
513 | m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]); | |
514 | m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]); | |
515 | m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir); | |
516 | ||
517 | uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2); | |
518 | uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2); | |
519 | memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY); | |
520 | tmpY >>= m_hChromaShift + m_vChromaShift; | |
521 | tmpY2 >>= m_hChromaShift + m_vChromaShift; | |
522 | memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY); | |
523 | memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY); | |
524 | } | |
525 | ||
526 | const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const | |
527 | { | |
528 | uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; | |
529 | ||
530 | if (!isZeroCol(absPartIdx, s_numPartInCUSize)) | |
531 | { | |
532 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; | |
533 | lPartUnitIdx = g_rasterToZscan[absPartIdx - 1]; | |
534 | if (isEqualCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize)) | |
535 | return m_encData->getPicCTU(m_cuAddr); | |
536 | else | |
537 | { | |
538 | lPartUnitIdx -= m_absIdxInCTU; | |
539 | return this; | |
540 | } | |
541 | } | |
542 | ||
543 | lPartUnitIdx = g_rasterToZscan[absPartIdx + s_numPartInCUSize - 1]; | |
544 | return m_cuLeft; | |
545 | } | |
546 | ||
b53f7c52 | 547 | const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const |
72b9787e JB |
548 | { |
549 | uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; | |
550 | ||
551 | if (!isZeroRow(absPartIdx, s_numPartInCUSize)) | |
552 | { | |
553 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; | |
554 | aPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize]; | |
555 | if (isEqualRow(absPartIdx, absZorderCUIdx, s_numPartInCUSize)) | |
556 | return m_encData->getPicCTU(m_cuAddr); | |
557 | else | |
72b9787e | 558 | aPartUnitIdx -= m_absIdxInCTU; |
b53f7c52 | 559 | return this; |
72b9787e JB |
560 | } |
561 | ||
72b9787e JB |
562 | aPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - s_numPartInCUSize]; |
563 | return m_cuAbove; | |
564 | } | |
565 | ||
566 | const CUData* CUData::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const | |
567 | { | |
568 | uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; | |
569 | ||
570 | if (!isZeroCol(absPartIdx, s_numPartInCUSize)) | |
571 | { | |
572 | if (!isZeroRow(absPartIdx, s_numPartInCUSize)) | |
573 | { | |
574 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; | |
575 | alPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize - 1]; | |
576 | if (isEqualRowOrCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize)) | |
577 | return m_encData->getPicCTU(m_cuAddr); | |
578 | else | |
579 | { | |
580 | alPartUnitIdx -= m_absIdxInCTU; | |
581 | return this; | |
582 | } | |
583 | } | |
584 | alPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - s_numPartInCUSize - 1]; | |
585 | return m_cuAbove; | |
586 | } | |
587 | ||
588 | if (!isZeroRow(absPartIdx, s_numPartInCUSize)) | |
589 | { | |
590 | alPartUnitIdx = g_rasterToZscan[absPartIdx - 1]; | |
591 | return m_cuLeft; | |
592 | } | |
593 | ||
594 | alPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - 1]; | |
595 | return m_cuAboveLeft; | |
596 | } | |
597 | ||
598 | const CUData* CUData::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const | |
599 | { | |
600 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples) | |
601 | return NULL; | |
602 | ||
603 | uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; | |
604 | ||
605 | if (lessThanCol(absPartIdxRT, s_numPartInCUSize - 1, s_numPartInCUSize)) | |
606 | { | |
607 | if (!isZeroRow(absPartIdxRT, s_numPartInCUSize)) | |
608 | { | |
609 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1]) | |
610 | { | |
611 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; | |
612 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1]; | |
613 | if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize)) | |
614 | return m_encData->getPicCTU(m_cuAddr); | |
615 | else | |
616 | { | |
617 | arPartUnitIdx -= m_absIdxInCTU; | |
618 | return this; | |
619 | } | |
620 | } | |
621 | return NULL; | |
622 | } | |
623 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - s_numPartInCUSize + 1]; | |
624 | return m_cuAbove; | |
625 | } | |
626 | ||
627 | if (!isZeroRow(absPartIdxRT, s_numPartInCUSize)) | |
628 | return NULL; | |
629 | ||
630 | arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize]; | |
631 | return m_cuAboveRight; | |
632 | } | |
633 | ||
634 | const CUData* CUData::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const | |
635 | { | |
636 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples) | |
637 | return NULL; | |
638 | ||
639 | uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; | |
640 | ||
641 | if (lessThanRow(absPartIdxLB, s_numPartInCUSize - 1, s_numPartInCUSize)) | |
642 | { | |
643 | if (!isZeroCol(absPartIdxLB, s_numPartInCUSize)) | |
644 | { | |
645 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + s_numPartInCUSize - 1]) | |
646 | { | |
647 | uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * s_numPartInCUSize; | |
648 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + s_numPartInCUSize - 1]; | |
649 | if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, s_numPartInCUSize)) | |
650 | return m_encData->getPicCTU(m_cuAddr); | |
651 | else | |
652 | { | |
653 | blPartUnitIdx -= m_absIdxInCTU; | |
654 | return this; | |
655 | } | |
656 | } | |
657 | return NULL; | |
658 | } | |
659 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + s_numPartInCUSize * 2 - 1]; | |
660 | return m_cuLeft; | |
661 | } | |
662 | ||
663 | return NULL; | |
664 | } | |
665 | ||
666 | const CUData* CUData::getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const | |
667 | { | |
668 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picHeightInLumaSamples) | |
669 | return NULL; | |
670 | ||
671 | uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; | |
672 | ||
673 | if (lessThanRow(absPartIdxLB, s_numPartInCUSize - partUnitOffset, s_numPartInCUSize)) | |
674 | { | |
675 | if (!isZeroCol(absPartIdxLB, s_numPartInCUSize)) | |
676 | { | |
677 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + partUnitOffset * s_numPartInCUSize - 1]) | |
678 | { | |
679 | uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * s_numPartInCUSize; | |
680 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + partUnitOffset * s_numPartInCUSize - 1]; | |
681 | if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, s_numPartInCUSize)) | |
682 | return m_encData->getPicCTU(m_cuAddr); | |
683 | else | |
684 | { | |
685 | blPartUnitIdx -= m_absIdxInCTU; | |
686 | return this; | |
687 | } | |
688 | } | |
689 | return NULL; | |
690 | } | |
691 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + partUnitOffset) * s_numPartInCUSize - 1]; | |
692 | if (!m_cuLeft || !m_cuLeft->m_slice) | |
693 | return NULL; | |
694 | return m_cuLeft; | |
695 | } | |
696 | ||
697 | return NULL; | |
698 | } | |
699 | ||
700 | const CUData* CUData::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const | |
701 | { | |
702 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picWidthInLumaSamples) | |
703 | return NULL; | |
704 | ||
705 | uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; | |
706 | ||
707 | if (lessThanCol(absPartIdxRT, s_numPartInCUSize - partUnitOffset, s_numPartInCUSize)) | |
708 | { | |
709 | if (!isZeroRow(absPartIdxRT, s_numPartInCUSize)) | |
710 | { | |
711 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + partUnitOffset]) | |
712 | { | |
713 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; | |
714 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + partUnitOffset]; | |
715 | if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize)) | |
716 | return m_encData->getPicCTU(m_cuAddr); | |
717 | else | |
718 | { | |
719 | arPartUnitIdx -= m_absIdxInCTU; | |
720 | return this; | |
721 | } | |
722 | } | |
723 | return NULL; | |
724 | } | |
725 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset]; | |
726 | if (!m_cuAbove || !m_cuAbove->m_slice) | |
727 | return NULL; | |
728 | return m_cuAbove; | |
729 | } | |
730 | ||
731 | if (!isZeroRow(absPartIdxRT, s_numPartInCUSize)) | |
732 | return NULL; | |
733 | ||
734 | arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset - 1]; | |
735 | if ((m_cuAboveRight == NULL || m_cuAboveRight->m_slice == NULL || (m_cuAboveRight->m_cuAddr) > m_cuAddr)) | |
736 | return NULL; | |
737 | return m_cuAboveRight; | |
738 | } | |
739 | ||
740 | /* Get left QpMinCu */ | |
741 | const CUData* CUData::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInCTU) const | |
742 | { | |
743 | uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2); | |
744 | uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx]; | |
745 | ||
746 | // check for left CTU boundary | |
747 | if (isZeroCol(absRorderQpMinCUIdx, s_numPartInCUSize)) | |
748 | return NULL; | |
749 | ||
750 | // get index of left-CU relative to top-left corner of current quantization group | |
751 | lPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - 1]; | |
752 | ||
753 | // return pointer to current CTU | |
754 | return m_encData->getPicCTU(m_cuAddr); | |
755 | } | |
756 | ||
757 | /* Get above QpMinCu */ | |
758 | const CUData* CUData::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInCTU) const | |
759 | { | |
760 | uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2); | |
761 | uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx]; | |
762 | ||
763 | // check for top CTU boundary | |
764 | if (isZeroRow(absRorderQpMinCUIdx, s_numPartInCUSize)) | |
765 | return NULL; | |
766 | ||
767 | // get index of top-CU relative to top-left corner of current quantization group | |
768 | aPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - s_numPartInCUSize]; | |
769 | ||
770 | // return pointer to current CTU | |
771 | return m_encData->getPicCTU(m_cuAddr); | |
772 | } | |
773 | ||
774 | /* Get reference QP from left QpMinCu or latest coded QP */ | |
b53f7c52 | 775 | int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const |
72b9787e JB |
776 | { |
777 | uint32_t lPartIdx = 0, aPartIdx = 0; | |
778 | const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU); | |
779 | const CUData* cUAbove = getQpMinCuAbove(aPartIdx, m_absIdxInCTU + curAbsIdxInCTU); | |
780 | ||
781 | return ((cULeft ? cULeft->m_qp[lPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + (cUAbove ? cUAbove->m_qp[aPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + 1) >> 1; | |
782 | } | |
783 | ||
784 | int CUData::getLastValidPartIdx(int absPartIdx) const | |
785 | { | |
786 | int lastValidPartIdx = absPartIdx - 1; | |
787 | ||
788 | while (lastValidPartIdx >= 0 && m_predMode[lastValidPartIdx] == MODE_NONE) | |
789 | { | |
790 | uint32_t depth = m_cuDepth[lastValidPartIdx]; | |
791 | lastValidPartIdx -= m_numPartitions >> (depth << 1); | |
792 | } | |
793 | ||
794 | return lastValidPartIdx; | |
795 | } | |
796 | ||
b53f7c52 | 797 | int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const |
72b9787e JB |
798 | { |
799 | uint32_t quPartIdxMask = 0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2; | |
800 | int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask); | |
801 | ||
802 | if (lastValidPartIdx >= 0) | |
803 | return m_qp[lastValidPartIdx]; | |
804 | else | |
805 | { | |
806 | if (m_absIdxInCTU) | |
807 | return m_encData->getPicCTU(m_cuAddr)->getLastCodedQP(m_absIdxInCTU); | |
808 | else if (m_cuAddr > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled && !(m_cuAddr % m_slice->m_sps->numCuInWidth))) | |
809 | return m_encData->getPicCTU(m_cuAddr - 1)->getLastCodedQP(NUM_CU_PARTITIONS); | |
810 | else | |
b53f7c52 | 811 | return (int8_t)m_slice->m_sliceQp; |
72b9787e JB |
812 | } |
813 | } | |
814 | ||
815 | /* Get allowed chroma intra modes */ | |
816 | void CUData::getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const | |
817 | { | |
818 | modeList[0] = PLANAR_IDX; | |
819 | modeList[1] = VER_IDX; | |
820 | modeList[2] = HOR_IDX; | |
821 | modeList[3] = DC_IDX; | |
822 | modeList[4] = DM_CHROMA_IDX; | |
823 | ||
824 | uint32_t lumaMode = m_lumaIntraDir[absPartIdx]; | |
825 | ||
826 | for (int i = 0; i < NUM_CHROMA_MODE - 1; i++) | |
827 | { | |
828 | if (lumaMode == modeList[i]) | |
829 | { | |
830 | modeList[i] = 34; // VER+8 mode | |
831 | break; | |
832 | } | |
833 | } | |
834 | } | |
835 | ||
836 | /* Get most probable intra modes */ | |
837 | int CUData::getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const | |
838 | { | |
839 | const CUData* tempCU; | |
840 | uint32_t tempPartIdx; | |
841 | uint32_t leftIntraDir, aboveIntraDir; | |
842 | ||
843 | // Get intra direction of left PU | |
844 | tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
845 | ||
846 | leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX; | |
847 | ||
848 | // Get intra direction of above PU | |
b53f7c52 | 849 | tempCU = g_zscanToPelY[m_absIdxInCTU + absPartIdx] > 0 ? getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx) : NULL; |
72b9787e JB |
850 | |
851 | aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX; | |
852 | ||
853 | if (leftIntraDir == aboveIntraDir) | |
854 | { | |
855 | if (leftIntraDir >= 2) // angular modes | |
856 | { | |
857 | intraDirPred[0] = leftIntraDir; | |
858 | intraDirPred[1] = ((leftIntraDir - 2 + 31) & 31) + 2; | |
859 | intraDirPred[2] = ((leftIntraDir - 2 + 1) & 31) + 2; | |
860 | } | |
861 | else //non-angular | |
862 | { | |
863 | intraDirPred[0] = PLANAR_IDX; | |
864 | intraDirPred[1] = DC_IDX; | |
865 | intraDirPred[2] = VER_IDX; | |
866 | } | |
867 | return 1; | |
868 | } | |
869 | else | |
870 | { | |
871 | intraDirPred[0] = leftIntraDir; | |
872 | intraDirPred[1] = aboveIntraDir; | |
873 | ||
874 | if (leftIntraDir && aboveIntraDir) //both modes are non-planar | |
875 | intraDirPred[2] = PLANAR_IDX; | |
876 | else | |
877 | intraDirPred[2] = (leftIntraDir + aboveIntraDir) < 2 ? VER_IDX : DC_IDX; | |
878 | return 2; | |
879 | } | |
880 | } | |
881 | ||
882 | uint32_t CUData::getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const | |
883 | { | |
884 | const CUData* tempCU; | |
885 | uint32_t tempPartIdx; | |
886 | uint32_t ctx; | |
887 | ||
888 | // Get left split flag | |
889 | tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
890 | ctx = (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0; | |
891 | ||
892 | // Get above split flag | |
893 | tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
894 | ctx += (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0; | |
895 | ||
896 | return ctx; | |
897 | } | |
898 | ||
899 | void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const | |
900 | { | |
901 | uint32_t log2CUSize = m_log2CUSize[absPartIdx]; | |
b53f7c52 | 902 | uint32_t splitFlag = m_partSize[absPartIdx] != SIZE_2Nx2N; |
72b9787e JB |
903 | |
904 | tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize; | |
905 | tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize; | |
906 | ||
907 | tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag), tuDepthRange[1])); | |
908 | } | |
909 | ||
910 | void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const | |
911 | { | |
912 | uint32_t log2CUSize = m_log2CUSize[absPartIdx]; | |
913 | uint32_t quadtreeTUMaxDepth = m_slice->m_sps->quadtreeTUMaxDepthInter; | |
914 | uint32_t splitFlag = quadtreeTUMaxDepth == 1 && m_partSize[absPartIdx] != SIZE_2Nx2N; | |
915 | ||
916 | tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize; | |
917 | tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize; | |
918 | ||
919 | tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag), tuDepthRange[1])); | |
920 | } | |
921 | ||
922 | uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const | |
923 | { | |
924 | const CUData* tempCU; | |
925 | uint32_t tempPartIdx; | |
926 | uint32_t ctx; | |
927 | ||
928 | // Get BCBP of left PU | |
929 | tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
930 | ctx = tempCU ? tempCU->isSkipped(tempPartIdx) : 0; | |
931 | ||
932 | // Get BCBP of above PU | |
933 | tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
934 | ctx += tempCU ? tempCU->isSkipped(tempPartIdx) : 0; | |
935 | ||
936 | return ctx; | |
937 | } | |
938 | ||
b53f7c52 | 939 | bool CUData::setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth) |
72b9787e JB |
940 | { |
941 | uint32_t curPartNumb = NUM_CU_PARTITIONS >> (depth << 1); | |
942 | uint32_t curPartNumQ = curPartNumb >> 2; | |
943 | ||
944 | if (m_cuDepth[absPartIdx] > depth) | |
945 | { | |
946 | for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++) | |
947 | if (setQPSubCUs(qp, absPartIdx + subPartIdx * curPartNumQ, depth + 1)) | |
948 | return true; | |
949 | } | |
950 | else | |
951 | { | |
952 | if (getQtRootCbf(absPartIdx)) | |
953 | return true; | |
954 | else | |
955 | setQPSubParts(qp, absPartIdx, depth); | |
956 | } | |
957 | ||
958 | return false; | |
959 | } | |
960 | ||
961 | void CUData::setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx) | |
962 | { | |
963 | uint32_t curPartNumQ = m_numPartitions >> 2; | |
964 | X265_CHECK(puIdx < 2, "unexpected part unit index\n"); | |
965 | ||
966 | switch (m_partSize[absPartIdx]) | |
967 | { | |
968 | case SIZE_2Nx2N: | |
969 | memset(m_interDir + absPartIdx, dir, 4 * curPartNumQ); | |
970 | break; | |
971 | case SIZE_2NxN: | |
972 | memset(m_interDir + absPartIdx, dir, 2 * curPartNumQ); | |
973 | break; | |
974 | case SIZE_Nx2N: | |
975 | memset(m_interDir + absPartIdx, dir, curPartNumQ); | |
976 | memset(m_interDir + absPartIdx + 2 * curPartNumQ, dir, curPartNumQ); | |
977 | break; | |
978 | case SIZE_NxN: | |
979 | memset(m_interDir + absPartIdx, dir, curPartNumQ); | |
980 | break; | |
981 | case SIZE_2NxnU: | |
982 | if (!puIdx) | |
983 | { | |
984 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1)); | |
985 | memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1)); | |
986 | } | |
987 | else | |
988 | { | |
989 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1)); | |
990 | memset(m_interDir + absPartIdx + curPartNumQ, dir, ((curPartNumQ >> 1) + (curPartNumQ << 1))); | |
991 | } | |
992 | break; | |
993 | case SIZE_2NxnD: | |
994 | if (!puIdx) | |
995 | { | |
996 | memset(m_interDir + absPartIdx, dir, ((curPartNumQ << 1) + (curPartNumQ >> 1))); | |
997 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ, dir, (curPartNumQ >> 1)); | |
998 | } | |
999 | else | |
1000 | { | |
1001 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1)); | |
1002 | memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1)); | |
1003 | } | |
1004 | break; | |
1005 | case SIZE_nLx2N: | |
1006 | if (!puIdx) | |
1007 | { | |
1008 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2)); | |
1009 | memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
1010 | memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2)); | |
1011 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
1012 | } | |
1013 | else | |
1014 | { | |
1015 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2)); | |
1016 | memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2))); | |
1017 | memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2)); | |
1018 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2))); | |
1019 | } | |
1020 | break; | |
1021 | case SIZE_nRx2N: | |
1022 | if (!puIdx) | |
1023 | { | |
1024 | memset(m_interDir + absPartIdx, dir, (curPartNumQ + (curPartNumQ >> 2))); | |
1025 | memset(m_interDir + absPartIdx + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
1026 | memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ + (curPartNumQ >> 2))); | |
1027 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
1028 | } | |
1029 | else | |
1030 | { | |
1031 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2)); | |
1032 | memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
1033 | memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2)); | |
1034 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
1035 | } | |
1036 | break; | |
1037 | default: | |
1038 | X265_CHECK(0, "unexpected part type\n"); | |
1039 | break; | |
1040 | } | |
1041 | } | |
1042 | ||
1043 | template<typename T> | |
1044 | void CUData::setAllPU(T* p, const T& val, int absPartIdx, int puIdx) | |
1045 | { | |
1046 | int i; | |
1047 | ||
1048 | p += absPartIdx; | |
1049 | int numElements = m_numPartitions; | |
1050 | ||
1051 | switch (m_partSize[absPartIdx]) | |
1052 | { | |
1053 | case SIZE_2Nx2N: | |
1054 | for (i = 0; i < numElements; i++) | |
1055 | p[i] = val; | |
1056 | break; | |
1057 | ||
1058 | case SIZE_2NxN: | |
1059 | numElements >>= 1; | |
1060 | for (i = 0; i < numElements; i++) | |
1061 | p[i] = val; | |
1062 | break; | |
1063 | ||
1064 | case SIZE_Nx2N: | |
1065 | numElements >>= 2; | |
1066 | for (i = 0; i < numElements; i++) | |
1067 | { | |
1068 | p[i] = val; | |
1069 | p[i + 2 * numElements] = val; | |
1070 | } | |
1071 | break; | |
1072 | ||
1073 | case SIZE_2NxnU: | |
1074 | { | |
1075 | int curPartNumQ = numElements >> 2; | |
1076 | if (!puIdx) | |
1077 | { | |
1078 | T *pT = p; | |
1079 | T *pT2 = p + curPartNumQ; | |
1080 | for (i = 0; i < (curPartNumQ >> 1); i++) | |
1081 | { | |
1082 | pT[i] = val; | |
1083 | pT2[i] = val; | |
1084 | } | |
1085 | } | |
1086 | else | |
1087 | { | |
1088 | T *pT = p; | |
1089 | for (i = 0; i < (curPartNumQ >> 1); i++) | |
1090 | pT[i] = val; | |
1091 | ||
1092 | pT = p + curPartNumQ; | |
1093 | for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++) | |
1094 | pT[i] = val; | |
1095 | } | |
1096 | break; | |
1097 | } | |
1098 | ||
1099 | case SIZE_2NxnD: | |
1100 | { | |
1101 | int curPartNumQ = numElements >> 2; | |
1102 | if (!puIdx) | |
1103 | { | |
1104 | T *pT = p; | |
1105 | for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++) | |
1106 | pT[i] = val; | |
1107 | ||
1108 | pT = p + (numElements - curPartNumQ); | |
1109 | for (i = 0; i < (curPartNumQ >> 1); i++) | |
1110 | pT[i] = val; | |
1111 | } | |
1112 | else | |
1113 | { | |
1114 | T *pT = p; | |
1115 | T *pT2 = p + curPartNumQ; | |
1116 | for (i = 0; i < (curPartNumQ >> 1); i++) | |
1117 | { | |
1118 | pT[i] = val; | |
1119 | pT2[i] = val; | |
1120 | } | |
1121 | } | |
1122 | break; | |
1123 | } | |
1124 | ||
1125 | case SIZE_nLx2N: | |
1126 | { | |
1127 | int curPartNumQ = numElements >> 2; | |
1128 | if (!puIdx) | |
1129 | { | |
1130 | T *pT = p; | |
1131 | T *pT2 = p + (curPartNumQ << 1); | |
1132 | T *pT3 = p + (curPartNumQ >> 1); | |
1133 | T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1); | |
1134 | ||
1135 | for (i = 0; i < (curPartNumQ >> 2); i++) | |
1136 | { | |
1137 | pT[i] = val; | |
1138 | pT2[i] = val; | |
1139 | pT3[i] = val; | |
1140 | pT4[i] = val; | |
1141 | } | |
1142 | } | |
1143 | else | |
1144 | { | |
1145 | T *pT = p; | |
1146 | T *pT2 = p + (curPartNumQ << 1); | |
1147 | for (i = 0; i < (curPartNumQ >> 2); i++) | |
1148 | { | |
1149 | pT[i] = val; | |
1150 | pT2[i] = val; | |
1151 | } | |
1152 | ||
1153 | pT = p + (curPartNumQ >> 1); | |
1154 | pT2 = p + (curPartNumQ << 1) + (curPartNumQ >> 1); | |
1155 | for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++) | |
1156 | { | |
1157 | pT[i] = val; | |
1158 | pT2[i] = val; | |
1159 | } | |
1160 | } | |
1161 | break; | |
1162 | } | |
1163 | ||
1164 | case SIZE_nRx2N: | |
1165 | { | |
1166 | int curPartNumQ = numElements >> 2; | |
1167 | if (!puIdx) | |
1168 | { | |
1169 | T *pT = p; | |
1170 | T *pT2 = p + (curPartNumQ << 1); | |
1171 | for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++) | |
1172 | { | |
1173 | pT[i] = val; | |
1174 | pT2[i] = val; | |
1175 | } | |
1176 | ||
1177 | pT = p + curPartNumQ + (curPartNumQ >> 1); | |
1178 | pT2 = p + numElements - curPartNumQ + (curPartNumQ >> 1); | |
1179 | for (i = 0; i < (curPartNumQ >> 2); i++) | |
1180 | { | |
1181 | pT[i] = val; | |
1182 | pT2[i] = val; | |
1183 | } | |
1184 | } | |
1185 | else | |
1186 | { | |
1187 | T *pT = p; | |
1188 | T *pT2 = p + (curPartNumQ >> 1); | |
1189 | T *pT3 = p + (curPartNumQ << 1); | |
1190 | T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1); | |
1191 | for (i = 0; i < (curPartNumQ >> 2); i++) | |
1192 | { | |
1193 | pT[i] = val; | |
1194 | pT2[i] = val; | |
1195 | pT3[i] = val; | |
1196 | pT4[i] = val; | |
1197 | } | |
1198 | } | |
1199 | break; | |
1200 | } | |
1201 | ||
1202 | case SIZE_NxN: | |
1203 | default: | |
1204 | X265_CHECK(0, "unknown partition type\n"); | |
1205 | break; | |
1206 | } | |
1207 | } | |
1208 | ||
1209 | void CUData::setPUMv(int list, const MV& mv, int absPartIdx, int puIdx) | |
1210 | { | |
1211 | setAllPU(m_mv[list], mv, absPartIdx, puIdx); | |
1212 | } | |
1213 | ||
b53f7c52 | 1214 | void CUData::setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx) |
72b9787e JB |
1215 | { |
1216 | setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx); | |
1217 | } | |
1218 | ||
1219 | void CUData::getPartIndexAndSize(uint32_t partIdx, uint32_t& outPartAddr, int& outWidth, int& outHeight) const | |
1220 | { | |
1221 | int cuSize = 1 << m_log2CUSize[0]; | |
1222 | int partType = m_partSize[0]; | |
1223 | ||
1224 | int tmp = partTable[partType][partIdx][0]; | |
1225 | outWidth = ((tmp >> 4) * cuSize) >> 2; | |
1226 | outHeight = ((tmp & 0xF) * cuSize) >> 2; | |
1227 | outPartAddr = (partAddrTable[partType][partIdx] * m_numPartitions) >> 4; | |
1228 | } | |
1229 | ||
1230 | void CUData::getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& outMvField) const | |
1231 | { | |
1232 | if (cu) | |
1233 | { | |
1234 | outMvField.mv = cu->m_mv[picList][absPartIdx]; | |
1235 | outMvField.refIdx = cu->m_refIdx[picList][absPartIdx]; | |
1236 | } | |
1237 | else | |
1238 | { | |
1239 | // OUT OF BOUNDARY | |
b53f7c52 | 1240 | outMvField.mv = 0; |
72b9787e JB |
1241 | outMvField.refIdx = REF_NOT_VALID; |
1242 | } | |
1243 | } | |
1244 | ||
1245 | void CUData::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const | |
1246 | { | |
1247 | partIdxLT = m_absIdxInCTU; | |
1248 | partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1]; | |
1249 | ||
1250 | switch (m_partSize[0]) | |
1251 | { | |
1252 | case SIZE_2Nx2N: break; | |
1253 | case SIZE_2NxN: | |
1254 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 1; | |
1255 | partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 1; | |
1256 | break; | |
1257 | case SIZE_Nx2N: | |
1258 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 2; | |
1259 | partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 2; | |
1260 | break; | |
1261 | case SIZE_NxN: | |
1262 | partIdxLT += (m_numPartitions >> 2) * partIdx; | |
1263 | partIdxRT += (m_numPartitions >> 2) * (partIdx - 1); | |
1264 | break; | |
1265 | case SIZE_2NxnU: | |
1266 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 3; | |
1267 | partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 3; | |
1268 | break; | |
1269 | case SIZE_2NxnD: | |
1270 | partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3); | |
1271 | partIdxRT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3); | |
1272 | break; | |
1273 | case SIZE_nLx2N: | |
1274 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 4; | |
1275 | partIdxRT -= (partIdx == 1) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4); | |
1276 | break; | |
1277 | case SIZE_nRx2N: | |
1278 | partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4); | |
1279 | partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 4; | |
1280 | break; | |
1281 | default: | |
1282 | X265_CHECK(0, "unexpected part index\n"); | |
1283 | break; | |
1284 | } | |
1285 | } | |
1286 | ||
1287 | uint32_t CUData::deriveLeftBottomIdx(uint32_t puIdx) const | |
1288 | { | |
1289 | uint32_t outPartIdxLB; | |
1290 | outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * s_numPartInCUSize]; | |
1291 | ||
1292 | switch (m_partSize[0]) | |
1293 | { | |
1294 | case SIZE_2Nx2N: | |
1295 | outPartIdxLB += m_numPartitions >> 1; | |
1296 | break; | |
1297 | case SIZE_2NxN: | |
1298 | outPartIdxLB += puIdx ? m_numPartitions >> 1 : 0; | |
1299 | break; | |
1300 | case SIZE_Nx2N: | |
1301 | outPartIdxLB += puIdx ? (m_numPartitions >> 2) * 3 : m_numPartitions >> 1; | |
1302 | break; | |
1303 | case SIZE_NxN: | |
1304 | outPartIdxLB += (m_numPartitions >> 2) * puIdx; | |
1305 | break; | |
1306 | case SIZE_2NxnU: | |
1307 | outPartIdxLB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3); | |
1308 | break; | |
1309 | case SIZE_2NxnD: | |
1310 | outPartIdxLB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3); | |
1311 | break; | |
1312 | case SIZE_nLx2N: | |
1313 | outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 4) : m_numPartitions >> 1; | |
1314 | break; | |
1315 | case SIZE_nRx2N: | |
1316 | outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 2) + (m_numPartitions >> 4) : m_numPartitions >> 1; | |
1317 | break; | |
1318 | default: | |
1319 | X265_CHECK(0, "unexpected part index\n"); | |
1320 | break; | |
1321 | } | |
1322 | return outPartIdxLB; | |
1323 | } | |
1324 | ||
1325 | /* Derives the partition index of neighboring bottom right block */ | |
1326 | uint32_t CUData::deriveRightBottomIdx(uint32_t puIdx) const | |
1327 | { | |
1328 | uint32_t outPartIdxRB; | |
1329 | outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + | |
1330 | ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * s_numPartInCUSize + | |
1331 | (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1]; | |
1332 | ||
1333 | switch (m_partSize[0]) | |
1334 | { | |
1335 | case SIZE_2Nx2N: | |
1336 | outPartIdxRB += m_numPartitions >> 1; | |
1337 | break; | |
1338 | case SIZE_2NxN: | |
1339 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : 0; | |
1340 | break; | |
1341 | case SIZE_Nx2N: | |
1342 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : m_numPartitions >> 2; | |
1343 | break; | |
1344 | case SIZE_NxN: | |
1345 | outPartIdxRB += (m_numPartitions >> 2) * (puIdx - 1); | |
1346 | break; | |
1347 | case SIZE_2NxnU: | |
1348 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3); | |
1349 | break; | |
1350 | case SIZE_2NxnD: | |
1351 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3); | |
1352 | break; | |
1353 | case SIZE_nLx2N: | |
1354 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 3) + (m_numPartitions >> 4); | |
1355 | break; | |
1356 | case SIZE_nRx2N: | |
1357 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3) + (m_numPartitions >> 4); | |
1358 | break; | |
1359 | default: | |
1360 | X265_CHECK(0, "unexpected part index\n"); | |
1361 | break; | |
1362 | } | |
1363 | return outPartIdxRB; | |
1364 | } | |
1365 | ||
1366 | void CUData::deriveLeftRightTopIdxAdi(uint32_t& outPartIdxLT, uint32_t& outPartIdxRT, uint32_t partOffset, uint32_t partDepth) const | |
1367 | { | |
1368 | uint32_t numPartInWidth = 1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - partDepth); | |
1369 | ||
1370 | outPartIdxLT = m_absIdxInCTU + partOffset; | |
1371 | outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + numPartInWidth - 1]; | |
1372 | } | |
1373 | ||
1374 | bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const | |
1375 | { | |
1376 | if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx]) | |
1377 | return false; | |
1378 | ||
1379 | for (uint32_t refListIdx = 0; refListIdx < 2; refListIdx++) | |
1380 | { | |
1381 | if (m_interDir[absPartIdx] & (1 << refListIdx)) | |
1382 | { | |
1383 | if (m_mv[refListIdx][absPartIdx] != candCU.m_mv[refListIdx][candAbsPartIdx] || | |
1384 | m_refIdx[refListIdx][absPartIdx] != candCU.m_refIdx[refListIdx][candAbsPartIdx]) | |
1385 | return false; | |
1386 | } | |
1387 | } | |
1388 | ||
1389 | return true; | |
1390 | } | |
1391 | ||
1392 | /* Construct list of merging candidates */ | |
1393 | uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*mvFieldNeighbours)[2], uint8_t* interDirNeighbours) const | |
1394 | { | |
1395 | uint32_t absPartAddr = m_absIdxInCTU + absPartIdx; | |
1396 | const bool isInterB = m_slice->isInterB(); | |
1397 | ||
1398 | const uint32_t maxNumMergeCand = m_slice->m_maxNumMergeCand; | |
1399 | ||
1400 | for (uint32_t i = 0; i < maxNumMergeCand; ++i) | |
1401 | { | |
b53f7c52 JB |
1402 | mvFieldNeighbours[i][0].mv = 0; |
1403 | mvFieldNeighbours[i][1].mv = 0; | |
72b9787e JB |
1404 | mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID; |
1405 | mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID; | |
1406 | } | |
1407 | ||
1408 | /* calculate the location of upper-left corner pixel and size of the current PU */ | |
1409 | int xP, yP, nPSW, nPSH; | |
1410 | ||
1411 | int cuSize = 1 << m_log2CUSize[0]; | |
1412 | int partMode = m_partSize[0]; | |
1413 | ||
1414 | int tmp = partTable[partMode][puIdx][0]; | |
1415 | nPSW = ((tmp >> 4) * cuSize) >> 2; | |
1416 | nPSH = ((tmp & 0xF) * cuSize) >> 2; | |
1417 | ||
1418 | tmp = partTable[partMode][puIdx][1]; | |
1419 | xP = ((tmp >> 4) * cuSize) >> 2; | |
1420 | yP = ((tmp & 0xF) * cuSize) >> 2; | |
1421 | ||
1422 | uint32_t count = 0; | |
1423 | ||
1424 | uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx); | |
1425 | PartSize curPS = (PartSize)m_partSize[absPartIdx]; | |
1426 | ||
1427 | // left | |
1428 | uint32_t leftPartIdx = 0; | |
1429 | const CUData* cuLeft = getPULeft(leftPartIdx, partIdxLB); | |
1430 | bool isAvailableA1 = cuLeft && | |
1431 | cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) && | |
1432 | !(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) && | |
b53f7c52 | 1433 | cuLeft->isInter(leftPartIdx); |
72b9787e JB |
1434 | if (isAvailableA1) |
1435 | { | |
1436 | // get Inter Dir | |
1437 | interDirNeighbours[count] = cuLeft->m_interDir[leftPartIdx]; | |
1438 | // get Mv from Left | |
1439 | cuLeft->getMvField(cuLeft, leftPartIdx, 0, mvFieldNeighbours[count][0]); | |
1440 | if (isInterB) | |
1441 | cuLeft->getMvField(cuLeft, leftPartIdx, 1, mvFieldNeighbours[count][1]); | |
1442 | ||
1443 | count++; | |
1444 | ||
1445 | if (count == maxNumMergeCand) | |
1446 | return maxNumMergeCand; | |
1447 | } | |
1448 | ||
1449 | deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT); | |
1450 | ||
1451 | // above | |
1452 | uint32_t abovePartIdx = 0; | |
1453 | const CUData* cuAbove = getPUAbove(abovePartIdx, partIdxRT); | |
1454 | bool isAvailableB1 = cuAbove && | |
1455 | cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) && | |
1456 | !(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) && | |
b53f7c52 | 1457 | cuAbove->isInter(abovePartIdx); |
72b9787e JB |
1458 | if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx))) |
1459 | { | |
1460 | // get Inter Dir | |
1461 | interDirNeighbours[count] = cuAbove->m_interDir[abovePartIdx]; | |
1462 | // get Mv from Left | |
1463 | cuAbove->getMvField(cuAbove, abovePartIdx, 0, mvFieldNeighbours[count][0]); | |
1464 | if (isInterB) | |
1465 | cuAbove->getMvField(cuAbove, abovePartIdx, 1, mvFieldNeighbours[count][1]); | |
1466 | ||
1467 | count++; | |
1468 | ||
1469 | if (count == maxNumMergeCand) | |
1470 | return maxNumMergeCand; | |
1471 | } | |
1472 | ||
1473 | // above right | |
1474 | uint32_t aboveRightPartIdx = 0; | |
1475 | const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT); | |
1476 | bool isAvailableB0 = cuAboveRight && | |
1477 | cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) && | |
b53f7c52 | 1478 | cuAboveRight->isInter(aboveRightPartIdx); |
72b9787e JB |
1479 | if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx))) |
1480 | { | |
1481 | // get Inter Dir | |
1482 | interDirNeighbours[count] = cuAboveRight->m_interDir[aboveRightPartIdx]; | |
1483 | // get Mv from Left | |
1484 | cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, mvFieldNeighbours[count][0]); | |
1485 | if (isInterB) | |
1486 | cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, mvFieldNeighbours[count][1]); | |
1487 | ||
1488 | count++; | |
1489 | ||
1490 | if (count == maxNumMergeCand) | |
1491 | return maxNumMergeCand; | |
1492 | } | |
1493 | ||
1494 | // left bottom | |
1495 | uint32_t leftBottomPartIdx = 0; | |
1496 | const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB); | |
1497 | bool isAvailableA0 = cuLeftBottom && | |
1498 | cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) && | |
b53f7c52 | 1499 | cuLeftBottom->isInter(leftBottomPartIdx); |
72b9787e JB |
1500 | if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx))) |
1501 | { | |
1502 | // get Inter Dir | |
1503 | interDirNeighbours[count] = cuLeftBottom->m_interDir[leftBottomPartIdx]; | |
1504 | // get Mv from Left | |
1505 | cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, mvFieldNeighbours[count][0]); | |
1506 | if (isInterB) | |
1507 | cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, mvFieldNeighbours[count][1]); | |
1508 | ||
1509 | count++; | |
1510 | ||
1511 | if (count == maxNumMergeCand) | |
1512 | return maxNumMergeCand; | |
1513 | } | |
1514 | ||
1515 | // above left | |
1516 | if (count < 4) | |
1517 | { | |
1518 | uint32_t aboveLeftPartIdx = 0; | |
1519 | const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr); | |
1520 | bool isAvailableB2 = cuAboveLeft && | |
1521 | cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) && | |
b53f7c52 | 1522 | cuAboveLeft->isInter(aboveLeftPartIdx); |
72b9787e JB |
1523 | if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx)) |
1524 | && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx))) | |
1525 | { | |
1526 | // get Inter Dir | |
1527 | interDirNeighbours[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx]; | |
1528 | // get Mv from Left | |
1529 | cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, mvFieldNeighbours[count][0]); | |
1530 | if (isInterB) | |
1531 | cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, mvFieldNeighbours[count][1]); | |
1532 | ||
1533 | count++; | |
1534 | ||
1535 | if (count == maxNumMergeCand) | |
1536 | return maxNumMergeCand; | |
1537 | } | |
1538 | } | |
1539 | if (m_slice->m_sps->bTemporalMVPEnabled) | |
1540 | { | |
1541 | uint32_t partIdxRB = deriveRightBottomIdx(puIdx); | |
1542 | MV colmv; | |
1543 | int ctuIdx = -1; | |
1544 | ||
1545 | // image boundary check | |
1546 | if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples && | |
1547 | m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples) | |
1548 | { | |
1549 | uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; | |
1550 | uint32_t numPartInCUSize = s_numPartInCUSize; | |
1551 | bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU | |
1552 | bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row of CTU | |
1553 | ||
1554 | if (bNotLastCol && bNotLastRow) | |
1555 | { | |
1556 | absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1]; | |
1557 | ctuIdx = m_cuAddr; | |
1558 | } | |
1559 | else if (bNotLastCol) | |
1560 | absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)]; | |
1561 | else if (bNotLastRow) | |
1562 | { | |
1563 | absPartAddr = g_rasterToZscan[absPartIdxRB + 1]; | |
1564 | ctuIdx = m_cuAddr + 1; | |
1565 | } | |
1566 | else // is the right bottom corner of CTU | |
1567 | absPartAddr = 0; | |
1568 | } | |
1569 | ||
1570 | int refIdx = 0; | |
1571 | uint32_t partIdxCenter = deriveCenterIdx(puIdx); | |
1572 | uint32_t curCTUIdx = m_cuAddr; | |
1573 | int dir = 0; | |
1574 | bool bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, 0, ctuIdx, absPartAddr); | |
1575 | if (!bExistMV) | |
1576 | bExistMV = getColMVP(colmv, refIdx, 0, curCTUIdx, partIdxCenter); | |
1577 | if (bExistMV) | |
1578 | { | |
1579 | dir |= 1; | |
1580 | mvFieldNeighbours[count][0].mv = colmv; | |
1581 | mvFieldNeighbours[count][0].refIdx = refIdx; | |
1582 | } | |
1583 | ||
1584 | if (isInterB) | |
1585 | { | |
1586 | bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, 1, ctuIdx, absPartAddr); | |
1587 | if (!bExistMV) | |
1588 | bExistMV = getColMVP(colmv, refIdx, 1, curCTUIdx, partIdxCenter); | |
1589 | ||
1590 | if (bExistMV) | |
1591 | { | |
1592 | dir |= 2; | |
1593 | mvFieldNeighbours[count][1].mv = colmv; | |
1594 | mvFieldNeighbours[count][1].refIdx = refIdx; | |
1595 | } | |
1596 | } | |
1597 | ||
1598 | if (dir != 0) | |
1599 | { | |
1600 | interDirNeighbours[count] = (uint8_t)dir; | |
1601 | ||
1602 | count++; | |
1603 | ||
1604 | if (count == maxNumMergeCand) | |
1605 | return maxNumMergeCand; | |
1606 | } | |
1607 | } | |
1608 | ||
1609 | if (isInterB) | |
1610 | { | |
1611 | const uint32_t cutoff = count * (count - 1); | |
1612 | uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 } | |
1613 | uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 } | |
1614 | ||
1615 | for (uint32_t idx = 0; idx < cutoff; idx++) | |
1616 | { | |
1617 | int i = priorityList0 & 3; | |
1618 | int j = priorityList1 & 3; | |
1619 | priorityList0 >>= 2; | |
1620 | priorityList1 >>= 2; | |
1621 | ||
1622 | if ((interDirNeighbours[i] & 0x1) && (interDirNeighbours[j] & 0x2)) | |
1623 | { | |
1624 | // get Mv from cand[i] and cand[j] | |
1625 | int refIdxL0 = mvFieldNeighbours[i][0].refIdx; | |
1626 | int refIdxL1 = mvFieldNeighbours[j][1].refIdx; | |
1627 | int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0]; | |
1628 | int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1]; | |
1629 | if (!(refPOCL0 == refPOCL1 && mvFieldNeighbours[i][0].mv == mvFieldNeighbours[j][1].mv)) | |
1630 | { | |
1631 | mvFieldNeighbours[count][0].mv = mvFieldNeighbours[i][0].mv; | |
1632 | mvFieldNeighbours[count][0].refIdx = refIdxL0; | |
1633 | mvFieldNeighbours[count][1].mv = mvFieldNeighbours[j][1].mv; | |
1634 | mvFieldNeighbours[count][1].refIdx = refIdxL1; | |
1635 | interDirNeighbours[count] = 3; | |
1636 | ||
1637 | count++; | |
1638 | ||
1639 | if (count == maxNumMergeCand) | |
1640 | return maxNumMergeCand; | |
1641 | } | |
1642 | } | |
1643 | } | |
1644 | } | |
1645 | int numRefIdx = (isInterB) ? X265_MIN(m_slice->m_numRefIdx[0], m_slice->m_numRefIdx[1]) : m_slice->m_numRefIdx[0]; | |
1646 | int r = 0; | |
1647 | int refcnt = 0; | |
1648 | while (count < maxNumMergeCand) | |
1649 | { | |
1650 | interDirNeighbours[count] = 1; | |
b53f7c52 | 1651 | mvFieldNeighbours[count][0].mv = 0; |
72b9787e JB |
1652 | mvFieldNeighbours[count][0].refIdx = r; |
1653 | ||
1654 | if (isInterB) | |
1655 | { | |
1656 | interDirNeighbours[count] = 3; | |
1657 | mvFieldNeighbours[count][1].mv.word = 0; | |
1658 | mvFieldNeighbours[count][1].refIdx = r; | |
1659 | } | |
1660 | ||
1661 | count++; | |
1662 | ||
1663 | if (refcnt == numRefIdx - 1) | |
1664 | r = 0; | |
1665 | else | |
1666 | { | |
1667 | ++r; | |
1668 | ++refcnt; | |
1669 | } | |
1670 | } | |
1671 | ||
1672 | return count; | |
1673 | } | |
1674 | ||
1675 | /* Check whether the current PU and a spatial neighboring PU are in a same ME region */ | |
1676 | bool CUData::isDiffMER(int xN, int yN, int xP, int yP) const | |
1677 | { | |
1678 | uint32_t plevel = 2; | |
1679 | ||
1680 | if ((xN >> plevel) != (xP >> plevel)) | |
1681 | return true; | |
1682 | if ((yN >> plevel) != (yP >> plevel)) | |
1683 | return true; | |
1684 | return false; | |
1685 | } | |
1686 | ||
1687 | /* Constructs a list of candidates for AMVP, and a larger list of motion candidates */ | |
1688 | int CUData::fillMvpCand(uint32_t puIdx, uint32_t absPartIdx, int picList, int refIdx, MV* amvpCand, MV* mvc) const | |
1689 | { | |
1690 | int num = 0; | |
1691 | ||
1692 | // spatial MV | |
1693 | uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx); | |
1694 | ||
1695 | deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT); | |
1696 | ||
1697 | MV mv[MD_ABOVE_LEFT + 1]; | |
1698 | MV mvOrder[MD_ABOVE_LEFT + 1]; | |
1699 | bool valid[MD_ABOVE_LEFT + 1]; | |
1700 | bool validOrder[MD_ABOVE_LEFT + 1]; | |
1701 | ||
1702 | valid[MD_BELOW_LEFT] = addMVPCand(mv[MD_BELOW_LEFT], picList, refIdx, partIdxLB, MD_BELOW_LEFT); | |
1703 | valid[MD_LEFT] = addMVPCand(mv[MD_LEFT], picList, refIdx, partIdxLB, MD_LEFT); | |
1704 | valid[MD_ABOVE_RIGHT] = addMVPCand(mv[MD_ABOVE_RIGHT], picList, refIdx, partIdxRT, MD_ABOVE_RIGHT); | |
1705 | valid[MD_ABOVE] = addMVPCand(mv[MD_ABOVE], picList, refIdx, partIdxRT, MD_ABOVE); | |
1706 | valid[MD_ABOVE_LEFT] = addMVPCand(mv[MD_ABOVE_LEFT], picList, refIdx, partIdxLT, MD_ABOVE_LEFT); | |
1707 | ||
1708 | validOrder[MD_BELOW_LEFT] = addMVPCandOrder(mvOrder[MD_BELOW_LEFT], picList, refIdx, partIdxLB, MD_BELOW_LEFT); | |
1709 | validOrder[MD_LEFT] = addMVPCandOrder(mvOrder[MD_LEFT], picList, refIdx, partIdxLB, MD_LEFT); | |
1710 | validOrder[MD_ABOVE_RIGHT] = addMVPCandOrder(mvOrder[MD_ABOVE_RIGHT], picList, refIdx, partIdxRT, MD_ABOVE_RIGHT); | |
1711 | validOrder[MD_ABOVE] = addMVPCandOrder(mvOrder[MD_ABOVE], picList, refIdx, partIdxRT, MD_ABOVE); | |
1712 | validOrder[MD_ABOVE_LEFT] = addMVPCandOrder(mvOrder[MD_ABOVE_LEFT], picList, refIdx, partIdxLT, MD_ABOVE_LEFT); | |
1713 | ||
1714 | // Left predictor search | |
1715 | if (valid[MD_BELOW_LEFT]) | |
1716 | amvpCand[num++] = mv[MD_BELOW_LEFT]; | |
1717 | else if (valid[MD_LEFT]) | |
1718 | amvpCand[num++] = mv[MD_LEFT]; | |
1719 | else if (validOrder[MD_BELOW_LEFT]) | |
1720 | amvpCand[num++] = mvOrder[MD_BELOW_LEFT]; | |
1721 | else if (validOrder[MD_LEFT]) | |
1722 | amvpCand[num++] = mvOrder[MD_LEFT]; | |
1723 | ||
1724 | bool bAddedSmvp = num > 0; | |
1725 | ||
1726 | // Above predictor search | |
1727 | if (valid[MD_ABOVE_RIGHT]) | |
1728 | amvpCand[num++] = mv[MD_ABOVE_RIGHT]; | |
1729 | else if (valid[MD_ABOVE]) | |
1730 | amvpCand[num++] = mv[MD_ABOVE]; | |
1731 | else if (valid[MD_ABOVE_LEFT]) | |
1732 | amvpCand[num++] = mv[MD_ABOVE_LEFT]; | |
1733 | ||
1734 | if (!bAddedSmvp) | |
1735 | { | |
1736 | if (validOrder[MD_ABOVE_RIGHT]) | |
1737 | amvpCand[num++] = mvOrder[MD_ABOVE_RIGHT]; | |
1738 | else if (validOrder[MD_ABOVE]) | |
1739 | amvpCand[num++] = mvOrder[MD_ABOVE]; | |
1740 | else if (validOrder[MD_ABOVE_LEFT]) | |
1741 | amvpCand[num++] = mvOrder[MD_ABOVE_LEFT]; | |
1742 | } | |
1743 | ||
1744 | int numMvc = 0; | |
1745 | for (int dir = MD_LEFT; dir <= MD_ABOVE_LEFT; dir++) | |
1746 | { | |
1747 | if (valid[dir] && mv[dir].notZero()) | |
1748 | mvc[numMvc++] = mv[dir]; | |
1749 | ||
1750 | if (validOrder[dir] && mvOrder[dir].notZero()) | |
1751 | mvc[numMvc++] = mvOrder[dir]; | |
1752 | } | |
1753 | ||
1754 | if (num == 2) | |
1755 | { | |
1756 | if (amvpCand[0] == amvpCand[1]) | |
1757 | num = 1; | |
1758 | else | |
1759 | /* AMVP_NUM_CANDS = 2 */ | |
1760 | return numMvc; | |
1761 | } | |
1762 | ||
1763 | if (m_slice->m_sps->bTemporalMVPEnabled) | |
1764 | { | |
1765 | uint32_t absPartAddr = m_absIdxInCTU + absPartIdx; | |
1766 | uint32_t partIdxRB = deriveRightBottomIdx(puIdx); | |
1767 | MV colmv; | |
1768 | ||
1769 | // co-located RightBottom temporal predictor (H) | |
1770 | int ctuIdx = -1; | |
1771 | ||
1772 | // image boundary check | |
1773 | if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples && | |
1774 | m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples) | |
1775 | { | |
1776 | uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; | |
1777 | uint32_t numPartInCUSize = s_numPartInCUSize; | |
1778 | bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU | |
1779 | bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row of CTU | |
1780 | ||
1781 | if (bNotLastCol && bNotLastRow) | |
1782 | { | |
1783 | absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1]; | |
1784 | ctuIdx = m_cuAddr; | |
1785 | } | |
1786 | else if (bNotLastCol) | |
1787 | absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)]; | |
1788 | else if (bNotLastRow) | |
1789 | { | |
1790 | absPartAddr = g_rasterToZscan[absPartIdxRB + 1]; | |
1791 | ctuIdx = m_cuAddr + 1; | |
1792 | } | |
1793 | else // is the right bottom corner of CTU | |
1794 | absPartAddr = 0; | |
1795 | } | |
1796 | if (ctuIdx >= 0 && getColMVP(colmv, refIdx, picList, ctuIdx, absPartAddr)) | |
1797 | { | |
1798 | amvpCand[num++] = colmv; | |
1799 | mvc[numMvc++] = colmv; | |
1800 | } | |
1801 | else | |
1802 | { | |
1803 | uint32_t partIdxCenter = deriveCenterIdx(puIdx); | |
1804 | uint32_t curCTUIdx = m_cuAddr; | |
1805 | if (getColMVP(colmv, refIdx, picList, curCTUIdx, partIdxCenter)) | |
1806 | { | |
1807 | amvpCand[num++] = colmv; | |
1808 | mvc[numMvc++] = colmv; | |
1809 | } | |
1810 | } | |
1811 | } | |
1812 | ||
1813 | while (num < AMVP_NUM_CANDS) | |
1814 | amvpCand[num++] = 0; | |
1815 | ||
1816 | return numMvc; | |
1817 | } | |
1818 | ||
1819 | void CUData::clipMv(MV& outMV) const | |
1820 | { | |
1821 | int mvshift = 2; | |
1822 | int offset = 8; | |
1823 | int xmax = (m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift; | |
1824 | int xmin = (-(int)g_maxCUSize - offset - (int)m_cuPelX + 1) << mvshift; | |
1825 | ||
1826 | int ymax = (m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift; | |
1827 | int ymin = (-(int)g_maxCUSize - offset - (int)m_cuPelY + 1) << mvshift; | |
1828 | ||
1829 | outMV.x = (int16_t)X265_MIN(xmax, X265_MAX(xmin, (int)outMV.x)); | |
1830 | outMV.y = (int16_t)X265_MIN(ymax, X265_MAX(ymin, (int)outMV.y)); | |
1831 | } | |
1832 | ||
1833 | bool CUData::addMVPCand(MV& mvp, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir) const | |
1834 | { | |
1835 | const CUData* tmpCU = NULL; | |
1836 | uint32_t idx = 0; | |
1837 | ||
1838 | switch (dir) | |
1839 | { | |
1840 | case MD_LEFT: | |
1841 | tmpCU = getPULeft(idx, partUnitIdx); | |
1842 | break; | |
1843 | case MD_ABOVE: | |
1844 | tmpCU = getPUAbove(idx, partUnitIdx); | |
1845 | break; | |
1846 | case MD_ABOVE_RIGHT: | |
1847 | tmpCU = getPUAboveRight(idx, partUnitIdx); | |
1848 | break; | |
1849 | case MD_BELOW_LEFT: | |
1850 | tmpCU = getPUBelowLeft(idx, partUnitIdx); | |
1851 | break; | |
1852 | case MD_ABOVE_LEFT: | |
1853 | tmpCU = getPUAboveLeft(idx, partUnitIdx); | |
1854 | break; | |
1855 | default: | |
1856 | return false; | |
1857 | } | |
1858 | ||
1859 | if (!tmpCU) | |
1860 | return false; | |
1861 | ||
1862 | int refPOC = m_slice->m_refPOCList[picList][refIdx]; | |
1863 | int partRefIdx = tmpCU->m_refIdx[picList][idx]; | |
1864 | if (partRefIdx >= 0 && refPOC == tmpCU->m_slice->m_refPOCList[picList][partRefIdx]) | |
1865 | { | |
1866 | mvp = tmpCU->m_mv[picList][idx]; | |
1867 | return true; | |
1868 | } | |
1869 | ||
1870 | int refPicList2nd = 0; | |
1871 | if (picList == 0) | |
1872 | refPicList2nd = 1; | |
1873 | else if (picList == 1) | |
1874 | refPicList2nd = 0; | |
1875 | ||
1876 | int curRefPOC = m_slice->m_refPOCList[picList][refIdx]; | |
1877 | int neibRefPOC; | |
1878 | ||
1879 | partRefIdx = tmpCU->m_refIdx[refPicList2nd][idx]; | |
1880 | if (partRefIdx >= 0) | |
1881 | { | |
1882 | neibRefPOC = tmpCU->m_slice->m_refPOCList[refPicList2nd][partRefIdx]; | |
1883 | if (neibRefPOC == curRefPOC) | |
1884 | { | |
1885 | // Same reference frame but different list | |
1886 | mvp = tmpCU->m_mv[refPicList2nd][idx]; | |
1887 | return true; | |
1888 | } | |
1889 | } | |
1890 | return false; | |
1891 | } | |
1892 | ||
1893 | bool CUData::addMVPCandOrder(MV& outMV, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir) const | |
1894 | { | |
1895 | const CUData* tmpCU = NULL; | |
1896 | uint32_t idx = 0; | |
1897 | ||
1898 | switch (dir) | |
1899 | { | |
1900 | case MD_LEFT: | |
1901 | tmpCU = getPULeft(idx, partUnitIdx); | |
1902 | break; | |
1903 | case MD_ABOVE: | |
1904 | tmpCU = getPUAbove(idx, partUnitIdx); | |
1905 | break; | |
1906 | case MD_ABOVE_RIGHT: | |
1907 | tmpCU = getPUAboveRight(idx, partUnitIdx); | |
1908 | break; | |
1909 | case MD_BELOW_LEFT: | |
1910 | tmpCU = getPUBelowLeft(idx, partUnitIdx); | |
1911 | break; | |
1912 | case MD_ABOVE_LEFT: | |
1913 | tmpCU = getPUAboveLeft(idx, partUnitIdx); | |
1914 | break; | |
1915 | default: | |
1916 | return false; | |
1917 | } | |
1918 | ||
1919 | if (!tmpCU) | |
1920 | return false; | |
1921 | ||
1922 | int refPicList2nd = 0; | |
1923 | if (picList == 0) | |
1924 | refPicList2nd = 1; | |
1925 | else if (picList == 1) | |
1926 | refPicList2nd = 0; | |
1927 | ||
1928 | int curPOC = m_slice->m_poc; | |
1929 | int curRefPOC = m_slice->m_refPOCList[picList][refIdx]; | |
1930 | int neibPOC = curPOC; | |
1931 | int neibRefPOC; | |
1932 | ||
1933 | int partRefIdx = tmpCU->m_refIdx[picList][idx]; | |
1934 | if (partRefIdx >= 0) | |
1935 | { | |
1936 | neibRefPOC = tmpCU->m_slice->m_refPOCList[picList][partRefIdx]; | |
1937 | MV mvp = tmpCU->m_mv[picList][idx]; | |
1938 | ||
1939 | scaleMvByPOCDist(outMV, mvp, curPOC, curRefPOC, neibPOC, neibRefPOC); | |
1940 | return true; | |
1941 | } | |
1942 | ||
1943 | partRefIdx = tmpCU->m_refIdx[refPicList2nd][idx]; | |
1944 | if (partRefIdx >= 0) | |
1945 | { | |
1946 | neibRefPOC = tmpCU->m_slice->m_refPOCList[refPicList2nd][partRefIdx]; | |
1947 | MV mvp = tmpCU->m_mv[refPicList2nd][idx]; | |
1948 | ||
1949 | scaleMvByPOCDist(outMV, mvp, curPOC, curRefPOC, neibPOC, neibRefPOC); | |
1950 | return true; | |
1951 | } | |
1952 | ||
1953 | return false; | |
1954 | } | |
1955 | ||
1956 | bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const | |
1957 | { | |
b53f7c52 JB |
1958 | const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx]; |
1959 | const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr); | |
72b9787e | 1960 | |
b53f7c52 | 1961 | if (colCU->m_predMode[partUnitIdx] == MODE_NONE) |
72b9787e JB |
1962 | return false; |
1963 | ||
b53f7c52 | 1964 | uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK; |
72b9787e JB |
1965 | |
1966 | if (colCU->isIntra(absPartAddr)) | |
1967 | return false; | |
1968 | ||
b53f7c52 | 1969 | int colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag; |
72b9787e JB |
1970 | |
1971 | int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; | |
1972 | ||
1973 | if (colRefIdx < 0) | |
1974 | { | |
1975 | colRefPicList = 1 - colRefPicList; | |
1976 | colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; | |
1977 | ||
1978 | if (colRefIdx < 0) | |
1979 | return false; | |
1980 | } | |
1981 | ||
1982 | // Scale the vector | |
b53f7c52 JB |
1983 | int colRefPOC = colCU->m_slice->m_refPOCList[colRefPicList][colRefIdx]; |
1984 | int colPOC = colCU->m_slice->m_poc; | |
1985 | MV colmv = colCU->m_mv[colRefPicList][absPartAddr]; | |
1986 | ||
1987 | int curRefPOC = m_slice->m_refPOCList[picList][outRefIdx]; | |
1988 | int curPOC = m_slice->m_poc; | |
72b9787e JB |
1989 | |
1990 | scaleMvByPOCDist(outMV, colmv, curPOC, curRefPOC, colPOC, colRefPOC); | |
1991 | return true; | |
1992 | } | |
1993 | ||
1994 | void CUData::scaleMvByPOCDist(MV& outMV, const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const | |
1995 | { | |
1996 | int diffPocD = colPOC - colRefPOC; | |
1997 | int diffPocB = curPOC - curRefPOC; | |
1998 | ||
1999 | if (diffPocD == diffPocB) | |
2000 | outMV = inMV; | |
2001 | else | |
2002 | { | |
2003 | int tdb = Clip3(-128, 127, diffPocB); | |
2004 | int tdd = Clip3(-128, 127, diffPocD); | |
2005 | int x = (0x4000 + abs(tdd / 2)) / tdd; | |
2006 | int scale = Clip3(-4096, 4095, (tdb * x + 32) >> 6); | |
2007 | outMV = scaleMv(inMV, scale); | |
2008 | } | |
2009 | } | |
2010 | ||
2011 | uint32_t CUData::deriveCenterIdx(uint32_t puIdx) const | |
2012 | { | |
2013 | uint32_t absPartIdx; | |
2014 | int puWidth, puHeight; | |
2015 | ||
2016 | getPartIndexAndSize(puIdx, absPartIdx, puWidth, puHeight); | |
2017 | ||
2018 | return g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU + absPartIdx] | |
2019 | + (puHeight >> (LOG2_UNIT_SIZE + 1)) * s_numPartInCUSize | |
2020 | + (puWidth >> (LOG2_UNIT_SIZE + 1))]; | |
2021 | } | |
2022 | ||
2023 | ScanType CUData::getCoefScanIdx(uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma, bool bIsIntra) const | |
2024 | { | |
2025 | uint32_t dirMode; | |
2026 | ||
2027 | if (!bIsIntra) | |
2028 | return SCAN_DIAG; | |
2029 | ||
2030 | // check that MDCS can be used for this TU | |
2031 | if (bIsLuma) | |
2032 | { | |
2033 | if (log2TrSize > MDCS_LOG2_MAX_SIZE) | |
2034 | return SCAN_DIAG; | |
2035 | ||
2036 | dirMode = m_lumaIntraDir[absPartIdx]; | |
2037 | } | |
2038 | else | |
2039 | { | |
2040 | if (log2TrSize > (uint32_t)(MDCS_LOG2_MAX_SIZE - m_hChromaShift)) | |
2041 | return SCAN_DIAG; | |
2042 | ||
2043 | dirMode = m_chromaIntraDir[absPartIdx]; | |
2044 | if (dirMode == DM_CHROMA_IDX) | |
2045 | { | |
2046 | dirMode = m_lumaIntraDir[(m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & 0xFC]; | |
2047 | dirMode = (m_chromaFormat == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[dirMode] : dirMode; | |
2048 | } | |
2049 | } | |
2050 | ||
2051 | if (abs((int)dirMode - VER_IDX) <= MDCS_ANGLE_LIMIT) | |
2052 | return SCAN_HOR; | |
2053 | else if (abs((int)dirMode - HOR_IDX) <= MDCS_ANGLE_LIMIT) | |
2054 | return SCAN_VER; | |
2055 | else | |
2056 | return SCAN_DIAG; | |
2057 | } | |
2058 | ||
2059 | void CUData::getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const | |
2060 | { | |
2061 | // set the group layout | |
2062 | result.log2TrSizeCG = log2TrSize - 2; | |
2063 | ||
2064 | // set the scan orders | |
2065 | result.scanType = getCoefScanIdx(absPartIdx, log2TrSize, bIsLuma, isIntra(absPartIdx)); | |
2066 | result.scan = g_scanOrder[result.scanType][log2TrSize - 2]; | |
2067 | result.scanCG = g_scanOrderCG[result.scanType][result.log2TrSizeCG]; | |
2068 | ||
2069 | if (log2TrSize == 2) | |
2070 | result.firstSignificanceMapContext = 0; | |
2071 | else if (log2TrSize == 3) | |
2072 | { | |
2073 | result.firstSignificanceMapContext = 9; | |
2074 | if (result.scanType != SCAN_DIAG && bIsLuma) | |
2075 | result.firstSignificanceMapContext += 6; | |
2076 | } | |
2077 | else | |
2078 | result.firstSignificanceMapContext = bIsLuma ? 21 : 12; | |
2079 | } | |
2080 | ||
2081 | #define CU_SET_FLAG(bitfield, flag, value) (bitfield) = ((bitfield) & (~(flag))) | ((~((value) - 1)) & (flag)) | |
2082 | ||
b53f7c52 | 2083 | void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) |
72b9787e JB |
2084 | { |
2085 | // Initialize the coding blocks inside the CTB | |
2086 | for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= MIN_LOG2_CU_SIZE; log2CUSize--) | |
2087 | { | |
2088 | uint32_t blockSize = 1 << log2CUSize; | |
2089 | uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize); | |
2090 | int32_t lastLevelFlag = log2CUSize == MIN_LOG2_CU_SIZE; | |
2091 | for (uint32_t sbY = 0; sbY < sbWidth; sbY++) | |
2092 | { | |
2093 | for (uint32_t sbX = 0; sbX < sbWidth; sbX++) | |
2094 | { | |
2095 | uint32_t depthIdx = g_depthScanIdx[sbY][sbX]; | |
2096 | uint32_t cuIdx = rangeCUIdx + depthIdx; | |
2097 | uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx << 2); | |
b53f7c52 JB |
2098 | uint32_t px = sbX * blockSize; |
2099 | uint32_t py = sbY * blockSize; | |
2100 | int32_t presentFlag = px < ctuWidth && py < ctuHeight; | |
2101 | int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > ctuWidth || py + blockSize > ctuHeight); | |
72b9787e JB |
2102 | |
2103 | /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */ | |
2104 | uint32_t xOffset = (sbX * blockSize) >> 3; | |
2105 | uint32_t yOffset = (sbY * blockSize) >> 3; | |
2106 | X265_CHECK(cuIdx < CUGeom::MAX_GEOMS, "CU geom index bug\n"); | |
2107 | ||
2108 | CUGeom *cu = cuDataArray + cuIdx; | |
2109 | cu->log2CUSize = log2CUSize; | |
2110 | cu->childOffset = childIdx - cuIdx; | |
2111 | cu->encodeIdx = g_depthScanIdx[yOffset][xOffset] * 4; | |
2112 | cu->numPartitions = (NUM_CU_PARTITIONS >> ((g_maxLog2CUSize - cu->log2CUSize) * 2)); | |
2113 | cu->depth = g_log2Size[maxCUSize] - log2CUSize; | |
2114 | ||
2115 | cu->flags = 0; | |
2116 | CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag); | |
2117 | CU_SET_FLAG(cu->flags, CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT, splitMandatoryFlag); | |
2118 | CU_SET_FLAG(cu->flags, CUGeom::LEAF, lastLevelFlag); | |
2119 | } | |
2120 | } | |
2121 | rangeCUIdx += sbWidth * sbWidth; | |
2122 | } | |
2123 | } |