Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2014 x265 project | |
3 | * | |
4 | * Authors: Steve Borho <steve@borho.org> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com. | |
22 | *****************************************************************************/ | |
23 | ||
24 | #ifndef X265_CUDATA_H | |
25 | #define X265_CUDATA_H | |
26 | ||
27 | #include "common.h" | |
28 | #include "slice.h" | |
29 | #include "mv.h" | |
30 | ||
31 | namespace x265 { | |
32 | // private namespace | |
33 | ||
34 | class FrameData; | |
35 | class Slice; | |
36 | struct TUEntropyCodingParameters; | |
37 | struct CUDataMemPool; | |
38 | ||
39 | enum PartSize /* partition shapes of a CU; the enumerator value is used as the index into nbPartsTable */ |
40 | { |
41 | SIZE_2Nx2N, // symmetric motion partition, 2Nx2N | |
42 | SIZE_2NxN, // symmetric motion partition, 2Nx N | |
43 | SIZE_Nx2N, // symmetric motion partition, Nx2N | |
44 | SIZE_NxN, // symmetric motion partition, Nx N | |
45 | SIZE_2NxnU, // asymmetric motion partition, 2Nx( N/2) + 2Nx(3N/2) | |
46 | SIZE_2NxnD, // asymmetric motion partition, 2Nx(3N/2) + 2Nx( N/2) | |
47 | SIZE_nLx2N, // asymmetric motion partition, ( N/2)x2N + (3N/2)x2N | |
48 | SIZE_nRx2N, // asymmetric motion partition, (3N/2)x2N + ( N/2)x2N | |
b53f7c52 | 49 | NUM_SIZES /* number of partition sizes listed above (8) */
72b9787e JB |
50 | }; |
51 | ||
52 | enum PredMode /* prediction mode of a part; values are bit flags, stored as bytes in CUData::m_predMode */ |
53 | { |
b53f7c52 JB |
54 | MODE_NONE = 0, /* no flag set */ |
55 | MODE_INTER = (1 << 0), /* inter flag; CUData::isInter() tests this bit with a bit-and */ | |
56 | MODE_INTRA = (1 << 1), /* intra; CUData::isIntra() compares for equality with this value */ | |
57 | MODE_SKIP = (1 << 2) | MODE_INTER /* skip also carries the MODE_INTER bit, so isInter() is true for skipped parts */ | |
72b9787e JB |
58 | }; |
59 | ||
60 | // motion vector predictor direction used in AMVP; passed as `dir` to CUData::addMVPCand() and CUData::addMVPCandOrder() | |
61 | enum MVP_DIR | |
62 | { | |
63 | MD_LEFT = 0, // MVP of left block | |
64 | MD_ABOVE, // MVP of above block | |
65 | MD_ABOVE_RIGHT, // MVP of above right block | |
66 | MD_BELOW_LEFT, // MVP of below left block | |
67 | MD_ABOVE_LEFT // MVP of above left block | |
68 | }; | |
69 | ||
70 | struct CUGeom /* geometry record for one CU of a CTU; CUData::calcCTUGeoms() takes an array of MAX_GEOMS of these */ |
71 | { |
72 | enum { /* bit flags combined in the `flags` member below */ |
73 | INTRA = 1<<0, // CU is intra predicted | |
74 | PRESENT = 1<<1, // CU is not completely outside the frame | |
75 | SPLIT_MANDATORY = 1<<2, // CU split is mandatory if CU is inside frame and can be split | |
76 | LEAF = 1<<3, // CU is a leaf node of the CTU | |
77 | SPLIT = 1<<4, // CU is currently split in four child CUs. | |
78 | }; | |
79 | ||
80 | // (1 + 4 + 16 + 64) = 85; presumably one entry per CU over four levels of four-way splitting - TODO confirm | |
81 | enum { MAX_GEOMS = 85 }; | |
82 | ||
83 | uint32_t log2CUSize; // Log2 of the CU size. | |
84 | uint32_t childOffset; // offset of the first child CU from current CU | |
85 | uint32_t encodeIdx; // Encoding index of this CU in terms of 4x4 blocks. | |
86 | uint32_t numPartitions; // Number of 4x4 blocks in the CU | |
87 | uint32_t depth; // depth of this CU relative to the CTU | |
88 | uint32_t flags; // CU flags: an OR of the INTRA/PRESENT/SPLIT_MANDATORY/LEAF/SPLIT bits above. | |
89 | }; | |
90 | ||
91 | struct MVField /* a motion vector paired with its reference index; output type of CUData::getMvField() */ |
92 | { |
93 | MV mv; /* motion vector */ | |
94 | int refIdx; /* reference index that goes with mv */ | |
95 | }; | |
96 | ||
97 | typedef void(*cucopy_t)(uint8_t* dst, uint8_t* src); // copies a fixed-size run of bytes from src to dst; dst and src are aligned to MIN(size, 32) | |
98 | typedef void(*cubcast_t)(uint8_t* dst, uint8_t val); // broadcasts val over a fixed-size run of bytes at dst; dst is aligned to MIN(size, 32) | |
99 | ||
100 | // Partition count table, indexed by PartSize value: 1 for SIZE_2Nx2N, 4 for SIZE_NxN, 2 for every other mode. | |
101 | const uint32_t nbPartsTable[8] = { 1, 2, 2, 4, 2, 2, 2, 2 }; | |
102 | ||
103 | // Holds part data for a CU of a given size, from an 8x8 CU to a CTU; the per-part arrays hold one element per 4x4 partition | |
104 | class CUData |
105 | { |
106 | public: |
107 | ||
108 | static cubcast_t s_partSet[NUM_FULL_DEPTH]; // pointer to broadcast set functions per absolute depth | |
109 | static uint32_t s_numPartInCUSize; | |
110 | ||
111 | FrameData* m_encData; | |
112 | const Slice* m_slice; | |
113 | ||
114 | cucopy_t m_partCopy; // pointer to function that copies m_numPartitions elements | |
115 | cubcast_t m_partSet; // pointer to function that sets m_numPartitions elements | |
116 | cucopy_t m_subPartCopy; // pointer to function that copies m_numPartitions/4 elements, may be NULL | |
117 | cubcast_t m_subPartSet; // pointer to function that sets m_numPartitions/4 elements, may be NULL | |
118 | ||
119 | uint32_t m_cuAddr; // address of CTU within the picture in raster order | |
120 | uint32_t m_absIdxInCTU; // address of CU within its CTU in Z scan order | |
121 | uint32_t m_cuPelX; // CU position within the picture, in pixels (X) | |
122 | uint32_t m_cuPelY; // CU position within the picture, in pixels (Y) | |
123 | uint32_t m_numPartitions; // maximum number of 4x4 partitions within this CU | |
124 | ||
125 | int m_chromaFormat; | |
126 | int m_hChromaShift; | |
127 | int m_vChromaShift; | |
128 | ||
129 | /* Per-part data, stored contiguously; BytesPerPartition below must equal the number of byte arrays declared here */ | |
b53f7c52 | 130 | int8_t* m_qp; // array of QP values (signed)
72b9787e | 131 | uint8_t* m_log2CUSize; // array of cu log2Size TODO: seems redundant to depth
72b9787e JB |
132 | uint8_t* m_lumaIntraDir; // array of intra directions (luma) |
133 | uint8_t* m_tqBypass; // array of CU lossless flags | |
b53f7c52 | 134 | int8_t* m_refIdx[2]; // array of motion reference indices per list
72b9787e | 135 | uint8_t* m_cuDepth; // array of depths
b53f7c52 JB |
136 | uint8_t* m_predMode; // array of prediction modes (PredMode values) |
137 | uint8_t* m_partSize; // array of partition sizes (PartSize values) | |
72b9787e JB |
138 | uint8_t* m_mergeFlag; // array of merge flags |
139 | uint8_t* m_interDir; // array of inter directions | |
140 | uint8_t* m_mvpIdx[2]; // array of motion vector predictor candidates or merge candidate indices [0] | |
141 | uint8_t* m_tuDepth; // array of transform indices | |
142 | uint8_t* m_transformSkip[3]; // array of transform skipping flags per plane | |
143 | uint8_t* m_cbf[3]; // array of coded block flags (CBF) per plane | |
144 | uint8_t* m_chromaIntraDir; // array of intra directions (chroma) | |
b53f7c52 | 145 | enum { BytesPerPartition = 21 }; // combined sizeof() of all per-part data: the 21 one-byte arrays declared above
72b9787e JB |
146 |
147 | coeff_t* m_trCoeff[3]; // transformed coefficient buffer per plane | |
148 | ||
149 | MV* m_mv[2]; // array of motion vectors per list | |
150 | MV* m_mvd[2]; // array of coded motion vector deltas per list | |
151 | enum { TMVP_UNIT_MASK = 0xF0 }; // mask for mapping an index into a compressed (reference) MV field | |
152 | ||
153 | const CUData* m_cuAboveLeft; // pointer to above-left neighbor CTU | |
154 | const CUData* m_cuAboveRight; // pointer to above-right neighbor CTU | |
155 | const CUData* m_cuAbove; // pointer to above neighbor CTU | |
156 | const CUData* m_cuLeft; // pointer to left neighbor CTU | |
157 | ||
158 | CUData(); | |
159 | ||
160 | void initialize(const CUDataMemPool& dataPool, uint32_t depth, int csp, int instance); | |
b53f7c52 | 161 | static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]);
72b9787e JB |
162 |
163 | void initCTU(const Frame& frame, uint32_t cuAddr, int qp); | |
164 | void initSubCU(const CUData& ctu, const CUGeom& cuGeom); | |
165 | void initLosslessCU(const CUData& cu, const CUGeom& cuGeom); | |
166 | ||
167 | void copyPartFrom(const CUData& cu, const CUGeom& childGeom, uint32_t subPartIdx); | |
168 | void setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx); | |
169 | void copyToPic(uint32_t depth) const; | |
170 | ||
171 | /* RD-0 methods called only from encodeResidue */ | |
172 | void copyFromPic(const CUData& ctu, const CUGeom& cuGeom); | |
173 | void updatePic(uint32_t depth) const; | |
174 | ||
175 | void setPartSizeSubParts(PartSize size) { m_partSet(m_partSize, (uint8_t)size); } /* broadcast size over m_partSize via m_partSet */ | |
72b9787e JB |
176 | void setPredModeSubParts(PredMode mode) { m_partSet(m_predMode, (uint8_t)mode); } /* broadcast mode over m_predMode via m_partSet */ |
177 | void clearCbf() { m_partSet(m_cbf[0], 0); m_partSet(m_cbf[1], 0); m_partSet(m_cbf[2], 0); } /* zero the CBF arrays of all three planes */ | |
178 | ||
179 | /* these functions all take depth as an absolute depth from the CTU; it selects the s_partSet[depth] broadcast function and thereby the number of parts written, starting at absPartIdx */ | |
b53f7c52 | 180 | void setQPSubParts(int8_t qp, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth]((uint8_t*)m_qp + absPartIdx, (uint8_t)qp); } /* the signed QP is cast to a byte so the uint8_t broadcast function can be reused */
72b9787e JB |
181 | void setTUDepthSubParts(uint8_t tuDepth, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_tuDepth + absPartIdx, tuDepth); } |
182 | void setLumaIntraDirSubParts(uint8_t dir, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_lumaIntraDir + absPartIdx, dir); } | |
183 | void setChromIntraDirSubParts(uint8_t dir, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_chromaIntraDir + absPartIdx, dir); } | |
184 | void setCbfSubParts(uint8_t cbf, TextType ttype, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_cbf[ttype] + absPartIdx, cbf); } | |
185 | void setCbfPartRange(uint8_t cbf, TextType ttype, uint32_t absPartIdx, uint32_t coveredPartIdxes) { memset(m_cbf[ttype] + absPartIdx, cbf, coveredPartIdxes); } /* explicit length: memset of coveredPartIdxes bytes rather than a depth-selected broadcast */ | |
186 | void setTransformSkipSubParts(uint8_t tskip, TextType ttype, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_transformSkip[ttype] + absPartIdx, tskip); } | |
187 | void setTransformSkipPartRange(uint8_t tskip, TextType ttype, uint32_t absPartIdx, uint32_t coveredPartIdxes) { memset(m_transformSkip[ttype] + absPartIdx, tskip, coveredPartIdxes); } /* explicit length: memset of coveredPartIdxes bytes rather than a depth-selected broadcast */ | |
188 | ||
b53f7c52 | 189 | bool setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth);
72b9787e JB |
190 |
191 | void setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx); | |
192 | void setPUMv(int list, const MV& mv, int absPartIdx, int puIdx); | |
b53f7c52 | 193 | void setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx);
72b9787e | 194 |
b53f7c52 | 195 | uint8_t getCbf(uint32_t absPartIdx, TextType ttype, uint32_t tuDepth) const { return (m_cbf[ttype][absPartIdx] >> tuDepth) & 0x1; } /* returns bit number tuDepth of the stored CBF byte */
72b9787e | 196 | uint8_t getQtRootCbf(uint32_t absPartIdx) const { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx]; } /* 1 if any of the three planes has a nonzero CBF byte at absPartIdx, else 0 */
b53f7c52 | 197 | int8_t getRefQP(uint32_t currAbsIdxInCTU) const;
72b9787e JB |
198 | uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField (*mvFieldNeighbours)[2], uint8_t* interDirNeighbours) const; |
199 | void clipMv(MV& outMV) const; | |
200 | int fillMvpCand(uint32_t puIdx, uint32_t absPartIdx, int picList, int refIdx, MV* amvpCand, MV* mvc) const; | |
201 | void getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const; | |
202 | void getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const; | |
203 | ||
204 | uint32_t getNumPartInter() const { return nbPartsTable[(int)m_partSize[0]]; } /* partition count for the partition size stored at part 0 */ | |
205 | bool isIntra(uint32_t absPartIdx) const { return m_predMode[absPartIdx] == MODE_INTRA; } /* exact match against MODE_INTRA */ | |
b53f7c52 JB |
206 | bool isInter(uint32_t absPartIdx) const { return !!(m_predMode[absPartIdx] & MODE_INTER); } /* bit test, so it is also true for MODE_SKIP, which includes the MODE_INTER bit */ |
207 | bool isSkipped(uint32_t absPartIdx) const { return m_predMode[absPartIdx] == MODE_SKIP; } /* exact match against MODE_SKIP */ | |
72b9787e JB |
208 | bool isBipredRestriction() const { return m_log2CUSize[0] == 3 && m_partSize[0] != SIZE_2Nx2N; } /* true when the log2 size at part 0 is 3 (8x8) and the partition size is not 2Nx2N */ |
209 | ||
210 | void getPartIndexAndSize(uint32_t puIdx, uint32_t& absPartIdx, int& puWidth, int& puHeight) const; | |
211 | void getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& mvField) const; | |
212 | ||
213 | void getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const; | |
214 | int getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const; | |
215 | void deriveLeftRightTopIdxAdi(uint32_t& partIdxLT, uint32_t& partIdxRT, uint32_t partOffset, uint32_t partDepth) const; | |
216 | ||
217 | uint32_t getSCUAddr() const { return (m_cuAddr << g_maxFullDepth * 2) + m_absIdxInCTU; } /* CTU address shifted left by (2 * g_maxFullDepth) bits, plus the CU index within the CTU */ | |
218 | uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const; | |
219 | uint32_t getCtxSkipFlag(uint32_t absPartIdx) const; | |
220 | ScanType getCoefScanIdx(uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma, bool bIsIntra) const; | |
221 | void getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const; | |
222 | ||
223 | const CUData* getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const; | |
b53f7c52 | 224 | const CUData* getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const;
72b9787e JB |
225 | const CUData* getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const; |
226 | const CUData* getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const; | |
227 | const CUData* getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const; | |
228 | ||
229 | const CUData* getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t currAbsIdxInCTU) const; | |
230 | const CUData* getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t currAbsIdxInCTU) const; | |
231 | ||
232 | const CUData* getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const; | |
233 | const CUData* getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const; | |
234 | ||
235 | protected: |
236 | ||
237 | template<typename T> | |
238 | void setAllPU(T *p, const T& val, int absPartIdx, int puIdx); | |
239 | ||
b53f7c52 | 240 | int8_t getLastCodedQP(uint32_t absPartIdx) const;
72b9787e JB |
241 | int getLastValidPartIdx(int absPartIdx) const; |
242 | ||
243 | bool hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const; | |
244 | ||
245 | bool isDiffMER(int xN, int yN, int xP, int yP) const; | |
246 | ||
247 | // add possible motion vector predictor candidates | |
248 | bool addMVPCand(MV& mvp, int picList, int refIdx, uint32_t absPartIdx, MVP_DIR dir) const; | |
249 | bool addMVPCandOrder(MV& mvp, int picList, int refIdx, uint32_t absPartIdx, MVP_DIR dir) const; | |
250 | ||
251 | bool getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int absPartIdx) const; | |
252 | ||
253 | void scaleMvByPOCDist(MV& outMV, const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const; | |
254 | ||
255 | void deriveLeftRightTopIdx(uint32_t puIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const; | |
256 | ||
257 | uint32_t deriveCenterIdx(uint32_t puIdx) const; | |
258 | uint32_t deriveRightBottomIdx(uint32_t puIdx) const; | |
259 | uint32_t deriveLeftBottomIdx(uint32_t puIdx) const; | |
260 | }; |
261 | ||
262 | // TU settings for entropy encoding; passed by reference as the `result` argument of CUData::getTUEntropyCodingParameters() | |
263 | struct TUEntropyCodingParameters |
264 | { |
265 | const uint16_t *scan; | |
266 | const uint16_t *scanCG; | |
267 | ScanType scanType; | |
268 | uint32_t log2TrSizeCG; | |
269 | uint32_t firstSignificanceMapContext; | |
270 | }; |
271 | ||
272 | struct CUDataMemPool /* holds three raw memory blocks, allocated by create() and released by destroy(); passed to CUData::initialize() */ |
273 | { |
274 | uint8_t* charMemBlock; /* byte data: CUData::BytesPerPartition bytes per partition per instance */ | |
275 | coeff_t* trCoeffMemBlock; /* coefficient data: (sizeL + 2 * sizeC) entries per instance, see create() */ | |
276 | MV* mvMemBlock; /* motion vector data: 4 MV entries per partition per instance */ | |
277 | ||
278 | CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; } /* start with no block allocated */ | |
279 | ||
280 | bool create(uint32_t depth, uint32_t csp, uint32_t numInstances) /* allocates all three blocks sized for numInstances CUs at the given depth and chroma format csp; returns true on success, false via the fail label */ | |
281 | { |
282 | uint32_t numPartition = NUM_CU_PARTITIONS >> (depth * 2); /* each depth step divides the partition count by 4 */ | |
283 | uint32_t cuSize = g_maxCUSize >> depth; /* each depth step halves the CU edge length */ | |
284 | uint32_t sizeL = cuSize * cuSize; /* entries for one cuSize x cuSize plane */ | |
285 | uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp)); /* sizeL reduced by the horizontal and vertical chroma shifts of csp */ | |
286 | CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2) * numInstances); | |
287 | CHECKED_MALLOC(charMemBlock, uint8_t, numPartition * numInstances * CUData::BytesPerPartition); | |
288 | CHECKED_MALLOC(mvMemBlock, MV, numPartition * 4 * numInstances); /* NOTE(review): the factor 4 presumably covers CUData::m_mv[2] and m_mvd[2] - confirm in CUData::initialize */ | |
289 | return true; | |
290 | ||
291 | fail: /* NOTE(review): CHECKED_MALLOC presumably jumps here when an allocation fails - confirm against its definition */ | |
292 | return false; /* blocks already allocated are not released here; destroy() frees all three */ | |
293 | } |
294 | ||
295 | void destroy() /* releases the three blocks with X265_FREE */ | |
296 | { |
297 | X265_FREE(trCoeffMemBlock); | |
298 | X265_FREE(mvMemBlock); | |
299 | X265_FREE(charMemBlock); | |
300 | } |
301 | }; |
302 | } | |
303 | ||
304 | #endif // ifndef X265_CUDATA_H |