1 /*****************************************************************************
2 * Copyright (C) 2014 x265 project
4 * Authors: Steve Borho <steve@borho.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
36 struct TUEntropyCodingParameters
;
// Partition shapes of a CU. nbPartsTable (below) appears to be indexed by
// these values, so the enumerator order must stay in step with that table.
// NOTE(review): the enclosing enum declaration is not visible in this excerpt.
41 SIZE_2Nx2N
, // symmetric motion partition, 2Nx2N
42 SIZE_2NxN
, // symmetric motion partition, 2Nx N
43 SIZE_Nx2N
, // symmetric motion partition, Nx2N
44 SIZE_NxN
, // symmetric motion partition, Nx N
45 SIZE_2NxnU
, // asymmetric motion partition, 2Nx( N/2) + 2Nx(3N/2)
46 SIZE_2NxnD
, // asymmetric motion partition, 2Nx(3N/2) + 2Nx( N/2)
47 SIZE_nLx2N
, // asymmetric motion partition, ( N/2)x2N + (3N/2)x2N
48 SIZE_nRx2N
, // asymmetric motion partition, (3N/2)x2N + ( N/2)x2N
59 // motion vector predictor direction used in AMVP
// Only MD_LEFT carries an explicit value (0); the remaining enumerators take
// the consecutive values that follow it.
// NOTE(review): presumably these are the enumerators of MVP_DIR (the type
// taken by addMVPCand/addMVPCandOrder) — the enum header is not visible here.
62 MD_LEFT
= 0, // MVP of left block
63 MD_ABOVE
, // MVP of above block
64 MD_ABOVE_RIGHT
, // MVP of above right block
65 MD_BELOW_LEFT
, // MVP of below left block
66 MD_ABOVE_LEFT
// MVP of above left block
// CU geometry flags. Each enumerator occupies its own bit (bits 0..4), so
// several of them can be combined in a single mask.
// NOTE(review): presumably stored in the `flags` field declared below — the
// enclosing enum/struct headers are not visible in this excerpt.
72 INTRA
= 1<<0, // CU is intra predicted
73 PRESENT
= 1<<1, // CU is not completely outside the frame
74 SPLIT_MANDATORY
= 1<<2, // CU split is mandatory if CU is inside frame and can be split
75 LEAF
= 1<<3, // CU is a leaf node of the CTU
76 SPLIT
= 1<<4, // CU is currently split in four child CUs.
// Length of the CUGeom array passed to calcCTUGeoms().
// NOTE(review): the sum below is 4^0 + 4^1 + 4^2 + 4^3, i.e. the node count of
// a four-level quadtree; presumably one CUGeom per CU at depths 0..3 — confirm.
79 // (1 + 4 + 16 + 64) = 85.
80 enum { MAX_GEOMS
= 85 };
82 uint32_t log2CUSize
; // log2 of the CU size
83 uint32_t childOffset
; // offset of the first child CU from the current CU
84 uint32_t encodeIdx
; // encoding index of this CU in terms of 4x4 blocks
85 uint32_t numPartitions
; // number of 4x4 blocks in the CU
86 uint32_t depth
; // depth of this CU relative to the CTU
87 uint32_t flags
; // CU flags (presumably a mask of the INTRA/PRESENT/SPLIT_MANDATORY/LEAF/SPLIT bits above — confirm)
// Function-pointer types for the fixed-size copy and broadcast primitives.
// Neither signature carries an element count; the count is implied by which
// function the pointer refers to (see m_partCopy/m_partSet and s_partSet).
96 typedef void(*cucopy_t
)(uint8_t* dst
, uint8_t* src
); // dst and src are aligned to MIN(size, 32)
97 typedef void(*cubcast_t
)(uint8_t* dst
, uint8_t val
); // dst is aligned to MIN(size, 32)
99 // Partition count table, index represents partitioning mode.
// Read by getNumPartInter(), which indexes it with m_partSize[0]. The table has
// exactly eight entries, so the index must stay in the range 0..7.
100 const uint32_t nbPartsTable
[8] = { 1, 2, 2, 4, 2, 2, 2, 2 };
102 // Holds part data for a CU of a given size, from an 8x8 CU to a CTU
107 static cubcast_t s_partSet
[NUM_FULL_DEPTH
]; // pointer to broadcast set functions per absolute depth (indexed by `depth` in the set*SubParts() helpers)
108 static uint32_t s_numPartInCUSize
; // NOTE(review): undocumented; presumably the number of 4x4 partitions along one CTU edge — confirm where it is initialized
// NOTE(review): the two context pointers below are undocumented in this
// excerpt; presumably the frame-level encoder data and the slice this CU
// belongs to — confirm where they are assigned.
110 FrameData
* m_encData
;
111 const Slice
* m_slice
;
// Copy/broadcast primitives. The inline set*SubParts() helpers that take no
// part index all go through m_partSet.
113 cucopy_t m_partCopy
; // pointer to function that copies m_numPartitions elements
114 cubcast_t m_partSet
; // pointer to function that sets m_numPartitions elements
115 cucopy_t m_subPartCopy
; // pointer to function that copies m_numPartitions/4 elements, may be NULL
116 cubcast_t m_subPartSet
; // pointer to function that sets m_numPartitions/4 elements, may be NULL
// Location of this CU.
118 uint32_t m_cuAddr
; // address of CTU within the picture in raster order
119 uint32_t m_absIdxInCTU
; // address of CU within its CTU in Z scan order
120 uint32_t m_cuPelX
; // CU position within the picture, in pixels (X)
121 uint32_t m_cuPelY
; // CU position within the picture, in pixels (Y)
122 uint32_t m_numPartitions
; // maximum number of 4x4 partitions within this CU
128 /* Per-part data, stored contiguously */
// Every array below has single-byte elements (char or uint8_t).
129 char* m_qp
; // array of QP values
130 uint8_t* m_log2CUSize
; // array of cu log2Size TODO: seems redundant to depth
131 uint8_t* m_partSize
; // array of partition sizes
132 uint8_t* m_predMode
; // array of prediction modes
133 uint8_t* m_lumaIntraDir
; // array of intra directions (luma)
134 uint8_t* m_tqBypass
; // array of CU lossless flags
135 char* m_refIdx
[2]; // array of motion reference indices per list
136 uint8_t* m_cuDepth
; // array of depths
137 uint8_t* m_skipFlag
; // array of skip flags
138 uint8_t* m_mergeFlag
; // array of merge flags
139 uint8_t* m_interDir
; // array of inter directions
140 uint8_t* m_mvpIdx
[2]; // array of motion vector predictor candidates or merge candidate indices [0]
141 uint8_t* m_tuDepth
; // array of transform indices
142 uint8_t* m_transformSkip
[3]; // array of transform skipping flags per plane
143 uint8_t* m_cbf
[3]; // array of coded block flags (CBF) per plane
144 uint8_t* m_chromaIntraDir
; // array of intra directions (chroma)
// 22 = number of byte arrays declared above: 12 single pointers plus
// m_refIdx[2], m_mvpIdx[2], m_transformSkip[3] and m_cbf[3].
// CUDataMemPool::create() sizes charMemBlock with this constant, so it must be
// kept in sync whenever a per-part array is added or removed.
145 enum { BytesPerPartition
= 22 }; // combined sizeof() of all per-part data
147 coeff_t
* m_trCoeff
[3]; // transformed coefficient buffer per plane
// m_mv and m_mvd together form four MV arrays; CUDataMemPool::create() reserves
// numPartition * 4 MVs per instance (presumably for exactly these four — confirm).
149 MV
* m_mv
[2]; // array of motion vectors per list
150 MV
* m_mvd
[2]; // array of coded motion vector deltas per list
151 enum { TMVP_UNIT_MASK
= 0xF0 }; // mask for mapping an index into a compressed (reference) MV field
// Neighbouring CTUs, held as read-only pointers.
// NOTE(review): presumably NULL when the neighbour does not exist or is not
// available — confirm where these are assigned.
153 const CUData
* m_cuAboveLeft
; // pointer to above-left neighbor CTU
154 const CUData
* m_cuAboveRight
; // pointer to above-right neighbor CTU
155 const CUData
* m_cuAbove
; // pointer to above neighbor CTU
156 const CUData
* m_cuLeft
; // pointer to left neighbor CTU
// Setup, geometry and copy methods.
// NOTE(review): these methods are only declared in this excerpt. The notes
// below are limited to what the signatures show; confirm behaviour against the
// implementations.
160 void initialize(const CUDataMemPool
& dataPool
, uint32_t depth
, int csp
, int instance
);
// cuDataArray must provide CUGeom::MAX_GEOMS entries; the method itself is const.
161 void calcCTUGeoms(uint32_t picWidth
, uint32_t picHeight
, uint32_t maxCUSize
, CUGeom cuDataArray
[CUGeom::MAX_GEOMS
]) const;
163 void initCTU(const Frame
& frame
, uint32_t cuAddr
, int qp
);
164 void initSubCU(const CUData
& ctu
, const CUGeom
& cuGeom
);
165 void initLosslessCU(const CUData
& cu
, const CUGeom
& cuGeom
);
167 void copyPartFrom(const CUData
& cu
, const CUGeom
& childGeom
, uint32_t subPartIdx
);
168 void setEmptyPart(const CUGeom
& childGeom
, uint32_t subPartIdx
);
// Declared const: does not modify this object's own members.
169 void copyToPic(uint32_t depth
) const;
171 /* RD-0 methods called only from encodeResidue */
172 void copyFromPic(const CUData
& ctu
, const CUGeom
& cuGeom
);
173 void updatePic(uint32_t depth
) const;
175 void setPartSizeSubParts(PartSize size
) { m_partSet(m_partSize
, (uint8_t)size
); }
176 void setSkipFlagSubParts(uint8_t skipFlag
) { m_partSet(m_skipFlag
, skipFlag
); }
177 void setPredModeSubParts(PredMode mode
) { m_partSet(m_predMode
, (uint8_t)mode
); }
178 void clearCbf() { m_partSet(m_cbf
[0], 0); m_partSet(m_cbf
[1], 0); m_partSet(m_cbf
[2], 0); }
180 /* the set*SubParts() functions below take depth as an absolute depth from the CTU; it selects the s_partSet entry and therefore the number of parts written */
181 void setQPSubParts(char qp
, uint32_t absPartIdx
, uint32_t depth
) { s_partSet
[depth
]((uint8_t*)m_qp
+ absPartIdx
, (uint8_t)qp
); }
182 void setTUDepthSubParts(uint8_t tuDepth
, uint32_t absPartIdx
, uint32_t depth
) { s_partSet
[depth
](m_tuDepth
+ absPartIdx
, tuDepth
); }
183 void setLumaIntraDirSubParts(uint8_t dir
, uint32_t absPartIdx
, uint32_t depth
) { s_partSet
[depth
](m_lumaIntraDir
+ absPartIdx
, dir
); }
184 void setChromIntraDirSubParts(uint8_t dir
, uint32_t absPartIdx
, uint32_t depth
) { s_partSet
[depth
](m_chromaIntraDir
+ absPartIdx
, dir
); }
185 void setCbfSubParts(uint8_t cbf
, TextType ttype
, uint32_t absPartIdx
, uint32_t depth
) { s_partSet
[depth
](m_cbf
[ttype
] + absPartIdx
, cbf
); }
186 void setCbfPartRange(uint8_t cbf
, TextType ttype
, uint32_t absPartIdx
, uint32_t coveredPartIdxes
) { memset(m_cbf
[ttype
] + absPartIdx
, cbf
, coveredPartIdxes
); }
187 void setTransformSkipSubParts(uint8_t tskip
, TextType ttype
, uint32_t absPartIdx
, uint32_t depth
) { s_partSet
[depth
](m_transformSkip
[ttype
] + absPartIdx
, tskip
); }
188 void setTransformSkipPartRange(uint8_t tskip
, TextType ttype
, uint32_t absPartIdx
, uint32_t coveredPartIdxes
) { memset(m_transformSkip
[ttype
] + absPartIdx
, tskip
, coveredPartIdxes
); }
// NOTE(review): declared only; behaviour is defined outside this excerpt.
// The meaning of setQPSubCUs()'s bool result is not visible here — check the
// implementation before relying on it.
190 bool setQPSubCUs(char qp
, uint32_t absPartIdx
, uint32_t depth
);
// Per-PU setters. Note that setPUInterDir() takes unsigned indices while
// setPUMv()/setPURefIdx() take signed ones.
192 void setPUInterDir(uint8_t dir
, uint32_t absPartIdx
, uint32_t puIdx
);
193 void setPUMv(int list
, const MV
& mv
, int absPartIdx
, int puIdx
);
194 void setPURefIdx(int list
, char refIdx
, int absPartIdx
, int puIdx
);
196 uint8_t getCbf(uint32_t absPartIdx
, TextType ttype
, uint32_t trDepth
) const { return (m_cbf
[ttype
][absPartIdx
] >> trDepth
) & 0x1; }
197 uint8_t getQtRootCbf(uint32_t absPartIdx
) const { return m_cbf
[0][absPartIdx
] || m_cbf
[1][absPartIdx
] || m_cbf
[2][absPartIdx
]; }
// Read-only queries (all const).
// NOTE(review): declared only; the notes below are limited to what the
// signatures show.
198 char getRefQP(uint32_t currAbsIdxInCTU
) const;
// mvFieldNeighbours points to rows of two MVField entries; both it and
// interDirNeighbours are caller-provided buffers whose required length is not
// visible in this excerpt.
199 uint32_t getInterMergeCandidates(uint32_t absPartIdx
, uint32_t puIdx
, MVField (*mvFieldNeighbours
)[2], uint8_t* interDirNeighbours
) const;
// outMV is taken by non-const reference, so it can be modified in place.
200 void clipMv(MV
& outMV
) const;
201 int fillMvpCand(uint32_t puIdx
, uint32_t absPartIdx
, int picList
, int refIdx
, MV
* amvpCand
, MV
* mvc
) const;
// tuDepthRange is a two-element output array (presumably {min, max} — confirm).
202 void getIntraTUQtDepthRange(uint32_t tuDepthRange
[2], uint32_t absPartIdx
) const;
203 void getInterTUQtDepthRange(uint32_t tuDepthRange
[2], uint32_t absPartIdx
) const;
205 uint32_t getNumPartInter() const { return nbPartsTable
[(int)m_partSize
[0]]; }
206 bool isIntra(uint32_t absPartIdx
) const { return m_predMode
[absPartIdx
] == MODE_INTRA
; }
207 bool isSkipped(uint32_t absPartIdx
) const { return !!m_skipFlag
[absPartIdx
]; }
208 bool isBipredRestriction() const { return m_log2CUSize
[0] == 3 && m_partSize
[0] != SIZE_2Nx2N
; }
// NOTE(review): declared only; the notes below are limited to what the
// signatures show.
// absPartIdx, puWidth and puHeight are output references.
210 void getPartIndexAndSize(uint32_t puIdx
, uint32_t& absPartIdx
, int& puWidth
, int& puHeight
) const;
// mvField is an output reference; cu is the CU to read from (may differ from this).
211 void getMvField(const CUData
* cu
, uint32_t absPartIdx
, int picList
, MVField
& mvField
) const;
// modeList and intraDirPred are caller-provided output buffers; their required
// lengths are not visible in this excerpt.
213 void getAllowedChromaDir(uint32_t absPartIdx
, uint32_t* modeList
) const;
214 int getIntraDirLumaPredictor(uint32_t absPartIdx
, uint32_t* intraDirPred
) const;
// partIdxLT and partIdxRT are output references.
215 void deriveLeftRightTopIdxAdi(uint32_t& partIdxLT
, uint32_t& partIdxRT
, uint32_t partOffset
, uint32_t partDepth
) const;
217 uint32_t getSCUAddr() const { return (m_cuAddr
<< g_maxFullDepth
* 2) + m_absIdxInCTU
; }
// Entropy-coding context queries.
// NOTE(review): declared only; the notes below are limited to what the
// signatures show.
218 uint32_t getCtxSplitFlag(uint32_t absPartIdx
, uint32_t depth
) const;
219 uint32_t getCtxSkipFlag(uint32_t absPartIdx
) const;
220 ScanType
getCoefScanIdx(uint32_t absPartIdx
, uint32_t log2TrSize
, bool bIsLuma
, bool bIsIntra
) const;
// result is an output reference to a TUEntropyCodingParameters.
221 void getTUEntropyCodingParameters(TUEntropyCodingParameters
&result
, uint32_t absPartIdx
, uint32_t log2TrSize
, bool bIsLuma
) const;
// Neighbour lookups. Each takes the current part index, writes a part index
// through its first (reference) argument and returns a read-only CUData pointer.
// NOTE(review): presumably NULL is returned when the neighbour is unavailable,
// and the written index is relative to the returned CU — confirm.
223 const CUData
* getPULeft(uint32_t& lPartUnitIdx
, uint32_t curPartUnitIdx
) const;
// planarAtCTUBoundary defaults to false.
224 const CUData
* getPUAbove(uint32_t& aPartUnitIdx
, uint32_t curPartUnitIdx
, bool planarAtCTUBoundary
= false) const;
225 const CUData
* getPUAboveLeft(uint32_t& alPartUnitIdx
, uint32_t curPartUnitIdx
) const;
226 const CUData
* getPUAboveRight(uint32_t& arPartUnitIdx
, uint32_t curPartUnitIdx
) const;
227 const CUData
* getPUBelowLeft(uint32_t& blPartUnitIdx
, uint32_t curPartUnitIdx
) const;
229 const CUData
* getQpMinCuLeft(uint32_t& lPartUnitIdx
, uint32_t currAbsIdxInCTU
) const;
230 const CUData
* getQpMinCuAbove(uint32_t& aPartUnitIdx
, uint32_t currAbsIdxInCTU
) const;
// Variants of the above-right / below-left lookups that take an extra
// partUnitOffset argument.
232 const CUData
* getPUAboveRightAdi(uint32_t& arPartUnitIdx
, uint32_t curPartUnitIdx
, uint32_t partUnitOffset
) const;
233 const CUData
* getPUBelowLeftAdi(uint32_t& blPartUnitIdx
, uint32_t curPartUnitIdx
, uint32_t partUnitOffset
) const;
// Helper declarations.
// NOTE(review): declared only; the access specifier and the template header
// that introduces T for setAllPU() are not visible in this excerpt.
238 void setAllPU(T
*p
, const T
& val
, int absPartIdx
, int puIdx
);
240 char getLastCodedQP(uint32_t absPartIdx
) const;
241 int getLastValidPartIdx(int absPartIdx
) const;
243 bool hasEqualMotion(uint32_t absPartIdx
, const CUData
& candCU
, uint32_t candAbsPartIdx
) const;
245 bool isDiffMER(int xN
, int yN
, int xP
, int yP
) const;
247 // add possible motion vector predictor candidates
// Both variants write the candidate through mvp; what the bool result signals
// is not visible here (presumably whether a candidate was found — confirm).
248 bool addMVPCand(MV
& mvp
, int picList
, int refIdx
, uint32_t absPartIdx
, MVP_DIR dir
) const;
249 bool addMVPCandOrder(MV
& mvp
, int picList
, int refIdx
, uint32_t absPartIdx
, MVP_DIR dir
) const;
// outMV and outRefIdx are output references.
251 bool getColMVP(MV
& outMV
, int& outRefIdx
, int picList
, int cuAddr
, int absPartIdx
) const;
// outMV is the output; inMV and the four POC values are inputs.
253 void scaleMvByPOCDist(MV
& outMV
, const MV
& inMV
, int curPOC
, int curRefPOC
, int colPOC
, int colRefPOC
) const;
// partIdxLT and partIdxRT are output references.
255 void deriveLeftRightTopIdx(uint32_t puIdx
, uint32_t& partIdxLT
, uint32_t& partIdxRT
) const;
257 uint32_t deriveCenterIdx(uint32_t puIdx
) const;
258 uint32_t deriveRightBottomIdx(uint32_t puIdx
) const;
259 uint32_t deriveLeftBottomIdx(uint32_t puIdx
) const;
262 // TU settings for entropy encoding
// Passed by reference as the `result` argument of
// CUData::getTUEntropyCodingParameters().
// NOTE(review): the struct braces and some members are not visible in this
// excerpt; the field notes below are inferred from the names only.
263 struct TUEntropyCodingParameters
265 const uint16_t *scan
; // presumably the coefficient scan-order table — confirm
266 const uint16_t *scanCG
; // presumably the coefficient-group scan-order table — confirm
268 uint32_t log2TrSizeCG
;
269 uint32_t firstSignificanceMapContext
;
// Backing storage of the pool; sizes are those requested in create().
274 uint8_t* charMemBlock
; // numPartition * numInstances * CUData::BytesPerPartition bytes
275 coeff_t
* trCoeffMemBlock
; // (sizeL + sizeC * 2) * numInstances coefficients
278 CUDataMemPool() { charMemBlock
= NULL
; trCoeffMemBlock
= NULL
; mvMemBlock
= NULL
; }
// Allocate the three pool blocks for numInstances CUs at the given depth and
// chroma format.
// NOTE(review): the function braces, the success return and any failure path
// are not visible in this excerpt. CHECKED_MALLOC is defined elsewhere;
// presumably it jumps to a `fail` label when an allocation fails — confirm.
280 bool create(uint32_t depth
, uint32_t csp
, uint32_t numInstances
)
// Partition count shrinks by a factor of four per depth level.
282 uint32_t numPartition
= NUM_CU_PARTITIONS
>> (depth
* 2);
// CU edge length halves per depth level.
283 uint32_t cuSize
= g_maxCUSize
>> depth
;
// Sample count of the luma block (cuSize x cuSize).
284 uint32_t sizeL
= cuSize
* cuSize
;
// Sample count of one chroma block after horizontal and vertical subsampling.
285 uint32_t sizeC
= sizeL
>> (CHROMA_H_SHIFT(csp
) + CHROMA_V_SHIFT(csp
));
// One luma plus two chroma coefficient blocks per instance.
// NOTE(review): all size products here are computed in uint32_t; confirm they
// cannot overflow for the supported numInstances range.
286 CHECKED_MALLOC(trCoeffMemBlock
, coeff_t
, (sizeL
+ sizeC
* 2) * numInstances
);
// BytesPerPartition bytes of per-part data for every partition of every instance.
287 CHECKED_MALLOC(charMemBlock
, uint8_t, numPartition
* numInstances
* CUData::BytesPerPartition
);
// Four MVs per partition per instance (CUData declares m_mv[2] and m_mvd[2]).
288 CHECKED_MALLOC(mvMemBlock
, MV
, numPartition
* 4 * numInstances
);
// Release the three blocks requested in create().
// NOTE(review): the enclosing function header is not visible in this excerpt,
// and X265_FREE is defined elsewhere — confirm that it tolerates NULL, since the
// constructor leaves all three pointers NULL until create() runs.
297 X265_FREE(trCoeffMemBlock
);
298 X265_FREE(mvMemBlock
);
299 X265_FREE(charMemBlock
);
304 #endif // ifndef X265_CUDATA_H