1 /*****************************************************************************
2 * Copyright (C) 2013 x265 project
4 * Authors: Steve Borho <steve@borho.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
32 #include "threadpool.h"
42 struct ThreadLocalData
;
44 /* All the CABAC contexts that Analysis needs to keep track of at each depth
45 * and temp buffers for residual, coeff, and recon for use during residual
46 * quad-tree depth recursion */
49 Entropy cur
; /* starting context for current CU */
51 /* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 2=16x16, 3=32x32
52 * the coeffRQT and reconQtYuv are allocated to the max CU size at every depth. The parts
53 * which are reconstructed at each depth are valid. At the end, the transform depth table
54 * is walked and the coeff and recon at the final split depths are collected */
55 Entropy rqtRoot
; /* residual quad-tree start context */
56 Entropy rqtTemp
; /* residual quad-tree temp context */
57 Entropy rqtTest
; /* residual quad-tree test context */
58 coeff_t
* coeffRQT
[3]; /* coeff storage for entire CTU for each RQT layer */
59 Yuv reconQtYuv
; /* recon storage for entire CTU for each RQT layer (intra) */
60 ShortYuv resiQtYuv
; /* residual storage for entire CTU for each RQT layer (inter) */
62 /* per-depth temp buffers for inter prediction */
87 enum { MAX_INTER_PARTS
= 2 };
89 MotionData bestME
[MAX_INTER_PARTS
][2];
90 MV amvpCand
[2][MAX_NUM_REF
][AMVP_NUM_CANDS
];
92 uint64_t rdCost
; // sum of partition (psy) RD costs (sse(fenc, recon) + lambda2 * bits)
93 uint64_t sa8dCost
; // sum of partition sa8d distortion costs (sa8d(fenc, pred) + lambda * bits)
94 uint32_t sa8dBits
; // signal bits used in sa8dCost calculation
95 uint32_t psyEnergy
; // sum of partition psycho-visual energy difference
96 uint32_t distortion
; // sum of partition SSE distortion
97 uint32_t totalBits
; // sum of partition bits (mv + coeff)
98 uint32_t mvBits
; // Mv bits + Ref + block type (or intra mode)
99 uint32_t coeffBits
; // Texture bits (DCT Coeffs)
113 void addSubCosts(const Mode
& subMode
)
115 rdCost
+= subMode
.rdCost
;
116 sa8dCost
+= subMode
.sa8dCost
;
117 sa8dBits
+= subMode
.sa8dBits
;
118 psyEnergy
+= subMode
.psyEnergy
;
119 distortion
+= subMode
.distortion
;
120 totalBits
+= subMode
.totalBits
;
121 mvBits
+= subMode
.mvBits
;
122 coeffBits
+= subMode
.coeffBits
;
/* Returns idx when idx is the last of numIdx indices (idx >= numIdx - 1),
 * and idx + 1 otherwise.
 *
 * NOTE(review): presumably the bit length of a truncated-unary code for
 * symbol idx out of numIdx symbols - confirm against the callers. */
inline int getTUBits(int idx, int numIdx)
{
    /* the comparison contributes 1 for every index except the last one */
    return idx + (idx < numIdx - 1);
}
131 class Search
: public JobProvider
, public Predict
135 static const pixel zeroPixel
[MAX_CU_SIZE
];
136 static const int16_t zeroShort
[MAX_CU_SIZE
];
141 const x265_param
* m_param
;
143 const Slice
* m_slice
;
145 Entropy m_entropyCoder
;
146 RQTData m_rqt
[NUM_FULL_DEPTH
];
148 uint8_t* m_qtTempCbf
[3];
149 uint8_t* m_qtTempTransformSkipFlag
[3];
151 bool m_bFrameParallel
;
153 uint32_t m_numLayers
;
154 uint32_t m_refLagPixels
;
159 bool initSearch(const x265_param
& param
, ScalingList
& scalingList
);
160 void setQP(const Slice
& slice
, int qp
);
162 // mark temp RD entropy contexts as uninitialized; useful for finding loads without stores
163 void invalidateContexts(int fromDepth
);
165 // full RD search of intra modes. if sharedModes is not NULL, it directly uses them
166 void checkIntra(Mode
& intraMode
, const CUGeom
& cuGeom
, PartSize partSize
, uint8_t* sharedModes
);
168 // select best intra mode using only sa8d costs, cannot measure NxN intra
169 void checkIntraInInter(Mode
& intraMode
, const CUGeom
& cuGeom
);
170 // encode luma mode selected by checkIntraInInter, then pick and encode a chroma mode
171 void encodeIntraInInter(Mode
& intraMode
, const CUGeom
& cuGeom
);
173 // estimation inter prediction (non-skip)
174 bool predInterSearch(Mode
& interMode
, const CUGeom
& cuGeom
, bool bMergeOnly
, bool bChroma
);
176 // encode residual and compute rd-cost for inter mode
177 void encodeResAndCalcRdInterCU(Mode
& interMode
, const CUGeom
& cuGeom
);
178 void encodeResAndCalcRdSkipCU(Mode
& interMode
);
180 // encode residual without rd-cost
181 void residualTransformQuantInter(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t absPartIdx
, uint32_t depth
, const uint32_t depthRange
[2]);
182 void residualTransformQuantIntra(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t tuDepth
, uint32_t absPartIdx
, const uint32_t depthRange
[2]);
183 void residualQTIntraChroma(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t tuDepth
, uint32_t absPartIdx
);
185 // pick be chroma mode from available using just sa8d costs
186 void getBestIntraModeChroma(Mode
& intraMode
, const CUGeom
& cuGeom
);
190 /* motion estimation distribution */
191 ThreadLocalData
* m_tld
;
192 Mode
* m_curInterMode
;
193 const CUGeom
* m_curGeom
;
195 uint32_t m_listSelBits
[3];
197 volatile int m_numAcquiredME
;
198 volatile int m_numCompletedME
;
199 Event m_meCompletionEvent
;
202 void singleMotionEstimation(Search
& master
, Mode
& interMode
, const CUGeom
& cuGeom
, int part
, int list
, int ref
);
204 void saveResidualQTData(CUData
& cu
, ShortYuv
& resiYuv
, uint32_t absPartIdx
, uint32_t depth
);
206 // RDO search of luma intra modes; result is fully encoded luma. luma distortion is returned
207 uint32_t estIntraPredQT(Mode
&intraMode
, const CUGeom
& cuGeom
, const uint32_t depthRange
[2], uint8_t* sharedModes
);
209 // RDO select best chroma mode from luma; result is fully encode chroma. chroma distortion is returned
210 uint32_t estIntraPredChromaQT(Mode
&intraMode
, const CUGeom
& cuGeom
);
212 void codeSubdivCbfQTChroma(const CUData
& cu
, uint32_t tuDepth
, uint32_t absPartIdx
);
213 void codeInterSubdivCbfQT(CUData
& cu
, uint32_t absPartIdx
, const uint32_t depth
, const uint32_t depthRange
[2]);
214 void codeCoeffQTChroma(const CUData
& cu
, uint32_t tuDepth
, uint32_t absPartIdx
, TextType ttype
);
222 Cost() { rdcost
= 0; bits
= 0; distortion
= 0; energy
= 0; }
225 uint64_t estimateNullCbfCost(uint32_t &dist
, uint32_t &psyEnergy
, uint32_t tuDepth
, TextType compId
);
226 void estimateResidualQT(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t absPartIdx
, uint32_t depth
, ShortYuv
& resiYuv
, Cost
& costs
, const uint32_t depthRange
[2]);
228 // estimate bit cost of residual QT
229 void encodeResidualQT(CUData
& cu
, uint32_t absPartIdx
, uint32_t depth
, TextType ttype
, const uint32_t depthRange
[2]);
231 // generate prediction, generate residual and recon. if bAllowSplit, find optimal RQT splits
232 void codeIntraLumaQT(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t tuDepth
, uint32_t absPartIdx
, bool bAllowSplit
, Cost
& costs
, const uint32_t depthRange
[2]);
233 void codeIntraLumaTSkip(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t tuDepth
, uint32_t absPartIdx
, Cost
& costs
);
234 void extractIntraResultQT(CUData
& cu
, Yuv
& reconYuv
, uint32_t tuDepth
, uint32_t absPartIdx
);
236 // generate chroma prediction, generate residual and recon
237 uint32_t codeIntraChromaQt(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t tuDepth
, uint32_t absPartIdx
, uint32_t& psyEnergy
);
238 uint32_t codeIntraChromaTSkip(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t tuDepth
, uint32_t tuDepthC
, uint32_t absPartIdx
, uint32_t& psyEnergy
);
239 void extractIntraResultChromaQT(CUData
& cu
, Yuv
& reconYuv
, uint32_t absPartIdx
, uint32_t tuDepth
);
241 // reshuffle CBF flags after coding a pair of 4:2:2 chroma blocks
242 void offsetSubTUCBFs(CUData
& cu
, TextType ttype
, uint32_t tuDepth
, uint32_t absPartIdx
);
246 /* merge candidate data, cached between calls to mergeEstimation */
247 MVField mvFieldNeighbours
[MRG_MAX_NUM_CANDS
][2];
248 uint8_t interDirNeighbours
[MRG_MAX_NUM_CANDS
];
249 uint32_t maxNumMergeCand
;
251 /* data updated for each partition */
263 /* inter/ME helper functions */
264 void checkBestMVP(MV
* amvpCand
, MV cMv
, MV
& mvPred
, int& mvpIdx
, uint32_t& outBits
, uint32_t& outCost
) const;
265 void setSearchRange(const CUData
& cu
, MV mvp
, int merange
, MV
& mvmin
, MV
& mvmax
) const;
266 uint32_t mergeEstimation(CUData
& cu
, const CUGeom
& cuGeom
, int partIdx
, MergeData
& m
);
267 static void getBlkBits(PartSize cuMode
, bool bPSlice
, int partIdx
, uint32_t lastMode
, uint32_t blockBit
[3]);
269 /* intra helper functions */
270 enum { MAX_RD_INTRA_MODES
= 16 };
271 static void updateCandList(uint32_t mode
, uint64_t cost
, int maxCandCount
, uint32_t* candModeList
, uint64_t* candCostList
);
273 // get most probable luma modes for CU part, and bit cost of all non mpm modes
274 uint32_t getIntraRemModeBits(CUData
& cu
, uint32_t absPartIdx
, uint32_t preds
[3], uint64_t& mpms
) const;
276 void updateModeCost(Mode
& m
) const { m
.rdCost
= m_rdCost
.m_psyRd
? m_rdCost
.calcPsyRdCost(m
.distortion
, m
.totalBits
, m
.psyEnergy
) : m_rdCost
.calcRdCost(m
.distortion
, m
.totalBits
); }
280 #endif // ifndef X265_SEARCH_H