1 /*****************************************************************************
2 * Copyright (C) 2013 x265 project
4 * Authors: Steve Borho <steve@borho.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
32 #include "threadpool.h"
38 #define MVP_IDX_BITS 1
45 struct ThreadLocalData
;
47 /* All the CABAC contexts that Analysis needs to keep track of at each depth
48 * and temp buffers for residual, coeff, and recon for use during residual
49 * quad-tree depth recursion */
52 Entropy cur
; /* starting context for current CU */
54 /* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 2=16x16, 3=32x32
55 * the coeffRQT and reconQtYuv are allocated to the max CU size at every depth. The parts
56 * which are reconstructed at each depth are valid. At the end, the transform depth table
57 * is walked and the coeff and recon at the final split depths are collected */
58 Entropy rqtRoot
; /* residual quad-tree start context */
59 Entropy rqtTemp
; /* residual quad-tree temp context */
60 Entropy rqtTest
; /* residual quad-tree test context */
61 coeff_t
* coeffRQT
[3]; /* coeff storage for entire CTU for each RQT layer */
62 Yuv reconQtYuv
; /* recon storage for entire CTU for each RQT layer (intra) */
63 ShortYuv resiQtYuv
; /* residual storage for entire CTU for each RQT layer (inter) */
65 /* per-depth temp buffers for inter prediction */
/* Bit count for signalling index idx out of numIdx possible values.
 *
 * Returns idx plus one additional bit whenever idx is below the last valid
 * index (numIdx - 1); for idx >= numIdx - 1 the result is idx itself.
 * NOTE(review): the name suggests a truncated-unary code length - confirm
 * against the callers. */
inline int getTUBits(int idx, int numIdx)
{
    /* the comparison yields 1 for every index except the last one */
    return idx + (idx < numIdx - 1);
}
76 class Search
: public JobProvider
, public Predict
80 static const pixel zeroPixel
[MAX_CU_SIZE
];
81 static const int16_t zeroShort
[MAX_CU_SIZE
];
86 const x265_param
* m_param
;
90 Entropy m_entropyCoder
;
91 RQTData m_rqt
[NUM_FULL_DEPTH
];
93 uint8_t* m_qtTempCbf
[3];
94 uint8_t* m_qtTempTransformSkipFlag
[3];
96 bool m_bFrameParallel
;
99 uint32_t m_refLagPixels
;
109 uint64_t rdCost
; // sum of partition (psy) RD costs (sse(fenc, recon) + lambda2 * bits)
110 uint64_t sa8dCost
; // sum of partition sa8d distortion costs (sa8d(fenc, pred) + lambda * bits)
111 uint32_t sa8dBits
; // signal bits used in sa8dCost calculation
112 uint32_t psyEnergy
; // sum of partition psycho-visual energy difference
113 uint32_t distortion
; // sum of partition SSE distortion
114 uint32_t totalBits
; // sum of partition bits (mv + coeff)
115 uint32_t mvBits
; // Mv bits + Ref + block type (or intra mode)
116 uint32_t coeffBits
; // Texture bits (DCT Coeffs)
130 void addSubCosts(const Mode
& subMode
)
132 rdCost
+= subMode
.rdCost
;
133 sa8dCost
+= subMode
.sa8dCost
;
134 sa8dBits
+= subMode
.sa8dBits
;
135 psyEnergy
+= subMode
.psyEnergy
;
136 distortion
+= subMode
.distortion
;
137 totalBits
+= subMode
.totalBits
;
138 mvBits
+= subMode
.mvBits
;
139 coeffBits
+= subMode
.coeffBits
;
156 bool initSearch(const x265_param
& param
, ScalingList
& scalingList
);
157 void setQP(const Slice
& slice
, int qp
);
159 // mark temp RD entropy contexts as uninitialized; useful for finding loads without stores
160 void invalidateContexts(int fromDepth
);
162 // full RD search of intra modes. if sharedModes is not NULL, it directly uses them
163 void checkIntra(Mode
& intraMode
, const CUGeom
& cuGeom
, PartSize partSize
, uint8_t* sharedModes
);
165 // estimate inter prediction (non-skip)
166 bool predInterSearch(Mode
& interMode
, const CUGeom
& cuGeom
, bool bMergeOnly
, bool bChroma
);
168 // encode residual and compute rd-cost for inter mode
169 void encodeResAndCalcRdInterCU(Mode
& interMode
, const CUGeom
& cuGeom
);
170 void encodeResAndCalcRdSkipCU(Mode
& interMode
);
172 void generateCoeffRecon(Mode
& mode
, const CUGeom
& cuGeom
);
173 void residualTransformQuantInter(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t absPartIdx
, uint32_t depth
, uint32_t depthRange
[2]);
175 uint32_t getIntraRemModeBits(CUData
& cu
, uint32_t absPartIdx
, uint32_t preds
[3], uint64_t& mpms
) const;
179 /* motion estimation distribution */
180 ThreadLocalData
* m_tld
;
182 const CUGeom
* m_curGeom
;
184 MotionData m_bestME
[2];
185 uint32_t m_listSelBits
[3];
187 volatile int m_numAcquiredME
;
188 volatile int m_numCompletedME
;
189 Event m_meCompletionEvent
;
192 void singleMotionEstimation(Search
& master
, const CUData
& cu
, const CUGeom
& cuGeom
, int part
, int list
, int ref
);
194 void saveResidualQTData(CUData
& cu
, ShortYuv
& resiYuv
, uint32_t absPartIdx
, uint32_t depth
);
196 // RDO search of luma intra modes; result is fully encoded luma. luma distortion is returned
197 uint32_t estIntraPredQT(Mode
&intraMode
, const CUGeom
& cuGeom
, uint32_t depthRange
[2], uint8_t* sharedModes
);
199 // RDO select best chroma mode from luma; result is fully encoded chroma. chroma distortion is returned
200 uint32_t estIntraPredChromaQT(Mode
&intraMode
, const CUGeom
& cuGeom
);
202 void codeSubdivCbfQTChroma(const CUData
& cu
, uint32_t trDepth
, uint32_t absPartIdx
, uint32_t absPartIdxStep
, uint32_t width
, uint32_t height
);
203 void codeCoeffQTChroma(const CUData
& cu
, uint32_t trDepth
, uint32_t absPartIdx
, TextType ttype
);
211 Cost() { rdcost
= 0; bits
= 0; distortion
= 0; energy
= 0; }
214 void estimateResidualQT(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t absPartIdx
, uint32_t depth
, ShortYuv
& resiYuv
, Cost
& costs
, uint32_t depthRange
[2]);
216 void encodeResidualQT(CUData
& cu
, uint32_t absPartIdx
, uint32_t depth
, bool bSubdivAndCbf
, TextType ttype
, uint32_t depthRange
[2]);
218 // generate prediction, generate residual and recon. if bAllowSplit, find optimal RQT splits
219 void codeIntraLumaQT(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t trDepth
, uint32_t absPartIdx
, bool bAllowSplit
, Cost
& costs
, uint32_t depthRange
[2]);
220 void codeIntraLumaTSkip(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t trDepth
, uint32_t absPartIdx
, Cost
& costs
);
221 void extractIntraResultQT(CUData
& cu
, Yuv
& reconYuv
, uint32_t trDepth
, uint32_t absPartIdx
);
223 // generate chroma prediction, generate residual and recon
224 uint32_t codeIntraChromaQt(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t trDepth
, uint32_t absPartIdx
, uint32_t& psyEnergy
);
225 uint32_t codeIntraChromaTSkip(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t trDepth
, uint32_t trDepthC
, uint32_t absPartIdx
, uint32_t& psyEnergy
);
226 void extractIntraResultChromaQT(CUData
& cu
, Yuv
& reconYuv
, uint32_t absPartIdx
, uint32_t trDepth
, bool tuQuad
);
228 void residualTransformQuantIntra(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t trDepth
, uint32_t absPartIdx
, uint32_t depthRange
[2]);
229 void residualQTIntraChroma(Mode
& mode
, const CUGeom
& cuGeom
, uint32_t trDepth
, uint32_t absPartIdx
);
231 void offsetSubTUCBFs(CUData
& cu
, TextType ttype
, uint32_t trDepth
, uint32_t absPartIdx
);
235 /* merge candidate data, cached between calls to mergeEstimation */
236 MVField mvFieldNeighbours
[MRG_MAX_NUM_CANDS
][2];
237 uint8_t interDirNeighbours
[MRG_MAX_NUM_CANDS
];
238 uint32_t maxNumMergeCand
;
240 /* data updated for each partition */
252 /* inter/ME helper functions */
253 void checkBestMVP(MV
* amvpCand
, MV cMv
, MV
& mvPred
, int& mvpIdx
, uint32_t& outBits
, uint32_t& outCost
) const;
254 void setSearchRange(const CUData
& cu
, MV mvp
, int merange
, MV
& mvmin
, MV
& mvmax
) const;
255 uint32_t mergeEstimation(CUData
& cu
, const CUGeom
& cuGeom
, int partIdx
, MergeData
& m
);
256 static void getBlkBits(PartSize cuMode
, bool bPSlice
, int partIdx
, uint32_t lastMode
, uint32_t blockBit
[3]);
258 /* intra helper functions */
259 enum { MAX_RD_INTRA_MODES
= 16 };
260 static void updateCandList(uint32_t mode
, uint64_t cost
, int maxCandCount
, uint32_t* candModeList
, uint64_t* candCostList
);
261 void getBestIntraModeChroma(Mode
& intraMode
, const CUGeom
& cuGeom
);
263 void updateModeCost(Mode
& m
) const { m
.rdCost
= m_rdCost
.m_psyRd
? m_rdCost
.calcPsyRdCost(m
.distortion
, m
.totalBits
, m
.psyEnergy
) : m_rdCost
.calcRdCost(m
.distortion
, m
.totalBits
); }
267 #endif // ifndef X265_SEARCH_H