Imported Upstream version 1.4
[deb_x265.git] / source / encoder / search.h
CommitLineData
72b9787e
JB
/*****************************************************************************
 * Copyright (C) 2013 x265 project
 *
 * Authors: Steve Borho <steve@borho.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/
23
24#ifndef X265_SEARCH_H
25#define X265_SEARCH_H
26
27#include "common.h"
28#include "predict.h"
29#include "quant.h"
30#include "bitcost.h"
31#include "yuv.h"
32#include "threadpool.h"
33
34#include "rdcost.h"
35#include "entropy.h"
36#include "motion.h"
37
/* NOTE(review): presumably the bit count charged for signalling an MVP
 * candidate index; no user is visible in this header - confirm at call sites */
#define MVP_IDX_BITS 1

/* NOTE(review): presumably the number of residual quad-tree layers; the qtLayer
 * indexing described on RQTData lists four sizes (4x4, 8x8, 16x16, 32x32) -
 * confirm against the users of this macro */
#define NUM_LAYERS 4
40
41namespace x265 {
42// private namespace
43
44class Entropy;
45struct ThreadLocalData;
46
47/* All the CABAC contexts that Analysis needs to keep track of at each depth
48 * and temp buffers for residual, coeff, and recon for use during residual
49 * quad-tree depth recursion */
50struct RQTData
51{
52 Entropy cur; /* starting context for current CU */
53
54 /* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 2=16x16, 3=32x32
55 * the coeffRQT and reconQtYuv are allocated to the max CU size at every depth. The parts
56 * which are reconstructed at each depth are valid. At the end, the transform depth table
57 * is walked and the coeff and recon at the final split depths are collected */
58 Entropy rqtRoot; /* residual quad-tree start context */
59 Entropy rqtTemp; /* residual quad-tree temp context */
60 Entropy rqtTest; /* residual quad-tree test context */
61 coeff_t* coeffRQT[3]; /* coeff storage for entire CTU for each RQT layer */
62 Yuv reconQtYuv; /* recon storage for entire CTU for each RQT layer (intra) */
63 ShortYuv resiQtYuv; /* residual storage for entire CTU for each RQT layer (inter) */
64
65 /* per-depth temp buffers for inter prediction */
66 ShortYuv tmpResiYuv;
67 Yuv tmpPredYuv;
68 Yuv bidirPredYuv[2];
69};
70
/* Returns the bit count this header assigns to index `idx` out of `numIdx`
 * possible values: `idx` bits, plus one more bit unless `idx` is the last
 * index (numIdx - 1). This matches the length of a truncated unary code,
 * where the final symbol needs no terminating bit.
 *
 * Examples with numIdx == 3: idx 0 -> 1 bit, idx 1 -> 2 bits, idx 2 -> 2 bits.
 * No range checking is performed on either argument. */
inline int getTUBits(int idx, int numIdx)
{
    /* the comparison yields 1 for every index except the last one */
    return idx + (idx < numIdx - 1);
}
75
76class Search : public JobProvider, public Predict
77{
78public:
79
80 static const pixel zeroPixel[MAX_CU_SIZE];
81 static const int16_t zeroShort[MAX_CU_SIZE];
82
83 MotionEstimate m_me;
84 Quant m_quant;
85 RDCost m_rdCost;
86 const x265_param* m_param;
87 Frame* m_frame;
88 const Slice* m_slice;
89
90 Entropy m_entropyCoder;
91 RQTData m_rqt[NUM_FULL_DEPTH];
92
93 uint8_t* m_qtTempCbf[3];
94 uint8_t* m_qtTempTransformSkipFlag[3];
95
96 bool m_bFrameParallel;
97 bool m_bEnableRDOQ;
98 uint32_t m_numLayers;
99 uint32_t m_refLagPixels;
100
101 struct Mode
102 {
103 CUData cu;
104 const Yuv* fencYuv;
105 Yuv predYuv;
106 Yuv reconYuv;
107 Entropy contexts;
108
109 uint64_t rdCost; // sum of partition (psy) RD costs (sse(fenc, recon) + lambda2 * bits)
110 uint64_t sa8dCost; // sum of partition sa8d distortion costs (sa8d(fenc, pred) + lambda * bits)
111 uint32_t sa8dBits; // signal bits used in sa8dCost calculation
112 uint32_t psyEnergy; // sum of partition psycho-visual energy difference
113 uint32_t distortion; // sum of partition SSE distortion
114 uint32_t totalBits; // sum of partition bits (mv + coeff)
115 uint32_t mvBits; // Mv bits + Ref + block type (or intra mode)
116 uint32_t coeffBits; // Texture bits (DCT Coeffs)
117
118 void initCosts()
119 {
120 rdCost = 0;
121 sa8dCost = 0;
122 sa8dBits = 0;
123 psyEnergy = 0;
124 distortion = 0;
125 totalBits = 0;
126 mvBits = 0;
127 coeffBits = 0;
128 }
129
130 void addSubCosts(const Mode& subMode)
131 {
132 rdCost += subMode.rdCost;
133 sa8dCost += subMode.sa8dCost;
134 sa8dBits += subMode.sa8dBits;
135 psyEnergy += subMode.psyEnergy;
136 distortion += subMode.distortion;
137 totalBits += subMode.totalBits;
138 mvBits += subMode.mvBits;
139 coeffBits += subMode.coeffBits;
140 }
141 };
142
143 struct MotionData
144 {
145 MV mv;
146 MV mvp;
147 int mvpIdx;
148 int ref;
149 uint32_t cost;
150 int bits;
151 };
152
153 Search();
154 ~Search();
155
156 bool initSearch(const x265_param& param, ScalingList& scalingList);
157 void setQP(const Slice& slice, int qp);
158
159 // mark temp RD entropy contexts as uninitialized; useful for finding loads without stores
160 void invalidateContexts(int fromDepth);
161
162 // full RD search of intra modes. if sharedModes is not NULL, it directly uses them
163 void checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes);
164
165 // estimation inter prediction (non-skip)
166 bool predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma);
167
168 // encode residual and compute rd-cost for inter mode
169 void encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);
170 void encodeResAndCalcRdSkipCU(Mode& interMode);
171
172 void generateCoeffRecon(Mode& mode, const CUGeom& cuGeom);
173 void residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, uint32_t depthRange[2]);
174
175 uint32_t getIntraRemModeBits(CUData & cu, uint32_t absPartIdx, uint32_t preds[3], uint64_t& mpms) const;
176
177protected:
178
179 /* motion estimation distribution */
180 ThreadLocalData* m_tld;
181 CUData* m_curMECu;
182 const CUGeom* m_curGeom;
183 int m_curPart;
184 MotionData m_bestME[2];
185 uint32_t m_listSelBits[3];
186 int m_totalNumME;
187 volatile int m_numAcquiredME;
188 volatile int m_numCompletedME;
189 Event m_meCompletionEvent;
190 Lock m_outputLock;
191 bool m_bJobsQueued;
192 void singleMotionEstimation(Search& master, const CUData& cu, const CUGeom& cuGeom, int part, int list, int ref);
193
194 void saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t depth);
195
196 // RDO search of luma intra modes; result is fully encoded luma. luma distortion is returned
197 uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, uint32_t depthRange[2], uint8_t* sharedModes);
198
199 // RDO select best chroma mode from luma; result is fully encode chroma. chroma distortion is returned
200 uint32_t estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom);
201
202 void codeSubdivCbfQTChroma(const CUData& cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height);
203 void codeCoeffQTChroma(const CUData& cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype);
204
205 struct Cost
206 {
207 uint64_t rdcost;
208 uint32_t bits;
209 uint32_t distortion;
210 uint32_t energy;
211 Cost() { rdcost = 0; bits = 0; distortion = 0; energy = 0; }
212 };
213
214 void estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, uint32_t depthRange[2]);
215
216 void encodeResidualQT(CUData& cu, uint32_t absPartIdx, uint32_t depth, bool bSubdivAndCbf, TextType ttype, uint32_t depthRange[2]);
217
218 // generate prediction, generate residual and recon. if bAllowSplit, find optimal RQT splits
219 void codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& costs, uint32_t depthRange[2]);
220 void codeIntraLumaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx, Cost& costs);
221 void extractIntraResultQT(CUData& cu, Yuv& reconYuv, uint32_t trDepth, uint32_t absPartIdx);
222
223 // generate chroma prediction, generate residual and recon
224 uint32_t codeIntraChromaQt(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx, uint32_t& psyEnergy);
225 uint32_t codeIntraChromaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t trDepthC, uint32_t absPartIdx, uint32_t& psyEnergy);
226 void extractIntraResultChromaQT(CUData& cu, Yuv& reconYuv, uint32_t absPartIdx, uint32_t trDepth, bool tuQuad);
227
228 void residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]);
229 void residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx);
230
231 void offsetSubTUCBFs(CUData& cu, TextType ttype, uint32_t trDepth, uint32_t absPartIdx);
232
233 struct MergeData
234 {
235 /* merge candidate data, cached between calls to mergeEstimation */
236 MVField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2];
237 uint8_t interDirNeighbours[MRG_MAX_NUM_CANDS];
238 uint32_t maxNumMergeCand;
239
240 /* data updated for each partition */
241 uint32_t absPartIdx;
242 int width;
243 int height;
244
245 /* outputs */
246 MVField mvField[2];
247 uint32_t interDir;
248 uint32_t index;
249 uint32_t bits;
250 };
251
252 /* inter/ME helper functions */
253 void checkBestMVP(MV* amvpCand, MV cMv, MV& mvPred, int& mvpIdx, uint32_t& outBits, uint32_t& outCost) const;
254 void setSearchRange(const CUData& cu, MV mvp, int merange, MV& mvmin, MV& mvmax) const;
255 uint32_t mergeEstimation(CUData& cu, const CUGeom& cuGeom, int partIdx, MergeData& m);
256 static void getBlkBits(PartSize cuMode, bool bPSlice, int partIdx, uint32_t lastMode, uint32_t blockBit[3]);
257
258 /* intra helper functions */
259 enum { MAX_RD_INTRA_MODES = 16 };
260 static void updateCandList(uint32_t mode, uint64_t cost, int maxCandCount, uint32_t* candModeList, uint64_t* candCostList);
261 void getBestIntraModeChroma(Mode& intraMode, const CUGeom& cuGeom);
262
263 void updateModeCost(Mode& m) const { m.rdCost = m_rdCost.m_psyRd ? m_rdCost.calcPsyRdCost(m.distortion, m.totalBits, m.psyEnergy) : m_rdCost.calcRdCost(m.distortion, m.totalBits); }
264};
265}
266
267#endif // ifndef X265_SEARCH_H