/*****************************************************************************
 * Copyright (C) 2013 x265 project
 *
 * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
 *          Steve Borho <steve@borho.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/
#include "common.h"
#include "frame.h"
#include "framedata.h"
#include "picyuv.h"
#include "primitives.h"
#include "threading.h"

#include "analysis.h"
#include "rdcost.h"
#include "encoder.h"

using namespace x265;
/* An explanation of rate distortion levels (--rd-level)
 *
 * rd-level 0 generates no recon per CU (NO RDO or Quant)
 *
 *   sa8d selection between merge / skip / inter / intra and split
 *   no recon pixels generated until CTU analysis is complete, requiring
 *   intra predictions to use source pixels
 *
 * rd-level 1 uses RDO for merge and skip, sa8d for all else
 *
 *   RDO selection between merge and skip
 *   sa8d selection between (merge/skip) / inter modes / intra and split
 *   intra prediction uses reconstructed pixels
 *
 * rd-level 2 uses RDO for merge/skip and split
 *
 *   RDO selection between merge and skip
 *   sa8d selection between (merge/skip) / inter modes / intra
 *
 * rd-level 3 uses RDO for merge/skip/best inter/intra
 *
 *   RDO selection between merge and skip
 *   sa8d selection of best inter mode
 *   RDO selection between (merge/skip) / best inter mode / intra / split
 *
 * rd-level 4 enables RDOQuant
 *
 * rd-level 5,6 does RDO for each inter mode
 */
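/* The dispatch between these alternatives happens in compressCTU() below:
 * I slices always go through compressIntraCU(), while P/B slices select
 * compressInterCU_rd0_4(), compressInterCU_dist() (--pmode) or
 * compressInterCU_rd5_6() based on the configured rdLevel */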
Analysis::Analysis()
{
    m_totalNumJobs = m_numAcquiredJobs = m_numCompletedJobs = 0;
    m_totalNumME = m_numAcquiredME = m_numCompletedME = 0;
}
bool Analysis::create(ThreadLocalData *tld)
{
    m_tld = tld;
    m_bTryLossless = m_param->bCULossless && !m_param->bLossless && m_param->rdLevel >= 2;

    int csp = m_param->internalCsp;
    uint32_t cuSize = g_maxCUSize;

    bool ok = true;
    for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++, cuSize >>= 1)
    {
        ModeDepth &md = m_modeDepth[depth];

        md.cuMemPool.create(depth, csp, MAX_PRED_TYPES);
        ok &= md.fencYuv.create(cuSize, csp);

        for (int j = 0; j < MAX_PRED_TYPES; j++)
        {
            md.pred[j].cu.initialize(md.cuMemPool, depth, csp, j);
            ok &= md.pred[j].predYuv.create(cuSize, csp);
            ok &= md.pred[j].reconYuv.create(cuSize, csp);
            md.pred[j].fencYuv = &md.fencYuv;
        }
    }

    return ok;
}
void Analysis::destroy()
{
    for (uint32_t i = 0; i <= g_maxCUDepth; i++)
    {
        m_modeDepth[i].cuMemPool.destroy();
        m_modeDepth[i].fencYuv.destroy();

        for (int j = 0; j < MAX_PRED_TYPES; j++)
        {
            m_modeDepth[i].pred[j].predYuv.destroy();
            m_modeDepth[i].pred[j].reconYuv.destroy();
        }
    }
}
Search::Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext)
{
    m_slice = ctu.m_slice;
    m_frame = &frame;

    invalidateContexts(0);
    m_quant.setQPforQuant(ctu);
    m_rqt[0].cur.load(initialContext);
    m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_origPicYuv, ctu.m_cuAddr, 0);

    uint32_t numPartition = ctu.m_numPartitions;
    if (m_slice->m_sliceType == I_SLICE)
    {
        uint32_t zOrder = 0;
        if (m_param->analysisMode == X265_ANALYSIS_LOAD)
            compressIntraCU(ctu, cuGeom, m_frame->m_intraData, zOrder);
        else
        {
            compressIntraCU(ctu, cuGeom, NULL, zOrder);

            if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_intraData)
            {
                CUData *bestCU = &m_modeDepth[0].bestMode->cu;
                memcpy(&m_frame->m_intraData->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
                memcpy(&m_frame->m_intraData->modes[ctu.m_cuAddr * numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
                memcpy(&m_frame->m_intraData->partSizes[ctu.m_cuAddr * numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
                m_frame->m_intraData->cuAddr[ctu.m_cuAddr] = ctu.m_cuAddr;
                m_frame->m_intraData->poc[ctu.m_cuAddr] = m_frame->m_poc;
            }
        }
    }
    else
    {
        if (!m_param->rdLevel)
        {
            /* In RD Level 0/1, copy source pixels into the reconstructed block so
             * they are available for intra predictions */
            m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPicYuv, ctu.m_cuAddr, 0);

            compressInterCU_rd0_4(ctu, cuGeom); // TODO: this really wants to be compressInterCU_rd0_1

            /* generate residual for entire CTU at once and copy to reconPic */
            encodeResidue(ctu, cuGeom);
        }
        else if (m_param->bDistributeModeAnalysis && m_param->rdLevel >= 2)
            compressInterCU_dist(ctu, cuGeom);
        else if (m_param->rdLevel <= 4)
            compressInterCU_rd0_4(ctu, cuGeom);
        else
            compressInterCU_rd5_6(ctu, cuGeom);
    }

    return *m_modeDepth[0].bestMode;
}
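/* tryLossless() re-encodes the current best mode of a CU with transquant
 * bypass (lossless) coding and lets checkBestMode() keep whichever version
 * has the lower RD cost; it is only reached when --cu-lossless enabled
 * m_bTryLossless in create() */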
void Analysis::tryLossless(const CUGeom& cuGeom)
{
    ModeDepth& md = m_modeDepth[cuGeom.depth];

    if (!md.bestMode->distortion)
    {
        /* already lossless */
    }
    else if (md.bestMode->cu.m_predMode[0] == MODE_INTRA)
    {
        md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
        PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
        uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir;
        checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes);
        checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
    }
    else
    {
        md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
        md.pred[PRED_LOSSLESS].predYuv.copyFromYuv(md.bestMode->predYuv);
        encodeResAndCalcRdInterCU(md.pred[PRED_LOSSLESS], cuGeom);
        checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
    }
}
void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, x265_intra_data* shared, uint32_t& zOrder)
{
    uint32_t depth = cuGeom.depth;
    ModeDepth& md = m_modeDepth[depth];
    md.bestMode = NULL;

    bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
    bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);

    if (shared)
    {
        uint8_t* sharedDepth = &shared->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
        char* sharedPartSizes = &shared->partSizes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
        uint8_t* sharedModes = &shared->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];

        if (mightNotSplit && depth == sharedDepth[zOrder] && zOrder == cuGeom.encodeIdx)
        {
            m_quant.setQPforQuant(parentCTU);

            PartSize size = (PartSize)sharedPartSizes[zOrder];
            Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] : md.pred[PRED_INTRA_NxN];
            mode.cu.initSubCU(parentCTU, cuGeom);
            checkIntra(mode, cuGeom, size, sharedModes);
            checkBestMode(mode, depth);

            if (mightSplit)
                addSplitFlagCost(*md.bestMode, cuGeom.depth);

            // increment zOrder offset to point to next best depth in sharedDepth buffer
            zOrder += g_depthInc[g_maxCUDepth - 1][sharedDepth[zOrder]];

            mightSplit = false;
        }
    }
    else if (mightNotSplit)
    {
        m_quant.setQPforQuant(parentCTU);

        md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
        checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
        checkBestMode(md.pred[PRED_INTRA], depth);

        if (depth == g_maxCUDepth)
        {
            md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
            checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL);
            checkBestMode(md.pred[PRED_INTRA_NxN], depth);
        }

        if (mightSplit)
            addSplitFlagCost(*md.bestMode, cuGeom.depth);
    }

    if (mightSplit)
    {
        Mode* splitPred = &md.pred[PRED_SPLIT];
        splitPred->initCosts();
        CUData* splitCU = &splitPred->cu;
        splitCU->initSubCU(parentCTU, cuGeom);

        uint32_t nextDepth = depth + 1;
        ModeDepth& nd = m_modeDepth[nextDepth];
        invalidateContexts(nextDepth);
        Entropy* nextContext = &m_rqt[depth].cur;

        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
        {
            const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
            if (childCuData.flags & CUGeom::PRESENT)
            {
                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childCuData.encodeIdx);
                m_rqt[nextDepth].cur.load(*nextContext);
                compressIntraCU(parentCTU, childCuData, shared, zOrder);

                // Save best CU and pred data for this sub CU
                splitCU->copyPartFrom(nd.bestMode->cu, childCuData, subPartIdx);
                splitPred->addSubCosts(*nd.bestMode);
                nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childCuData.numPartitions * subPartIdx);
                nextContext = &nd.bestMode->contexts;
            }
            else
            {
                /* record the depth of this non-present sub-CU */
                splitCU->setEmptyPart(childCuData, subPartIdx);
                zOrder += g_depthInc[g_maxCUDepth - 1][nextDepth];
            }
        }
        nextContext->store(splitPred->contexts);

        if (mightNotSplit)
            addSplitFlagCost(*splitPred, cuGeom.depth);
        else
            updateModeCost(*splitPred);

        checkBestMode(*splitPred, depth);
    }

    checkDQP(md.bestMode->cu, cuGeom);

    /* Copy best data to encData CTU and recon */
    md.bestMode->cu.copyToPic(depth);
    if (md.bestMode != &md.pred[PRED_SPLIT])
        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, parentCTU.m_cuAddr, cuGeom.encodeIdx);
}
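/* findJob() is the JobProvider entry point used by idle worker threads while
 * --pmode jobs are queued. Work is claimed through atomic counters: first the
 * per-partition mode analysis jobs, then the per-reference motion estimation
 * tasks. The thread that completes the last job of a batch signals the
 * waiting master through a completion event */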
bool Analysis::findJob(int threadId)
{
    /* try to acquire a CU mode to analyze */
    if (m_totalNumJobs > m_numAcquiredJobs)
    {
        /* ATOMIC_INC returns the incremented value */
        int id = ATOMIC_INC(&m_numAcquiredJobs);
        if (m_totalNumJobs >= id)
        {
            parallelModeAnalysis(threadId, id - 1);

            if (ATOMIC_INC(&m_numCompletedJobs) == m_totalNumJobs)
                m_modeCompletionEvent.trigger();
            return true;
        }
    }

    /* else try to acquire a motion estimation task */
    if (m_totalNumME > m_numAcquiredME)
    {
        int id = ATOMIC_INC(&m_numAcquiredME);
        if (m_totalNumME >= id)
        {
            parallelME(threadId, id - 1);

            if (ATOMIC_INC(&m_numCompletedME) == m_totalNumME)
                m_meCompletionEvent.trigger();
            return true;
        }
    }

    return false;
}
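/* parallelME() performs the motion search against a single reference; meId
 * indexes the combined L0+L1 reference list, and a threadId of -1 means the
 * master thread is executing the job itself from within findJob() */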
void Analysis::parallelME(int threadId, int meId)
{
    Analysis* slave;

    if (threadId == -1)
        slave = this;
    else
    {
        slave = &m_tld[threadId].analysis;
        slave->setQP(*m_slice, m_rdCost.m_qp);
        slave->m_slice = m_slice;
        slave->m_frame = m_frame;

        PicYuv* fencPic = m_frame->m_origPicYuv;
        pixel* pu = fencPic->getLumaAddr(m_curMECu->m_cuAddr, m_curGeom->encodeIdx + m_puAbsPartIdx);
        slave->m_me.setSourcePlane(fencPic->m_picOrg[0], fencPic->m_stride);
        slave->m_me.setSourcePU(pu - fencPic->m_picOrg[0], m_puWidth, m_puHeight);

        slave->prepMotionCompensation(*m_curMECu, *m_curGeom, m_curPart);
    }

    if (meId < m_slice->m_numRefIdx[0])
        slave->singleMotionEstimation(*this, *m_curMECu, *m_curGeom, m_curPart, 0, meId);
    else
        slave->singleMotionEstimation(*this, *m_curMECu, *m_curGeom, m_curPart, 1, meId - m_slice->m_numRefIdx[0]);
}
void Analysis::parallelModeAnalysis(int threadId, int jobId)
{
    Analysis* slave;

    if (threadId == -1)
        slave = this;
    else
    {
        slave = &m_tld[threadId].analysis;
        slave->m_slice = m_slice;
        slave->m_frame = m_frame;
        slave->setQP(*m_slice, m_rdCost.m_qp);
        slave->invalidateContexts(0);
        if (jobId)
            slave->m_me.setSourcePlane(m_frame->m_origPicYuv->m_picOrg[0], m_frame->m_origPicYuv->m_stride);
    }

    ModeDepth& md = m_modeDepth[m_curGeom->depth];

    if (m_param->rdLevel <= 4)
    {
        switch (jobId)
        {
        case 0:
            if (slave != this)
                slave->m_rqt[m_curGeom->depth].cur.load(m_rqt[m_curGeom->depth].cur);
            slave->checkIntraInInter_rd0_4(md.pred[PRED_INTRA], *m_curGeom);
            if (m_param->rdLevel > 2)
                slave->encodeIntraInInter(md.pred[PRED_INTRA], *m_curGeom);
            break;

        case 1:
            slave->checkInter_rd0_4(md.pred[PRED_2Nx2N], *m_curGeom, SIZE_2Nx2N);
            break;

        case 2:
            slave->checkInter_rd0_4(md.pred[PRED_Nx2N], *m_curGeom, SIZE_Nx2N);
            break;

        case 3:
            slave->checkInter_rd0_4(md.pred[PRED_2NxN], *m_curGeom, SIZE_2NxN);
            break;

        case 4:
            slave->checkInter_rd0_4(md.pred[PRED_2NxnU], *m_curGeom, SIZE_2NxnU);
            break;

        case 5:
            slave->checkInter_rd0_4(md.pred[PRED_2NxnD], *m_curGeom, SIZE_2NxnD);
            break;

        case 6:
            slave->checkInter_rd0_4(md.pred[PRED_nLx2N], *m_curGeom, SIZE_nLx2N);
            break;

        case 7:
            slave->checkInter_rd0_4(md.pred[PRED_nRx2N], *m_curGeom, SIZE_nRx2N);
            break;

        default:
            X265_CHECK(0, "invalid job ID for parallel mode analysis\n");
            break;
        }
    }
    else
    {
        bool bMergeOnly = m_curGeom->log2CUSize == 6;
        if (slave != this)
        {
            slave->m_rqt[m_curGeom->depth].cur.load(m_rqt[m_curGeom->depth].cur);
            slave->m_quant.setQPforQuant(md.pred[PRED_2Nx2N].cu);
        }

        switch (jobId)
        {
        case 0:
            slave->checkIntra(md.pred[PRED_INTRA], *m_curGeom, SIZE_2Nx2N, NULL);
            if (m_curGeom->depth == g_maxCUDepth && m_curGeom->log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
                slave->checkIntra(md.pred[PRED_INTRA_NxN], *m_curGeom, SIZE_NxN, NULL);
            break;

        case 1:
            slave->checkInter_rd5_6(md.pred[PRED_2Nx2N], *m_curGeom, SIZE_2Nx2N, false);
            break;

        case 2:
            slave->checkInter_rd5_6(md.pred[PRED_Nx2N], *m_curGeom, SIZE_Nx2N, false);
            break;

        case 3:
            slave->checkInter_rd5_6(md.pred[PRED_2NxN], *m_curGeom, SIZE_2NxN, false);
            break;

        case 4:
            slave->checkInter_rd5_6(md.pred[PRED_2NxnU], *m_curGeom, SIZE_2NxnU, bMergeOnly);
            break;

        case 5:
            slave->checkInter_rd5_6(md.pred[PRED_2NxnD], *m_curGeom, SIZE_2NxnD, bMergeOnly);
            break;

        case 6:
            slave->checkInter_rd5_6(md.pred[PRED_nLx2N], *m_curGeom, SIZE_nLx2N, bMergeOnly);
            break;

        case 7:
            slave->checkInter_rd5_6(md.pred[PRED_nRx2N], *m_curGeom, SIZE_nRx2N, bMergeOnly);
            break;

        default:
            X265_CHECK(0, "invalid job ID for parallel mode analysis\n");
            break;
        }
    }
}
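/* compressInterCU_dist() distributes the mode decisions of one CU across the
 * thread pool (--pmode). The master enqueues up to eight jobs, helps drain
 * the queue via findJob(), and keeps the merge analysis for itself so it
 * rarely has to block on another thread */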
void Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom)
{
    uint32_t depth = cuGeom.depth;
    uint32_t cuAddr = parentCTU.m_cuAddr;
    ModeDepth& md = m_modeDepth[depth];
    md.bestMode = NULL;

    bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
    bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
    uint32_t minDepth = m_param->rdLevel <= 4 ? topSkipMinDepth(parentCTU, cuGeom) : 0;

    X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not support RD 0 or 1\n");

    if (mightNotSplit && depth >= minDepth)
    {
        int bTryAmp = m_slice->m_sps->maxAMPDepth > depth && (cuGeom.log2CUSize < 6 || m_param->rdLevel > 4);
        int bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;

        /* Initialize all prediction CUs based on parentCTU */
        md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
        if (m_param->bEnableRectInter)
        {
            md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
            md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
        }
        if (bTryAmp)
        {
            md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
            md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
        }
        if (bTryIntra)
        {
            md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
            if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
                md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
        }

        m_totalNumJobs = 2 + m_param->bEnableRectInter * 2 + bTryAmp * 4;
        m_numAcquiredJobs = !bTryIntra;
        m_numCompletedJobs = m_numAcquiredJobs;
        m_curGeom = &cuGeom;
        m_bJobsQueued = true;
        JobProvider::enqueue();

        for (int i = 0; i < m_totalNumJobs - m_numCompletedJobs; i++)
            m_pool->pokeIdleThread();

        /* participate in processing jobs, until all are distributed */
        while (findJob(-1))
            ;

        JobProvider::dequeue();
        m_bJobsQueued = false;

        /* the master worker thread (this one) does merge analysis. By doing
         * merge after all the other jobs are at least started, we usually avoid
         * blocking on another thread */

        if (m_param->rdLevel <= 4)
        {
            checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);

            m_modeCompletionEvent.wait();

            /* select best inter mode based on sa8d cost */
            Mode *bestInter = &md.pred[PRED_2Nx2N];

            if (m_param->bEnableRectInter)
            {
                if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_Nx2N];
                if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_2NxN];
            }

            if (bTryAmp)
            {
                if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_2NxnU];
                if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_2NxnD];
                if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_nLx2N];
                if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_nRx2N];
            }

            if (m_param->rdLevel > 2)
            {
                /* encode best inter */
                for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
                {
                    prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
                    motionCompensation(bestInter->predYuv, false, true);
                }
                encodeResAndCalcRdInterCU(*bestInter, cuGeom);

                /* RD selection between merge, inter and intra */
                checkBestMode(*bestInter, depth);

                if (bTryIntra)
                    checkBestMode(md.pred[PRED_INTRA], depth);
            }
            else /* m_param->rdLevel == 2 */
            {
                if (!md.bestMode || bestInter->sa8dCost < md.bestMode->sa8dCost)
                    md.bestMode = bestInter;

                if (bTryIntra && md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
                {
                    md.bestMode = &md.pred[PRED_INTRA];
                    encodeIntraInInter(*md.bestMode, cuGeom);
                }
                else if (!md.bestMode->cu.m_mergeFlag[0])
                {
                    /* finally code the best mode selected from SA8D costs */
                    for (uint32_t puIdx = 0; puIdx < md.bestMode->cu.getNumPartInter(); puIdx++)
                    {
                        prepMotionCompensation(md.bestMode->cu, cuGeom, puIdx);
                        motionCompensation(md.bestMode->predYuv, false, true);
                    }
                    encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
                }
            }
        }
        else
        {
            checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
            m_modeCompletionEvent.wait();

            checkBestMode(md.pred[PRED_2Nx2N], depth);

            if (m_param->bEnableRectInter)
            {
                checkBestMode(md.pred[PRED_Nx2N], depth);
                checkBestMode(md.pred[PRED_2NxN], depth);
            }

            if (bTryAmp)
            {
                checkBestMode(md.pred[PRED_2NxnU], depth);
                checkBestMode(md.pred[PRED_2NxnD], depth);
                checkBestMode(md.pred[PRED_nLx2N], depth);
                checkBestMode(md.pred[PRED_nRx2N], depth);
            }

            if (bTryIntra)
            {
                checkBestMode(md.pred[PRED_INTRA], depth);
                if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
                    checkBestMode(md.pred[PRED_INTRA_NxN], depth);
            }
        }

        if (md.bestMode->rdCost == MAX_INT64 && !bTryIntra)
        {
            md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
            checkIntraInInter_rd0_4(md.pred[PRED_INTRA], cuGeom);
            encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
            checkBestMode(md.pred[PRED_INTRA], depth);
        }

        if (mightSplit)
            addSplitFlagCost(*md.bestMode, cuGeom.depth);
    }

    bool bNoSplit = false;
    if (md.bestMode)
    {
        bNoSplit = !!md.bestMode->cu.isSkipped(0);
        if (mightSplit && depth && depth >= minDepth && !bNoSplit && m_param->rdLevel <= 4)
            bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
    }

    if (mightSplit && !bNoSplit)
    {
        Mode* splitPred = &md.pred[PRED_SPLIT];
        splitPred->initCosts();
        CUData* splitCU = &splitPred->cu;
        splitCU->initSubCU(parentCTU, cuGeom);

        uint32_t nextDepth = depth + 1;
        ModeDepth& nd = m_modeDepth[nextDepth];
        invalidateContexts(nextDepth);
        Entropy* nextContext = &m_rqt[depth].cur;

        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
        {
            const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
            if (childCuData.flags & CUGeom::PRESENT)
            {
                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childCuData.encodeIdx);
                m_rqt[nextDepth].cur.load(*nextContext);
                compressInterCU_dist(parentCTU, childCuData);

                // Save best CU and pred data for this sub CU
                splitCU->copyPartFrom(nd.bestMode->cu, childCuData, subPartIdx);
                splitPred->addSubCosts(*nd.bestMode);

                nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childCuData.numPartitions * subPartIdx);
                nextContext = &nd.bestMode->contexts;
            }
            else
                splitCU->setEmptyPart(childCuData, subPartIdx);
        }
        nextContext->store(splitPred->contexts);

        if (mightNotSplit)
            addSplitFlagCost(*splitPred, cuGeom.depth);
        else
            updateModeCost(*splitPred);

        checkBestMode(*splitPred, depth);
    }

    if (!depth || md.bestMode->cu.m_predMode[0] != MODE_INTRA)
    {
        /* early-out statistics */
        FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData);
        FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
        uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
        cuStat.count[depth] += 1;
        cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
    }

    checkDQP(md.bestMode->cu, cuGeom);

    /* Copy best data to encData CTU and recon */
    md.bestMode->cu.copyToPic(depth);
    if (md.bestMode != &md.pred[PRED_SPLIT])
        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, cuAddr, cuGeom.encodeIdx);
}
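/* compressInterCU_rd0_4() is the serial analysis path for --rd 0 through 4.
 * Mode decisions are primarily sa8d-based; full RD cost is computed only for
 * the surviving candidates as the rdLevel rises (see the rate distortion
 * level table at the top of this file) */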
void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom)
{
    uint32_t depth = cuGeom.depth;
    uint32_t cuAddr = parentCTU.m_cuAddr;
    ModeDepth& md = m_modeDepth[depth];
    md.bestMode = NULL;

    bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
    bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
    uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);

    if (mightNotSplit && depth >= minDepth)
    {
        bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;

        /* Initialize all prediction CUs based on parentCTU */
        md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
        if (m_param->bEnableRectInter)
        {
            md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
            md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
        }
        if (m_slice->m_sps->maxAMPDepth > depth && cuGeom.log2CUSize < 6)
        {
            md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
            md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
        }

        /* Compute Merge Cost */
        checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);

        bool earlyskip = false;
        if (m_param->rdLevel)
            earlyskip = m_param->bEnableEarlySkip && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth

        if (!earlyskip)
        {
            checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N);
            Mode *bestInter = &md.pred[PRED_2Nx2N];

            if (m_param->bEnableRectInter)
            {
                checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N);
                if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_Nx2N];
                checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN);
                if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_2NxN];
            }

            if (m_slice->m_sps->maxAMPDepth > depth && cuGeom.log2CUSize < 6)
            {
                bool bHor = false, bVer = false;
                if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
                    bHor = true;
                else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N)
                    bVer = true;
                else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N &&
                         md.bestMode && md.bestMode->cu.getQtRootCbf(0))
                {
                    bHor = true;
                    bVer = true;
                }

                if (bHor)
                {
                    checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU);
                    if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
                        bestInter = &md.pred[PRED_2NxnU];
                    checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD);
                    if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
                        bestInter = &md.pred[PRED_2NxnD];
                }
                if (bVer)
                {
                    checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N);
                    if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
                        bestInter = &md.pred[PRED_nLx2N];
                    checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N);
                    if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
                        bestInter = &md.pred[PRED_nRx2N];
                }
            }

            if (m_param->rdLevel >= 3)
            {
                /* Calculate RD cost of best inter option */
                for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
                {
                    prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
                    motionCompensation(bestInter->predYuv, false, true);
                }
                encodeResAndCalcRdInterCU(*bestInter, cuGeom);

                if (!md.bestMode || bestInter->rdCost < md.bestMode->rdCost)
                    md.bestMode = bestInter;

                if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) ||
                    md.bestMode->sa8dCost == MAX_INT64)
                {
                    md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
                    checkIntraInInter_rd0_4(md.pred[PRED_INTRA], cuGeom);
                    encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
                    if (md.pred[PRED_INTRA].rdCost < md.bestMode->rdCost)
                        md.bestMode = &md.pred[PRED_INTRA];
                }
            }
            else
            {
                /* SA8D choice between merge/skip, inter, and intra */
                if (!md.bestMode || bestInter->sa8dCost < md.bestMode->sa8dCost)
                    md.bestMode = bestInter;

                if (bTryIntra || md.bestMode->sa8dCost == MAX_INT64)
                {
                    md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
                    checkIntraInInter_rd0_4(md.pred[PRED_INTRA], cuGeom);
                    if (md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
                        md.bestMode = &md.pred[PRED_INTRA];
                }

                /* finally code the best mode selected by SA8D costs:
                 * RD level 2 - fully encode the best mode
                 * RD level 1 - generate recon pixels
                 * RD level 0 - generate chroma prediction */
                if (md.bestMode->cu.m_mergeFlag[0] && md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N)
                {
                    /* prediction already generated for this CU, and if rd level
                     * is not 0, it is already fully encoded */
                }
                else if (md.bestMode->cu.m_predMode[0] == MODE_INTER)
                {
                    for (uint32_t puIdx = 0; puIdx < md.bestMode->cu.getNumPartInter(); puIdx++)
                    {
                        prepMotionCompensation(md.bestMode->cu, cuGeom, puIdx);
                        motionCompensation(md.bestMode->predYuv, false, true);
                    }
                    if (m_param->rdLevel == 2)
                        encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
                    else if (m_param->rdLevel == 1)
                    {
                        m_rqt[cuGeom.depth].tmpResiYuv.subtract(md.fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize);
                        generateCoeffRecon(*md.bestMode, cuGeom);
                    }
                }
                else
                {
                    if (m_param->rdLevel == 2)
                        encodeIntraInInter(*md.bestMode, cuGeom);
                    else if (m_param->rdLevel == 1)
                        generateCoeffRecon(*md.bestMode, cuGeom);
                }
            }
        }

        if (mightSplit)
            addSplitFlagCost(*md.bestMode, cuGeom.depth);
    }

    bool bNoSplit = false;
    if (md.bestMode)
    {
        bNoSplit = !!md.bestMode->cu.isSkipped(0);
        if (mightSplit && depth && depth >= minDepth && !bNoSplit)
            bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
    }

    if (mightSplit && !bNoSplit)
    {
        Mode* splitPred = &md.pred[PRED_SPLIT];
        splitPred->initCosts();
        CUData* splitCU = &splitPred->cu;
        splitCU->initSubCU(parentCTU, cuGeom);

        uint32_t nextDepth = depth + 1;
        ModeDepth& nd = m_modeDepth[nextDepth];
        invalidateContexts(nextDepth);
        Entropy* nextContext = &m_rqt[depth].cur;

        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
        {
            const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
            if (childCuData.flags & CUGeom::PRESENT)
            {
                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childCuData.encodeIdx);
                m_rqt[nextDepth].cur.load(*nextContext);
                compressInterCU_rd0_4(parentCTU, childCuData);

                // Save best CU and pred data for this sub CU
                splitCU->copyPartFrom(nd.bestMode->cu, childCuData, subPartIdx);
                splitPred->addSubCosts(*nd.bestMode);

                if (m_param->rdLevel)
                    nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childCuData.numPartitions * subPartIdx);
                else
                    nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv, childCuData.numPartitions * subPartIdx);
                if (m_param->rdLevel > 1)
                    nextContext = &nd.bestMode->contexts;
            }
            else
                splitCU->setEmptyPart(childCuData, subPartIdx);
        }
        nextContext->store(splitPred->contexts);

        if (mightNotSplit)
            addSplitFlagCost(*splitPred, cuGeom.depth);
        else if (m_param->rdLevel <= 1)
            splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
        else
            updateModeCost(*splitPred);

        if (!md.bestMode)
            md.bestMode = splitPred;
        else if (m_param->rdLevel >= 1)
        {
            if (splitPred->rdCost < md.bestMode->rdCost)
                md.bestMode = splitPred;
        }
        else
        {
            if (splitPred->sa8dCost < md.bestMode->sa8dCost)
                md.bestMode = splitPred;
        }
    }

    if (!depth || md.bestMode->cu.m_predMode[0] != MODE_INTRA)
    {
        /* early-out statistics */
        FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData);
        FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
        uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
        cuStat.count[depth] += 1;
        cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
    }

    checkDQP(md.bestMode->cu, cuGeom);

    /* Copy best data to encData CTU and recon */
    md.bestMode->cu.copyToPic(depth);
    if (md.bestMode != &md.pred[PRED_SPLIT] && m_param->rdLevel)
        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, cuAddr, cuGeom.encodeIdx);
}
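/* compressInterCU_rd5_6() is the exhaustive path for --rd 5 and 6: every
 * enabled partition size is fully encoded and compared purely by RD cost
 * through checkBestMode() */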
void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom)
{
    uint32_t depth = cuGeom.depth;
    ModeDepth& md = m_modeDepth[depth];
    md.bestMode = NULL;

    bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
    bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);

    if (mightNotSplit)
    {
        for (int i = 0; i < MAX_PRED_TYPES; i++)
            md.pred[i].cu.initSubCU(parentCTU, cuGeom);

        checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
        bool earlySkip = m_param->bEnableEarlySkip && md.bestMode && !md.bestMode->cu.getQtRootCbf(0);

        if (!earlySkip)
        {
            checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false);
            checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);

            if (m_param->bEnableRectInter)
            {
                if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                {
                    checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, false);
                    checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
                }
                if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                {
                    checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, false);
                    checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
                }
            }

            // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
            if (m_slice->m_sps->maxAMPDepth > depth)
            {
                bool bMergeOnly = cuGeom.log2CUSize == 6;

                bool bHor = false, bVer = false;
                if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
                    bHor = true;
                else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N)
                    bVer = true;
                else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N && !md.bestMode->cu.m_mergeFlag[0] && !md.bestMode->cu.isSkipped(0))
                {
                    bHor = true;
                    bVer = true;
                }

                if (bHor)
                {
                    if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                    {
                        checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, bMergeOnly);
                        checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
                    }
                    if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                    {
                        checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, bMergeOnly);
                        checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
                    }
                }
                if (bVer)
                {
                    if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                    {
                        checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, bMergeOnly);
                        checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
                    }
                    if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                    {
                        checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, bMergeOnly);
                        checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
                    }
                }
            }

            if ((m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) &&
                (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))
            {
                checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
                checkBestMode(md.pred[PRED_INTRA], depth);

                if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
                {
                    checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL);
                    checkBestMode(md.pred[PRED_INTRA_NxN], depth);
                }
            }
        }

        if (m_bTryLossless)
            tryLossless(cuGeom);

        if (mightSplit)
            addSplitFlagCost(*md.bestMode, cuGeom.depth);
    }

    // estimate split cost
    if (mightSplit && (!md.bestMode || !md.bestMode->cu.isSkipped(0)))
    {
        Mode* splitPred = &md.pred[PRED_SPLIT];
        splitPred->initCosts();
        CUData* splitCU = &splitPred->cu;
        splitCU->initSubCU(parentCTU, cuGeom);

        uint32_t nextDepth = depth + 1;
        ModeDepth& nd = m_modeDepth[nextDepth];
        invalidateContexts(nextDepth);
        Entropy* nextContext = &m_rqt[depth].cur;

        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
        {
            const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
            if (childCuData.flags & CUGeom::PRESENT)
            {
                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childCuData.encodeIdx);
                m_rqt[nextDepth].cur.load(*nextContext);
                compressInterCU_rd5_6(parentCTU, childCuData);

                // Save best CU and pred data for this sub CU
                splitCU->copyPartFrom(nd.bestMode->cu, childCuData, subPartIdx);
                splitPred->addSubCosts(*nd.bestMode);
                nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childCuData.numPartitions * subPartIdx);
                nextContext = &nd.bestMode->contexts;
            }
            else
                splitCU->setEmptyPart(childCuData, subPartIdx);
        }
        nextContext->store(splitPred->contexts);

        if (mightNotSplit)
            addSplitFlagCost(*splitPred, cuGeom.depth);
        else
            updateModeCost(*splitPred);

        checkBestMode(*splitPred, depth);
    }

    checkDQP(md.bestMode->cu, cuGeom);

    /* Copy best data to encData CTU and recon */
    md.bestMode->cu.copyToPic(depth);
    if (md.bestMode != &md.pred[PRED_SPLIT])
        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, parentCTU.m_cuAddr, cuGeom.encodeIdx);
}
/* sets md.bestMode if a valid merge candidate is found, else leaves it NULL */
void Analysis::checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom)
{
    uint32_t depth = cuGeom.depth;
    ModeDepth& md = m_modeDepth[depth];
    Yuv *fencYuv = &md.fencYuv;

    /* Note that these two Mode instances are named MERGE and SKIP but they may
     * hold the reverse when the function returns. We toggle between the two modes */
    Mode* tempPred = &merge;
    Mode* bestPred = &skip;

    X265_CHECK(m_slice->m_sliceType != I_SLICE, "Evaluating merge in I slice\n");

    tempPred->cu.setPartSizeSubParts(SIZE_2Nx2N);
    tempPred->cu.setPredModeSubParts(MODE_INTER);
    tempPred->cu.m_mergeFlag[0] = true;

    bestPred->cu.setPartSizeSubParts(SIZE_2Nx2N);
    bestPred->cu.setPredModeSubParts(MODE_INTER);
    bestPred->cu.m_mergeFlag[0] = true;

    MVField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists
    uint8_t interDirNeighbours[MRG_MAX_NUM_CANDS];
    uint32_t maxNumMergeCand = tempPred->cu.getInterMergeCandidates(0, 0, mvFieldNeighbours, interDirNeighbours);

    bestPred->sa8dCost = MAX_INT64;
    int bestSadCand = -1;
    int sizeIdx = cuGeom.log2CUSize - 2;
    for (uint32_t i = 0; i < maxNumMergeCand; ++i)
    {
        if (m_bFrameParallel &&
            (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
             mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))
            continue;

        tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; // merge candidate ID is stored in L0 MVP idx
        tempPred->cu.m_interDir[0] = interDirNeighbours[i];
        tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
        tempPred->cu.m_refIdx[0][0] = (char)mvFieldNeighbours[i][0].refIdx;
        tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
        tempPred->cu.m_refIdx[1][0] = (char)mvFieldNeighbours[i][1].refIdx;

        // do MC only for Luma part
        prepMotionCompensation(tempPred->cu, cuGeom, 0);
        motionCompensation(tempPred->predYuv, true, false);

        tempPred->sa8dBits = getTUBits(i, maxNumMergeCand);
        tempPred->distortion = primitives.sa8d[sizeIdx](fencYuv->m_buf[0], fencYuv->m_size, tempPred->predYuv.m_buf[0], tempPred->predYuv.m_size);
        tempPred->sa8dCost = m_rdCost.calcRdSADCost(tempPred->distortion, tempPred->sa8dBits);

        if (tempPred->sa8dCost < bestPred->sa8dCost)
        {
            bestSadCand = i;
            std::swap(tempPred, bestPred);
        }
    }

    /* force mode decision to take inter or intra */
    if (bestSadCand < 0)
        return;

    /* calculate the motion compensation for chroma for the best mode selected */
    prepMotionCompensation(bestPred->cu, cuGeom, 0);
    motionCompensation(bestPred->predYuv, false, true);

    if (m_param->rdLevel)
    {
        if (m_param->bLossless)
            bestPred->rdCost = MAX_INT64;
        else
            encodeResAndCalcRdSkipCU(*bestPred);

        /* Encode with residual */
        tempPred->cu.m_mvpIdx[0][0] = (uint8_t)bestSadCand;
        tempPred->cu.setPUInterDir(interDirNeighbours[bestSadCand], 0, 0);
        tempPred->cu.setPUMv(0, mvFieldNeighbours[bestSadCand][0].mv, 0, 0);
        tempPred->cu.setPURefIdx(0, (char)mvFieldNeighbours[bestSadCand][0].refIdx, 0, 0);
        tempPred->cu.setPUMv(1, mvFieldNeighbours[bestSadCand][1].mv, 0, 0);
        tempPred->cu.setPURefIdx(1, (char)mvFieldNeighbours[bestSadCand][1].refIdx, 0, 0);
        tempPred->sa8dCost = bestPred->sa8dCost;
        tempPred->predYuv.copyFromYuv(bestPred->predYuv);

        encodeResAndCalcRdInterCU(*tempPred, cuGeom);

        md.bestMode = tempPred->rdCost < bestPred->rdCost ? tempPred : bestPred;
    }
    else
        md.bestMode = bestPred;

    /* broadcast sets of MV field data */
    bestPred->cu.setPUInterDir(interDirNeighbours[bestSadCand], 0, 0);
    bestPred->cu.setPUMv(0, mvFieldNeighbours[bestSadCand][0].mv, 0, 0);
    bestPred->cu.setPURefIdx(0, (char)mvFieldNeighbours[bestSadCand][0].refIdx, 0, 0);
    bestPred->cu.setPUMv(1, mvFieldNeighbours[bestSadCand][1].mv, 0, 0);
    bestPred->cu.setPURefIdx(1, (char)mvFieldNeighbours[bestSadCand][1].refIdx, 0, 0);
}
/* sets md.bestMode if a valid merge candidate is found, else leaves it NULL */
void Analysis::checkMerge2Nx2N_rd5_6(Mode& skip, Mode& merge, const CUGeom& cuGeom)
{
    uint32_t depth = cuGeom.depth;

    /* Note that these two Mode instances are named MERGE and SKIP but they may
     * hold the reverse when the function returns. We toggle between the two modes */
    Mode* tempPred = &merge;
    Mode* bestPred = &skip;

    merge.cu.setPredModeSubParts(MODE_INTER);
    merge.cu.setPartSizeSubParts(SIZE_2Nx2N);
    merge.cu.m_mergeFlag[0] = true;

    skip.cu.setPredModeSubParts(MODE_INTER);
    skip.cu.setPartSizeSubParts(SIZE_2Nx2N);
    skip.cu.m_mergeFlag[0] = true;

    MVField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists
    uint8_t interDirNeighbours[MRG_MAX_NUM_CANDS];
    uint32_t maxNumMergeCand = merge.cu.getInterMergeCandidates(0, 0, mvFieldNeighbours, interDirNeighbours);

    bool foundCbf0Merge = false;
    bool triedPZero = false, triedBZero = false;
    bestPred->rdCost = MAX_INT64;
    for (uint32_t i = 0; i < maxNumMergeCand; i++)
    {
        if (m_bFrameParallel &&
            (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
             mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))
            continue;

        /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */
        if (interDirNeighbours[i] == 1 && !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)
        {
            if (triedPZero)
                continue;
            triedPZero = true;
        }
        else if (interDirNeighbours[i] == 3 &&
                 !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx &&
                 !mvFieldNeighbours[i][1].mv.word && !mvFieldNeighbours[i][1].refIdx)
        {
            if (triedBZero)
                continue;
            triedBZero = true;
        }

        tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge candidate ID is stored in L0 MVP idx */
        tempPred->cu.m_interDir[0] = interDirNeighbours[i];
        tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
        tempPred->cu.m_refIdx[0][0] = (char)mvFieldNeighbours[i][0].refIdx;
        tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
        tempPred->cu.m_refIdx[1][0] = (char)mvFieldNeighbours[i][1].refIdx;
        tempPred->cu.setSkipFlagSubParts(false); /* must be cleared between encode iterations */

        prepMotionCompensation(tempPred->cu, cuGeom, 0);
        motionCompensation(tempPred->predYuv, true, true);

        uint8_t hasCbf = true;
        bool swapped = false;
        if (!foundCbf0Merge)
        {
            /* if the best prediction has CBF (not a skip) then try merge with residual */
            encodeResAndCalcRdInterCU(*tempPred, cuGeom);
            hasCbf = tempPred->cu.getQtRootCbf(0);
            foundCbf0Merge = !hasCbf;

            if (tempPred->rdCost < bestPred->rdCost)
            {
                std::swap(tempPred, bestPred);
                swapped = true;
            }
        }
        if (!m_param->bLossless && hasCbf)
        {
            /* try merge without residual (skip), if not lossless coding */
            if (swapped)
            {
                tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
                tempPred->cu.m_interDir[0] = interDirNeighbours[i];
                tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
                tempPred->cu.m_refIdx[0][0] = (char)mvFieldNeighbours[i][0].refIdx;
                tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
                tempPred->cu.m_refIdx[1][0] = (char)mvFieldNeighbours[i][1].refIdx;
                tempPred->cu.setSkipFlagSubParts(false);
                tempPred->predYuv.copyFromYuv(bestPred->predYuv);
            }

            encodeResAndCalcRdSkipCU(*tempPred);

            if (tempPred->rdCost < bestPred->rdCost)
                std::swap(tempPred, bestPred);
        }
    }

    if (bestPred->rdCost < MAX_INT64)
    {
        m_modeDepth[depth].bestMode = bestPred;

        /* broadcast sets of MV field data */
        uint32_t bestCand = bestPred->cu.m_mvpIdx[0][0];
        bestPred->cu.setPUInterDir(interDirNeighbours[bestCand], 0, 0);
        bestPred->cu.setPUMv(0, mvFieldNeighbours[bestCand][0].mv, 0, 0);
        bestPred->cu.setPURefIdx(0, (char)mvFieldNeighbours[bestCand][0].refIdx, 0, 0);
        bestPred->cu.setPUMv(1, mvFieldNeighbours[bestCand][1].mv, 0, 0);
        bestPred->cu.setPURefIdx(1, (char)mvFieldNeighbours[bestCand][1].refIdx, 0, 0);
    }
}
void Analysis::checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize)
{
    interMode.initCosts();
    interMode.cu.setPartSizeSubParts(partSize);
    interMode.cu.setPredModeSubParts(MODE_INTER);

    if (predInterSearch(interMode, cuGeom, false, false))
    {
        /* predInterSearch sets interMode.sa8dBits */
        const Yuv& fencYuv = *interMode.fencYuv;
        Yuv& predYuv = interMode.predYuv;
        interMode.distortion = primitives.sa8d[cuGeom.log2CUSize - 2](fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
        interMode.sa8dCost = m_rdCost.calcRdSADCost(interMode.distortion, interMode.sa8dBits);
    }
    else
    {
        interMode.distortion = MAX_UINT;
        interMode.sa8dCost = MAX_INT64;
    }
}
void Analysis::checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly)
{
    interMode.initCosts();
    interMode.cu.setPartSizeSubParts(partSize);
    interMode.cu.setPredModeSubParts(MODE_INTER);

    if (predInterSearch(interMode, cuGeom, bMergeOnly, true))
    {
        /* predInterSearch sets interMode.sa8dBits, but this is ignored */
        encodeResAndCalcRdInterCU(interMode, cuGeom);
    }
    else
    {
        interMode.distortion = MAX_UINT;
        interMode.rdCost = MAX_INT64;
    }
}
/* Note that this function does not save the best intra prediction, it must
 * be generated later. It records the best mode in the cu */
void Analysis::checkIntraInInter_rd0_4(Mode& intraMode, const CUGeom& cuGeom)
{
    CUData& cu = intraMode.cu;
    uint32_t depth = cu.m_cuDepth[0];

    cu.setPartSizeSubParts(SIZE_2Nx2N);
    cu.setPredModeSubParts(MODE_INTRA);

    uint32_t initTrDepth = 0;
    uint32_t log2TrSize = cu.m_log2CUSize[0] - initTrDepth;
    uint32_t tuSize = 1 << log2TrSize;
    const uint32_t absPartIdx = 0;

    // Reference sample smoothing
    initAdiPattern(cu, cuGeom, absPartIdx, initTrDepth, ALL_IDX);

    pixel* fenc = m_modeDepth[depth].fencYuv.m_buf[0];
    uint32_t stride = m_modeDepth[depth].fencYuv.m_size;

    pixel *above = m_refAbove + tuSize - 1;
    pixel *aboveFiltered = m_refAboveFlt + tuSize - 1;
    pixel *left = m_refLeft + tuSize - 1;
    pixel *leftFiltered = m_refLeftFlt + tuSize - 1;

    uint32_t sad, bsad;
    uint32_t bits, bbits, mode, bmode;
    uint64_t cost, bcost;

    // 33 Angle modes once
    ALIGN_VAR_32(pixel, bufScale[32 * 32]);
    ALIGN_VAR_32(pixel, bufTrans[32 * 32]);
    ALIGN_VAR_32(pixel, tmp[33 * 32 * 32]);
    int scaleTuSize = tuSize;
    int scaleStride = stride;
    int costShift = 0;
    int sizeIdx = log2TrSize - 2;

    if (tuSize > 32)
    {
        // origin is 64x64, we scale to 32x32 and setup required parameters
        primitives.scale2D_64to32(bufScale, fenc, stride);
        fenc = bufScale;

        // reserve space in case primitives need to store data in above
        // or left buffers
        pixel _above[4 * 32 + 1];
        pixel _left[4 * 32 + 1];
        pixel *aboveScale = _above + 2 * 32;
        pixel *leftScale = _left + 2 * 32;
        aboveScale[0] = leftScale[0] = above[0];
        primitives.scale1D_128to64(aboveScale + 1, above + 1, 0);
        primitives.scale1D_128to64(leftScale + 1, left + 1, 0);

        scaleTuSize = 32;
        scaleStride = 32;
        costShift = 2;
        sizeIdx = 5 - 2; // log2(scaleTuSize) - 2

        // Filtered and Unfiltered refAbove and refLeft pointing to above and left.
        above = aboveScale;
        left = leftScale;
        aboveFiltered = aboveScale;
        leftFiltered = leftScale;
    }

    pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
    int predsize = scaleTuSize * scaleTuSize;

    m_entropyCoder.loadIntraDirModeLuma(m_rqt[depth].cur);

    /* there are three cost tiers for intra modes:
     *  pred[0]          - mode probable, least cost
     *  pred[1], pred[2] - less probable, slightly more cost
     *  non-mpm modes    - all cost the same (rbits) */
    uint64_t mpms;
    uint32_t preds[3];
    uint32_t rbits = getIntraRemModeBits(cu, absPartIdx, preds, mpms);

    // DC
    primitives.intra_pred[DC_IDX][sizeIdx](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
    bsad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
    bmode = mode = DC_IDX;
    bbits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
    bcost = m_rdCost.calcRdSADCost(bsad, bbits);

    // PLANAR
    pixel *abovePlanar = above;
    pixel *leftPlanar = left;

    if (tuSize & (8 | 16 | 32))
    {
        abovePlanar = aboveFiltered;
        leftPlanar = leftFiltered;
    }

    primitives.intra_pred[PLANAR_IDX][sizeIdx](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
    sad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
    mode = PLANAR_IDX;
    bits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
    cost = m_rdCost.calcRdSADCost(sad, bits);
    COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);

    // Transpose NxN
    primitives.transpose[sizeIdx](bufTrans, fenc, scaleStride);

    primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));

    bool modeHor;
    pixel *cmp;
    intptr_t srcStride;

#define TRY_ANGLE(angle) \
    modeHor = angle < 18; \
    cmp = modeHor ? bufTrans : fenc; \
    srcStride = modeHor ? scaleTuSize : scaleStride; \
    sad = sa8d(cmp, srcStride, &tmp[(angle - 2) * predsize], scaleTuSize) << costShift; \
    bits = (mpms & ((uint64_t)1 << angle)) ? m_entropyCoder.bitsIntraModeMPM(preds, angle) : rbits; \
    cost = m_rdCost.calcRdSADCost(sad, bits)

    if (m_param->bEnableFastIntra)
    {
        uint32_t asad = 0;
        uint32_t lowmode, highmode, amode = 5, abits = 0;
        uint64_t acost = MAX_INT64;

        /* pick the best angle, sampling at distance of 5 */
        for (mode = 5; mode < 35; mode += 5)
        {
            TRY_ANGLE(mode);
            COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits);
        }

        /* refine best angle at distance 2, then distance 1 */
        for (uint32_t dist = 2; dist >= 1; dist--)
        {
            lowmode = amode - dist;
            highmode = amode + dist;

            X265_CHECK(lowmode >= 2 && lowmode <= 34, "low intra mode out of range\n");
            TRY_ANGLE(lowmode);
            COPY4_IF_LT(acost, cost, amode, lowmode, asad, sad, abits, bits);

            X265_CHECK(highmode >= 2 && highmode <= 34, "high intra mode out of range\n");
            TRY_ANGLE(highmode);
            COPY4_IF_LT(acost, cost, amode, highmode, asad, sad, abits, bits);
        }

        if (amode == 33)
        {
            TRY_ANGLE(34);
            COPY4_IF_LT(acost, cost, amode, 34, asad, sad, abits, bits);
        }

        COPY4_IF_LT(bcost, acost, bmode, amode, bsad, asad, bbits, abits);
    }
    else // calculate and search all intra prediction angles for lowest cost
    {
        for (mode = 2; mode < 35; mode++)
        {
            TRY_ANGLE(mode);
            COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
        }
    }

    cu.setLumaIntraDirSubParts((uint8_t)bmode, absPartIdx, depth + initTrDepth);
    intraMode.initCosts();
    intraMode.totalBits = bbits;
    intraMode.distortion = bsad;
    intraMode.sa8dCost = bcost;
    intraMode.sa8dBits = bbits;
}
void Analysis::encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom)
{
    CUData& cu = intraMode.cu;
    Yuv* reconYuv = &intraMode.reconYuv;
    Yuv* fencYuv = &m_modeDepth[cuGeom.depth].fencYuv;

    X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does not expect NxN intra\n");
    X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect to be used in I slices\n");

    m_quant.setQPforQuant(cu);

    uint32_t tuDepthRange[2];
    cu.getIntraTUQtDepthRange(tuDepthRange, 0);

    m_entropyCoder.load(m_rqt[cuGeom.depth].cur);

    Cost icosts;
    codeIntraLumaQT(intraMode, cuGeom, 0, 0, false, icosts, tuDepthRange);
    extractIntraResultQT(cu, *reconYuv, 0, 0);

    intraMode.distortion = icosts.distortion;
    intraMode.distortion += estIntraPredChromaQT(intraMode, cuGeom);

    m_entropyCoder.resetBits();
    if (m_slice->m_pps->bTransquantBypassEnabled)
        m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
    m_entropyCoder.codeSkipFlag(cu, 0);
    m_entropyCoder.codePredMode(cu.m_predMode[0]);
    m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
    m_entropyCoder.codePredInfo(cu, 0);
    intraMode.mvBits += m_entropyCoder.getNumberOfWrittenBits();

    bool bCodeDQP = m_slice->m_pps->bUseDQP;
    m_entropyCoder.codeCoeff(cu, 0, cuGeom.depth, bCodeDQP, tuDepthRange);

    intraMode.totalBits = m_entropyCoder.getNumberOfWrittenBits();
    intraMode.coeffBits = intraMode.totalBits - intraMode.mvBits;
    if (m_rdCost.m_psyRd)
        intraMode.psyEnergy = m_rdCost.psyCost(cuGeom.log2CUSize - 2, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);

    m_entropyCoder.store(intraMode.contexts);
    updateModeCost(intraMode);
}
void Analysis::encodeResidue(const CUData& ctu, const CUGeom& cuGeom)
{
    if (cuGeom.depth < ctu.m_cuDepth[cuGeom.encodeIdx] && cuGeom.depth < g_maxCUDepth)
    {
        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
        {
            const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
            if (childCuData.flags & CUGeom::PRESENT)
                encodeResidue(ctu, childCuData);
        }
        return;
    }

    uint32_t absPartIdx = cuGeom.encodeIdx;
    int sizeIdx = cuGeom.log2CUSize - 2;

    Yuv& fencYuv = m_modeDepth[0].fencYuv;

    /* reuse the bestMode data structures at the current depth */
    Mode *bestMode = m_modeDepth[cuGeom.depth].bestMode;
    Yuv& reconYuv = bestMode->reconYuv;
    CUData& cu = bestMode->cu;

    cu.copyFromPic(ctu, cuGeom);
    m_quant.setQPforQuant(cu);

    if (cu.m_predMode[0] == MODE_INTRA)
    {
        uint32_t tuDepthRange[2];
        cu.getIntraTUQtDepthRange(tuDepthRange, 0);

        uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN;
        residualTransformQuantIntra(*bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
        getBestIntraModeChroma(*bestMode, cuGeom);
        residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
    }
    else if (cu.m_predMode[0] == MODE_INTER)
    {
        X265_CHECK(!ctu.m_skipFlag[absPartIdx], "skip not expected prior to transform\n");

        /* Calculate residual for current CU part into depth sized resiYuv */
        ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;

        /* at RD 0, the prediction pixels are accumulated into the top depth predYuv */
        Yuv& predYuv = m_modeDepth[0].bestMode->predYuv;
        pixel* predY = predYuv.getLumaAddr(absPartIdx);
        pixel* predU = predYuv.getCbAddr(absPartIdx);
        pixel* predV = predYuv.getCrAddr(absPartIdx);

        primitives.luma_sub_ps[sizeIdx](resiYuv.m_buf[0], resiYuv.m_size,
                                        fencYuv.getLumaAddr(absPartIdx), predY,
                                        fencYuv.m_size, predYuv.m_size);

        primitives.chroma[m_csp].sub_ps[sizeIdx](resiYuv.m_buf[1], resiYuv.m_csize,
                                                 fencYuv.getCbAddr(absPartIdx), predU,
                                                 fencYuv.m_csize, predYuv.m_csize);

        primitives.chroma[m_csp].sub_ps[sizeIdx](resiYuv.m_buf[2], resiYuv.m_csize,
                                                 fencYuv.getCrAddr(absPartIdx), predV,
                                                 fencYuv.m_csize, predYuv.m_csize);

        uint32_t tuDepthRange[2];
        cu.getInterTUQtDepthRange(tuDepthRange, 0);

        residualTransformQuantInter(*bestMode, cuGeom, 0, cuGeom.depth, tuDepthRange);

        if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
            cu.setSkipFlagSubParts(true);

        PicYuv& reconPicYuv = *m_frame->m_reconPicYuv;
        if (cu.getQtRootCbf(0)) // TODO: split to each component
        {
            /* residualTransformQuantInter() wrote transformed residual back into
             * resiYuv. Generate the recon pixels by adding it to the prediction */
            primitives.luma_add_ps[sizeIdx](reconYuv.m_buf[0], reconYuv.m_size,
                                            predY, resiYuv.m_buf[0], predYuv.m_size, resiYuv.m_size);
            primitives.chroma[m_csp].add_ps[sizeIdx](reconYuv.m_buf[1], reconYuv.m_csize,
                                                     predU, resiYuv.m_buf[1], predYuv.m_csize, resiYuv.m_csize);
            primitives.chroma[m_csp].add_ps[sizeIdx](reconYuv.m_buf[2], reconYuv.m_csize,
                                                     predV, resiYuv.m_buf[2], predYuv.m_csize, resiYuv.m_csize);

            /* copy the reconstructed part to the recon pic for later intra
             * predictions within the CTU */
            reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, cu.m_cuAddr, absPartIdx);
        }
        else
        {
            /* copy the prediction pixels to the recon pic for later intra
             * predictions within the CTU */
            primitives.luma_copy_pp[sizeIdx](reconPicYuv.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPicYuv.m_stride,
                                             predY, predYuv.m_size);
            primitives.chroma[m_csp].copy_pp[sizeIdx](reconPicYuv.getCbAddr(cu.m_cuAddr, absPartIdx), reconPicYuv.m_strideC,
                                                      predU, predYuv.m_csize);
            primitives.chroma[m_csp].copy_pp[sizeIdx](reconPicYuv.getCrAddr(cu.m_cuAddr, absPartIdx), reconPicYuv.m_strideC,
                                                      predV, predYuv.m_csize);
        }
    }
    /* else if (cu.m_predMode[0] == MODE_NONE) {} */

    checkDQP(cu, cuGeom);
    cu.updatePic(cuGeom.depth);
}
/* check whether current try is the best with identifying the depth of current try */
void Analysis::checkBestMode(Mode& mode, uint32_t depth)
{
    ModeDepth& md = m_modeDepth[depth];
    if (md.bestMode)
    {
        if (mode.rdCost < md.bestMode->rdCost)
            md.bestMode = &mode;
    }
    else
        md.bestMode = &mode;
}
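/* addSplitFlagCost() charges the CU split flag to a mode using the cost
 * metric that matches the rdLevel: entropy-coded bits at rdLevel >= 3, a
 * flat one-bit charge folded into the sa8d or total cost otherwise */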
void Analysis::addSplitFlagCost(Mode& mode, uint32_t depth)
{
    if (m_param->rdLevel >= 3)
    {
        /* code the split flag (0 or 1) and update bit costs */
        mode.contexts.resetBits();
        mode.contexts.codeSplitFlag(mode.cu, 0, depth);
        uint32_t bits = mode.contexts.getNumberOfWrittenBits();
        mode.mvBits += bits;
        mode.totalBits += bits;
        updateModeCost(mode);
    }
    else if (m_param->rdLevel <= 1)
    {
        mode.sa8dBits++;
        mode.sa8dCost = m_rdCost.calcRdSADCost(mode.distortion, mode.sa8dBits);
    }
    else
    {
        mode.mvBits++;
        mode.totalBits++;
        updateModeCost(mode);
    }
}
void Analysis::checkDQP(CUData& cu, const CUGeom& cuGeom)
{
    if (m_slice->m_pps->bUseDQP && cuGeom.depth <= m_slice->m_pps->maxCuDQPDepth)
    {
        if (cu.m_cuDepth[0] > cuGeom.depth) // detect splits
        {
            bool hasResidual = false;
            for (uint32_t absPartIdx = 0; absPartIdx < cu.m_numPartitions; absPartIdx++)
            {
                if (cu.getQtRootCbf(absPartIdx))
                {
                    hasResidual = true;
                    break;
                }
            }
            if (hasResidual)
                cu.setQPSubCUs(cu.getRefQP(0), 0, cuGeom.depth);
            else
                cu.setQPSubParts(cu.getRefQP(0), 0, cuGeom.depth);
        }
        else
        {
            if (!cu.getCbf(0, TEXT_LUMA, 0) && !cu.getCbf(0, TEXT_CHROMA_U, 0) && !cu.getCbf(0, TEXT_CHROMA_V, 0))
                cu.setQPSubParts(cu.getRefQP(0), 0, cuGeom.depth);
        }
    }
}
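/* topSkipMinDepth() derives a minimum analysis depth from the co-located
 * CTUs of the L0/L1 reference pictures, so the encoder does not evaluate
 * blocks larger than anything its references chose in the same area */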
uint32_t Analysis::topSkipMinDepth(const CUData& parentCTU, const CUGeom& cuGeom)
{
    /* Do not attempt to code a block larger than the largest block in the
     * co-located CTUs in L0 and L1 */
    int currentQP = parentCTU.m_qp[0];
    int previousQP = currentQP;
    uint32_t minDepth0 = 4, minDepth1 = 4;
    uint32_t sum = 0;
    int numRefs = 0;
    if (m_slice->m_numRefIdx[0])
    {
        numRefs++;
        const CUData& cu = *m_slice->m_refPicList[0][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
        previousQP = cu.m_qp[0];
        if (!cu.m_cuDepth[cuGeom.encodeIdx])
            return 0;
        for (uint32_t i = 0; i < cuGeom.numPartitions && minDepth0; i += 4)
        {
            uint32_t d = cu.m_cuDepth[cuGeom.encodeIdx + i];
            minDepth0 = X265_MIN(d, minDepth0);
            sum += d;
        }
    }
    if (m_slice->m_numRefIdx[1])
    {
        numRefs++;
        const CUData& cu = *m_slice->m_refPicList[1][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
        if (!cu.m_cuDepth[cuGeom.encodeIdx])
            return 0;
        for (uint32_t i = 0; i < cuGeom.numPartitions; i += 4)
        {
            uint32_t d = cu.m_cuDepth[cuGeom.encodeIdx + i];
            minDepth1 = X265_MIN(d, minDepth1);
            sum += d;
        }
    }
    if (!numRefs)
        return 0;

    uint32_t minDepth = X265_MIN(minDepth0, minDepth1);
    uint32_t thresh = minDepth * numRefs * (cuGeom.numPartitions >> 2);

    /* allow block size growth if QP is raising or avg depth is
     * less than 1.5 of min depth */
    if (minDepth && currentQP >= previousQP && (sum <= thresh + (thresh >> 1)))
        minDepth -= 1;

    return minDepth;
}
/* returns true if recursion should be stopped */
bool Analysis::recursionDepthCheck(const CUData& parentCTU, const CUGeom& cuGeom, const Mode& bestMode)
{
    /* early exit when the RD cost of best mode at depth n is less than the sum
     * of average of RD cost of the neighbor CU's(above, aboveleft, aboveright,
     * left, colocated) and avg cost of that CU at depth "n" with weightage for
     * each quantity */

    uint32_t depth = cuGeom.depth;
    FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData);
    FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
    uint64_t cuCost = cuStat.avgCost[depth] * cuStat.count[depth];
    uint64_t cuCount = cuStat.count[depth];

    uint64_t neighCost = 0, neighCount = 0;
    const CUData* above = parentCTU.m_cuAbove;
    if (above)
    {
        FrameData::RCStatCU& astat = curEncData.m_cuStat[above->m_cuAddr];
        neighCost += astat.avgCost[depth] * astat.count[depth];
        neighCount += astat.count[depth];
    }
    const CUData* aboveLeft = parentCTU.m_cuAboveLeft;
    if (aboveLeft)
    {
        FrameData::RCStatCU& lstat = curEncData.m_cuStat[aboveLeft->m_cuAddr];
        neighCost += lstat.avgCost[depth] * lstat.count[depth];
        neighCount += lstat.count[depth];
    }
    const CUData* aboveRight = parentCTU.m_cuAboveRight;
    if (aboveRight)
    {
        FrameData::RCStatCU& rstat = curEncData.m_cuStat[aboveRight->m_cuAddr];
        neighCost += rstat.avgCost[depth] * rstat.count[depth];
        neighCount += rstat.count[depth];
    }
    const CUData* left = parentCTU.m_cuLeft;
    if (left)
    {
        FrameData::RCStatCU& nstat = curEncData.m_cuStat[left->m_cuAddr];
        neighCost += nstat.avgCost[depth] * nstat.count[depth];
        neighCount += nstat.count[depth];
    }

    // give 60% weight to all CU's and 40% weight to neighbour CU's
    if (neighCost + cuCount)
    {
        uint64_t avgCost = ((3 * cuCost) + (2 * neighCost)) / ((3 * cuCount) + (2 * neighCount));
        uint64_t curCost = m_param->rdLevel > 1 ? bestMode.rdCost : bestMode.sa8dCost;
        if (curCost < avgCost && avgCost)
            return true;
    }

    return false;
}