Imported Upstream version 1.4
[deb_x265.git] / source / encoder / entropy.cpp
CommitLineData
72b9787e
JB
1/*****************************************************************************
2* Copyright (C) 2013 x265 project
3*
4* Authors: Steve Borho <steve@borho.org>
5*
6* This program is free software; you can redistribute it and/or modify
7* it under the terms of the GNU General Public License as published by
8* the Free Software Foundation; either version 2 of the License, or
9* (at your option) any later version.
10*
11* This program is distributed in the hope that it will be useful,
12* but WITHOUT ANY WARRANTY; without even the implied warranty of
13* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14* GNU General Public License for more details.
15*
16* You should have received a copy of the GNU General Public License
17* along with this program; if not, write to the Free Software
18* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19*
20* This program is also available under a commercial proprietary license.
21* For more information, contact us at license @ x265.com.
22*****************************************************************************/
23
24#include "common.h"
25#include "framedata.h"
26#include "scalinglist.h"
27#include "quant.h"
28#include "contexts.h"
29#include "picyuv.h"
30
31#include "sao.h"
32#include "entropy.h"
33
/* Binarization constants used by the entropy coder */
#define CU_DQP_TU_CMAX 5 // upper bound on the number of truncated-unary bins
#define CU_DQP_EG_k    0 // order of the exp-golomb code
#define START_VALUE    8 // initial predictor when coding scaling list coefficients in DPCM mode

/* Partition offset table indexed by the numeric PartSize value.  codePUWise()
 * scales the selected entry by the CU depth and uses the result as the
 * absPartIdx step from one PU to the next. */
static const uint32_t g_puOffset[8] = { 0, 8, 4, 4, 2, 10, 1, 5 };
39
40namespace x265 {
41
42Entropy::Entropy()
43{
44 markValid();
45 m_fracBits = 0;
46 X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
47}
48
49void Entropy::codeVPS(const VPS& vps)
50{
51 WRITE_CODE(0, 4, "vps_video_parameter_set_id");
52 WRITE_CODE(3, 2, "vps_reserved_three_2bits");
53 WRITE_CODE(0, 6, "vps_reserved_zero_6bits");
54 WRITE_CODE(0, 3, "vps_max_sub_layers_minus1");
55 WRITE_FLAG(1, "vps_temporal_id_nesting_flag");
56 WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
57
58 codeProfileTier(vps.ptl);
59
60 WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
61 WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
62 WRITE_UVLC(vps.numReorderPics, "vps_num_reorder_pics[i]");
63
64 WRITE_UVLC(0, "vps_max_latency_increase_plus1[i]");
65 WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
66 WRITE_UVLC(0, "vps_max_op_sets_minus1");
67 WRITE_FLAG(0, "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
68 WRITE_FLAG(0, "vps_extension_flag");
69}
70
71void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
72{
73 WRITE_CODE(0, 4, "sps_video_parameter_set_id");
74 WRITE_CODE(0, 3, "sps_max_sub_layers_minus1");
75 WRITE_FLAG(1, "sps_temporal_id_nesting_flag");
76
77 codeProfileTier(ptl);
78
79 WRITE_UVLC(0, "sps_seq_parameter_set_id");
80 WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
81
82 if (sps.chromaFormatIdc == X265_CSP_I444)
83 WRITE_FLAG(0, "separate_colour_plane_flag");
84
85 WRITE_UVLC(sps.picWidthInLumaSamples, "pic_width_in_luma_samples");
86 WRITE_UVLC(sps.picHeightInLumaSamples, "pic_height_in_luma_samples");
87
88 const Window& conf = sps.conformanceWindow;
89 WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
90 if (conf.bEnabled)
91 {
92 int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
93 WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_left_offset");
94 WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_right_offset");
95 WRITE_UVLC(conf.topOffset >> vShift, "conf_win_top_offset");
96 WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
97 }
98
99 WRITE_UVLC(X265_DEPTH - 8, "bit_depth_luma_minus8");
100 WRITE_UVLC(X265_DEPTH - 8, "bit_depth_chroma_minus8");
101 WRITE_UVLC(BITS_FOR_POC - 4, "log2_max_pic_order_cnt_lsb_minus4");
102 WRITE_FLAG(true, "sps_sub_layer_ordering_info_present_flag");
103
104 WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
105 WRITE_UVLC(sps.numReorderPics, "sps_num_reorder_pics[i]");
106 WRITE_UVLC(0, "sps_max_latency_increase_plus1[i]");
107
108 WRITE_UVLC(sps.log2MinCodingBlockSize - 3, "log2_min_coding_block_size_minus3");
109 WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
110 WRITE_UVLC(sps.quadtreeTULog2MinSize - 2, "log2_min_transform_block_size_minus2");
111 WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
112 WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1, "max_transform_hierarchy_depth_inter");
113 WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1, "max_transform_hierarchy_depth_intra");
114 WRITE_FLAG(scalingList.m_bEnabled, "scaling_list_enabled_flag");
115 if (scalingList.m_bEnabled)
116 {
117 WRITE_FLAG(scalingList.m_bDataPresent, "sps_scaling_list_data_present_flag");
118 if (scalingList.m_bDataPresent)
119 codeScalingList(scalingList);
120 }
121 WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
122 WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
123
124 WRITE_FLAG(0, "pcm_enabled_flag");
125 WRITE_UVLC(0, "num_short_term_ref_pic_sets");
126 WRITE_FLAG(0, "long_term_ref_pics_present_flag");
127
128 WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
129 WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
130
131 WRITE_FLAG(1, "vui_parameters_present_flag");
132 codeVUI(sps.vuiParameters);
133
134 WRITE_FLAG(0, "sps_extension_flag");
135}
136
137void Entropy::codePPS(const PPS& pps)
138{
139 WRITE_UVLC(0, "pps_pic_parameter_set_id");
140 WRITE_UVLC(0, "pps_seq_parameter_set_id");
141 WRITE_FLAG(0, "dependent_slice_segments_enabled_flag");
142 WRITE_FLAG(0, "output_flag_present_flag");
143 WRITE_CODE(0, 3, "num_extra_slice_header_bits");
144 WRITE_FLAG(pps.bSignHideEnabled, "sign_data_hiding_flag");
145 WRITE_FLAG(0, "cabac_init_present_flag");
146 WRITE_UVLC(0, "num_ref_idx_l0_default_active_minus1");
147 WRITE_UVLC(0, "num_ref_idx_l1_default_active_minus1");
148
149 WRITE_SVLC(0, "init_qp_minus26");
150 WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
151 WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
152
153 WRITE_FLAG(pps.bUseDQP, "cu_qp_delta_enabled_flag");
154 if (pps.bUseDQP)
155 WRITE_UVLC(pps.maxCuDQPDepth, "diff_cu_qp_delta_depth");
156
157 WRITE_SVLC(pps.chromaCbQpOffset, "pps_cb_qp_offset");
158 WRITE_SVLC(pps.chromaCrQpOffset, "pps_cr_qp_offset");
159 WRITE_FLAG(0, "pps_slice_chroma_qp_offsets_present_flag");
160
161 WRITE_FLAG(pps.bUseWeightPred, "weighted_pred_flag");
162 WRITE_FLAG(pps.bUseWeightedBiPred, "weighted_bipred_flag");
163 WRITE_FLAG(pps.bTransquantBypassEnabled, "transquant_bypass_enable_flag");
164 WRITE_FLAG(0, "tiles_enabled_flag");
165 WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
166 WRITE_FLAG(1, "loop_filter_across_slices_enabled_flag");
167
168 WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
169 if (pps.bDeblockingFilterControlPresent)
170 {
171 WRITE_FLAG(0, "deblocking_filter_override_enabled_flag");
172 WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
173 if (!pps.bPicDisableDeblockingFilter)
174 {
175 WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
176 WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2, "pps_tc_offset_div2");
177 }
178 }
179
180 WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
181 WRITE_FLAG(0, "lists_modification_present_flag");
182 WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
183 WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
184 WRITE_FLAG(0, "pps_extension_flag");
185}
186
187void Entropy::codeProfileTier(const ProfileTierLevel& ptl)
188{
189 WRITE_CODE(0, 2, "XXX_profile_space[]");
190 WRITE_FLAG(ptl.tierFlag, "XXX_tier_flag[]");
191 WRITE_CODE(ptl.profileIdc, 5, "XXX_profile_idc[]");
192 for (int j = 0; j < 32; j++)
193 WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
194
195 WRITE_FLAG(ptl.progressiveSourceFlag, "general_progressive_source_flag");
196 WRITE_FLAG(ptl.interlacedSourceFlag, "general_interlaced_source_flag");
197 WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
198 WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
199
200 if (ptl.profileIdc == Profile::MAINREXT || ptl.profileIdc == Profile::HIGHTHROUGHPUTREXT)
201 {
202 uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
203 int csp = ptl.chromaFormatConstraint;
204 WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
205 WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
206 WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
207 WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
208 WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_420chroma_constraint_flag");
209 WRITE_FLAG(csp == X265_CSP_I400, "general_max_monochrome_constraint_flag");
210 WRITE_FLAG(ptl.intraConstraintFlag, "general_intra_constraint_flag");
211 WRITE_FLAG(0, "general_one_picture_only_constraint_flag");
212 WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
213 WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[0..15]");
214 WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[16..31]");
215 WRITE_CODE(0 , 3, "XXX_reserved_zero_35bits[32..34]");
216 }
217 else
218 {
219 WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
220 WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
221 WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
222 }
223
224 WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
225}
226
227void Entropy::codeVUI(const VUI& vui)
228{
229 WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
230 if (vui.aspectRatioInfoPresentFlag)
231 {
232 WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
233 if (vui.aspectRatioIdc == 255)
234 {
235 WRITE_CODE(vui.sarWidth, 16, "sar_width");
236 WRITE_CODE(vui.sarHeight, 16, "sar_height");
237 }
238 }
239
240 WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
241 if (vui.overscanInfoPresentFlag)
242 WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
243
244 WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
245 if (vui.videoSignalTypePresentFlag)
246 {
247 WRITE_CODE(vui.videoFormat, 3, "video_format");
248 WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
249 WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
250 if (vui.colourDescriptionPresentFlag)
251 {
252 WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
253 WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
254 WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
255 }
256 }
257
258 WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
259 if (vui.chromaLocInfoPresentFlag)
260 {
261 WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
262 WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
263 }
264
265 WRITE_FLAG(0, "neutral_chroma_indication_flag");
266 WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
267 WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
268
269 WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
270 if (vui.defaultDisplayWindow.bEnabled)
271 {
272 WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
273 WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
274 WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
275 WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
276 }
277
278 WRITE_FLAG(1, "vui_timing_info_present_flag");
279 WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
280 WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
281 WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
282
283 WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
284 if (vui.hrdParametersPresentFlag)
285 codeHrdParameters(vui.hrdParameters);
286
287 WRITE_FLAG(0, "bitstream_restriction_flag");
288}
289
290void Entropy::codeScalingList(const ScalingList& scalingList)
291{
292 for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
293 {
294 for (int listId = 0; listId < ScalingList::NUM_LISTS; listId++)
295 {
296 int predList = scalingList.checkPredMode(sizeId, listId);
297 WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
298 if (predList >= 0)
299 WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
300 else // DPCM Mode
301 codeScalingList(scalingList, sizeId, listId);
302 }
303 }
304}
305
306void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
307{
308 int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
309 const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
310 int nextCoef = START_VALUE;
311 int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
312 int data;
313
314 if (sizeId > BLOCK_8x8)
315 {
316 WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
317 nextCoef = scalingList.m_scalingListDC[sizeId][listId];
318 }
319 for (int i = 0; i < coefNum; i++)
320 {
321 data = src[scan[i]] - nextCoef;
322 nextCoef = src[scan[i]];
323 if (data > 127)
324 data = data - 256;
325 if (data < -128)
326 data = data + 256;
327
328 WRITE_SVLC(data, "scaling_list_delta_coef");
329 }
330}
331
332void Entropy::codeHrdParameters(const HRDInfo& hrd)
333{
334 WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
335 WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
336 WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
337
338 WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
339 WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
340
341 WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
342 WRITE_CODE(hrd.cpbRemovalDelayLength - 1, 5, "au_cpb_removal_delay_length_minus1");
343 WRITE_CODE(hrd.dpbOutputDelayLength - 1, 5, "dpb_output_delay_length_minus1");
344
345 WRITE_FLAG(1, "fixed_pic_rate_general_flag");
346 WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
347 WRITE_UVLC(0, "cpb_cnt_minus1");
348
349 WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
350 WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
351 WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
352}
353
354void Entropy::codeAUD(const Slice& slice)
355{
356 int picType;
357
358 switch (slice.m_sliceType)
359 {
360 case I_SLICE:
361 picType = 0;
362 break;
363 case P_SLICE:
364 picType = 1;
365 break;
366 case B_SLICE:
367 picType = 2;
368 break;
369 default:
370 picType = 7;
371 break;
372 }
373
374 WRITE_CODE(picType, 3, "pic_type");
375}
376
377void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData)
378{
379 WRITE_FLAG(1, "first_slice_segment_in_pic_flag");
380 if (slice.getRapPicFlag())
381 WRITE_FLAG(0, "no_output_of_prior_pics_flag");
382
383 WRITE_UVLC(0, "slice_pic_parameter_set_id");
384
385 /* x265 does not use dependent slices, so always write all this data */
386
387 WRITE_UVLC(slice.m_sliceType, "slice_type");
388
389 if (!slice.getIdrPicFlag())
390 {
391 int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << BITS_FOR_POC)) % (1 << BITS_FOR_POC);
392 WRITE_CODE(picOrderCntLSB, BITS_FOR_POC, "pic_order_cnt_lsb");
393
394#if _DEBUG || CHECKED_BUILD
395 // check for bitstream restriction stating that:
396 // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
397 // Ideally this process should not be repeated for each slice in a picture
398 if (slice.isIRAP())
399 for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
400 X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
401#endif
402
403 WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
404 codeShortTermRefPicSet(slice.m_rps);
405
406 if (slice.m_sps->bTemporalMVPEnabled)
407 WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
408 }
409 const SAOParam *saoParam = encData.m_saoParam;
410 if (slice.m_sps->bUseSAO)
411 {
412 WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
413 WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
414 }
415
416 // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
417 // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
418
419 if (!slice.isIntra())
420 {
421 bool overrideFlag = (slice.m_numRefIdx[0] != 1 || (slice.isInterB() && slice.m_numRefIdx[1] != 1));
422 WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
423 if (overrideFlag)
424 {
425 WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
426 if (slice.isInterB())
427 WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
428 else
429 {
430 X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
431 }
432 }
433 }
434 else
435 {
436 X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
437 }
438
439 if (slice.isInterB())
440 WRITE_FLAG(0, "mvd_l1_zero_flag");
441
442 if (slice.m_sps->bTemporalMVPEnabled)
443 {
444 if (slice.m_sliceType == B_SLICE)
445 WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
446
447 if (slice.m_sliceType != I_SLICE &&
448 ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
449 (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
450 {
451 WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
452 }
453 }
454 if ((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE))
455 codePredWeightTable(slice);
456
457 X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
458 if (!slice.isIntra())
459 WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
460
461 int code = slice.m_sliceQp - 26;
462 WRITE_SVLC(code, "slice_qp_delta");
463
464 bool isSAOEnabled = slice.m_sps->bUseSAO ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
465 bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
466
467 if (isSAOEnabled || isDBFEnabled)
468 WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
469}
470
471/** write wavefront substreams sizes for the slice header */
472void Entropy::codeSliceHeaderWPPEntryPoints(const Slice& slice, const uint32_t *substreamSizes, uint32_t maxOffset)
473{
474 uint32_t offsetLen = 1;
475 while (maxOffset >= (1U << offsetLen))
476 {
477 offsetLen++;
478 X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
479 }
480
481 uint32_t numRows = slice.m_sps->numCuInHeight - 1;
482 WRITE_UVLC(numRows, "num_entry_point_offsets");
483 if (numRows > 0)
484 WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
485
486 for (uint32_t i = 0; i < numRows; i++)
487 WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
488}
489
490void Entropy::codeShortTermRefPicSet(const RPS& rps)
491{
492 WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
493 WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
494 int prev = 0;
495 for (int j = 0; j < rps.numberOfNegativePictures; j++)
496 {
497 WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
498 prev = rps.deltaPOC[j];
499 WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
500 }
501
502 prev = 0;
503 for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
504 {
505 WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
506 prev = rps.deltaPOC[j];
507 WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
508 }
509}
510
511void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
512{
513 bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
514 encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
515}
516
517/* encode a CU block recursively */
518void Entropy::encodeCU(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
519{
520 const Slice* slice = cu.m_slice;
521
522 if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
523 bEncodeDQP = true;
524
525 int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
526 int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
527
528 if (!cuUnsplitFlag)
529 {
530 uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
531 for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts)
532 {
533 const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
534 if (childCuData.flags & CUGeom::PRESENT)
535 encodeCU(cu, childCuData, absPartIdx, depth + 1, bEncodeDQP);
536 }
537 return;
538 }
539
540 // We need to split, so don't try these modes.
541 if (cuSplitFlag)
542 codeSplitFlag(cu, absPartIdx, depth);
543
544 if (depth < cu.m_cuDepth[absPartIdx] && depth < g_maxCUDepth)
545 {
546 uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
547
548 for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts)
549 {
550 const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
551 encodeCU(cu, childCuData, absPartIdx, depth + 1, bEncodeDQP);
552 }
553 return;
554 }
555
556 if (slice->m_pps->bTransquantBypassEnabled)
557 codeCUTransquantBypassFlag(cu.m_tqBypass[absPartIdx]);
558
559 if (!slice->isIntra())
560 codeSkipFlag(cu, absPartIdx);
561
562 if (cu.isSkipped(absPartIdx))
563 {
564 codeMergeIndex(cu, absPartIdx);
565 finishCU(cu, absPartIdx, depth);
566 return;
567 }
568
569 if (!slice->isIntra())
570 codePredMode(cu.m_predMode[absPartIdx]);
571
572 codePartSize(cu, absPartIdx, depth);
573
574 // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
575 codePredInfo(cu, absPartIdx);
576
577 uint32_t tuDepthRange[2];
578 if (cu.isIntra(absPartIdx))
579 cu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
580 else
581 cu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
582
583 // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
584 codeCoeff(cu, absPartIdx, depth, bEncodeDQP, tuDepthRange);
585
586 // --- write terminating bit ---
587 finishCU(cu, absPartIdx, depth);
588}
589
590/* finish encoding a cu and handle end-of-slice conditions */
591void Entropy::finishCU(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
592{
593 const Slice* slice = cu.m_slice;
594 X265_CHECK(cu.m_slice->m_endCUAddr == cu.m_slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
595 uint32_t realEndAddress = slice->m_endCUAddr;
596 uint32_t cuAddr = cu.getSCUAddr() + absPartIdx;
597
598 uint32_t granularityMask = g_maxCUSize - 1;
599 uint32_t cuSize = 1 << cu.m_log2CUSize[absPartIdx];
600 uint32_t rpelx = cu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
601 uint32_t bpely = cu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
602 bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
603 ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
604
605 if (granularityBoundary)
606 {
607 // Encode slice finish
608 bool bTerminateSlice = false;
609 if (cuAddr + (NUM_CU_PARTITIONS >> (depth << 1)) == realEndAddress)
610 bTerminateSlice = true;
611
612 // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
613 if (!bTerminateSlice)
614 encodeBinTrm(0);
615
616 if (!m_bitIf)
617 resetBits(); // TODO: most likely unnecessary
618 }
619}
620
621void Entropy::encodeTransform(const CUData& cu, CoeffCodeState& state, uint32_t offsetLuma, uint32_t offsetChroma, uint32_t absPartIdx,
622 uint32_t absPartIdxStep, uint32_t depth, uint32_t log2TrSize, uint32_t trIdx, bool& bCodeDQP, uint32_t depthRange[2])
623{
624 const bool subdiv = cu.m_tuDepth[absPartIdx] + cu.m_cuDepth[absPartIdx] > (uint8_t)depth;
625 uint32_t hChromaShift = cu.m_hChromaShift;
626 uint32_t vChromaShift = cu.m_vChromaShift;
627 uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, trIdx);
628 uint32_t cbfU = cu.getCbf(absPartIdx, TEXT_CHROMA_U, trIdx);
629 uint32_t cbfV = cu.getCbf(absPartIdx, TEXT_CHROMA_V, trIdx);
630
631 if (!trIdx)
632 state.bakAbsPartIdxCU = absPartIdx;
633
634 if (log2TrSize == 2 && cu.m_chromaFormat != X265_CSP_I444)
635 {
636 uint32_t partNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
637 if (!(absPartIdx & (partNum - 1)))
638 {
639 state.bakAbsPartIdx = absPartIdx;
640 state.bakChromaOffset = offsetChroma;
641 }
642 else if ((absPartIdx & (partNum - 1)) == (partNum - 1))
643 {
644 cbfU = cu.getCbf(state.bakAbsPartIdx, TEXT_CHROMA_U, trIdx);
645 cbfV = cu.getCbf(state.bakAbsPartIdx, TEXT_CHROMA_V, trIdx);
646 }
647 }
648
649 /* in each of these conditions, the subdiv flag is implied and not signaled,
650 * so we have checks to make sure the implied value matches our intentions */
651 if (cu.m_predMode[absPartIdx] == MODE_INTRA && cu.m_partSize[absPartIdx] == SIZE_NxN && depth == cu.m_cuDepth[absPartIdx])
652 {
653 X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
654 }
655 else if (cu.m_predMode[absPartIdx] == MODE_INTER && (cu.m_partSize[absPartIdx] != SIZE_2Nx2N) && depth == cu.m_cuDepth[absPartIdx] &&
656 cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
657 {
658 X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2TrSize %d, depthRange[0] %d\n", log2TrSize, depthRange[0]);
659 }
660 else if (log2TrSize > depthRange[1])
661 {
662 X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
663 }
664 else if (log2TrSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2TrSize == depthRange[0])
665 {
666 X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
667 }
668 else
669 {
670 X265_CHECK(log2TrSize > depthRange[0], "transform size failure\n");
671 codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
672 }
673
674 const uint32_t trDepthCurr = depth - cu.m_cuDepth[absPartIdx];
675 const bool bFirstCbfOfCU = trDepthCurr == 0;
676
677 bool mCodeAll = true;
678 const uint32_t numPels = 1 << (log2TrSize * 2 - hChromaShift - vChromaShift);
679 if (numPels < (MIN_TU_SIZE * MIN_TU_SIZE))
680 mCodeAll = false;
681
682 if (bFirstCbfOfCU || mCodeAll)
683 {
684 uint32_t tuSize = 1 << log2TrSize;
685 if (bFirstCbfOfCU || cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr - 1))
686 codeQtCbf(cu, absPartIdx, absPartIdxStep, (tuSize >> hChromaShift), (tuSize >> vChromaShift), TEXT_CHROMA_U, trDepthCurr, (subdiv == 0));
687 if (bFirstCbfOfCU || cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr - 1))
688 codeQtCbf(cu, absPartIdx, absPartIdxStep, (tuSize >> hChromaShift), (tuSize >> vChromaShift), TEXT_CHROMA_V, trDepthCurr, (subdiv == 0));
689 }
690 else
691 {
692 X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr - 1), "chroma xform size match failure\n");
693 X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr - 1), "chroma xform size match failure\n");
694 }
695
696 if (subdiv)
697 {
698 log2TrSize--;
699 uint32_t numCoeff = 1 << (log2TrSize * 2);
700 uint32_t numCoeffC = (numCoeff >> (hChromaShift + vChromaShift));
701 trIdx++;
702 ++depth;
703 absPartIdxStep >>= 2;
704 const uint32_t partNum = NUM_CU_PARTITIONS >> (depth << 1);
705
706 encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
707
708 absPartIdx += partNum;
709 offsetLuma += numCoeff;
710 offsetChroma += numCoeffC;
711 encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
712
713 absPartIdx += partNum;
714 offsetLuma += numCoeff;
715 offsetChroma += numCoeffC;
716 encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
717
718 absPartIdx += partNum;
719 offsetLuma += numCoeff;
720 offsetChroma += numCoeffC;
721 encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
722 }
723 else
724 {
725 if (cu.m_predMode[absPartIdx] != MODE_INTRA && depth == cu.m_cuDepth[absPartIdx] && !cu.getCbf(absPartIdx, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdx, TEXT_CHROMA_V, 0))
726 {
727 X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
728 }
729 else
730 codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu.m_tuDepth[absPartIdx]);
731
732 if (cbfY || cbfU || cbfV)
733 {
734 // dQP: only for CTU once
735 if (cu.m_slice->m_pps->bUseDQP)
736 {
737 if (bCodeDQP)
738 {
739 codeDeltaQP(cu, state.bakAbsPartIdxCU);
740 bCodeDQP = false;
741 }
742 }
743 }
744 if (cbfY)
745 codeCoeffNxN(cu, cu.m_trCoeff[0] + offsetLuma, absPartIdx, log2TrSize, TEXT_LUMA);
746
747 int chFmt = cu.m_chromaFormat;
748 if (log2TrSize == 2 && chFmt != X265_CSP_I444)
749 {
750 uint32_t partNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
751 if ((absPartIdx & (partNum - 1)) == (partNum - 1))
752 {
753 const uint32_t log2TrSizeC = 2;
754 const bool splitIntoSubTUs = (chFmt == X265_CSP_I422);
755
756 uint32_t curPartNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
757
758 for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
759 {
760 TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, state.bakAbsPartIdx);
761 const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
762 do
763 {
764 uint32_t cbf = cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
765 if (cbf)
766 {
767 uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
768 codeCoeffNxN(cu, coeffChroma + state.bakChromaOffset + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
769 }
770 }
771 while (tuIterator.isNextSection());
772 }
773 }
774 }
775 else
776 {
777 uint32_t log2TrSizeC = log2TrSize - hChromaShift;
778 const bool splitIntoSubTUs = (chFmt == X265_CSP_I422);
779 uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
780 for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
781 {
782 TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdx);
783 const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
784 do
785 {
786 uint32_t cbf = cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
787 if (cbf)
788 {
789 uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
790 codeCoeffNxN(cu, coeffChroma + offsetChroma + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
791 }
792 }
793 while (tuIterator.isNextSection());
794 }
795 }
796 }
797}
798
799void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
800{
801 if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
802 {
803 codeIntraDirLumaAng(cu, absPartIdx, true);
804 if (cu.m_chromaFormat != X265_CSP_I400)
805 {
806 uint32_t chromaDirMode[NUM_CHROMA_MODE];
807 cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
808
809 codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
810
811 if ((cu.m_chromaFormat == X265_CSP_I444) && (cu.m_partSize[absPartIdx] == SIZE_NxN))
812 {
813 uint32_t partOffset = (NUM_CU_PARTITIONS >> (cu.m_cuDepth[absPartIdx] << 1)) >> 2;
814 for (uint32_t i = 1; i <= 3; i++)
815 {
816 uint32_t offset = absPartIdx + i * partOffset;
817 cu.getAllowedChromaDir(offset, chromaDirMode);
818 codeIntraDirChroma(cu, offset, chromaDirMode);
819 }
820 }
821 }
822 }
823 else // if it is inter mode, encode motion vector and reference index
824 codePUWise(cu, absPartIdx);
825}
826
827/** encode motion information for every PU block */
828void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
829{
830 PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
831 uint32_t numPU = (partSize == SIZE_2Nx2N ? 1 : (partSize == SIZE_NxN ? 4 : 2));
832 uint32_t depth = cu.m_cuDepth[absPartIdx];
833 uint32_t puOffset = (g_puOffset[uint32_t(partSize)] << (g_maxFullDepth - depth) * 2) >> 4;
834
835 for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += puOffset)
836 {
837 codeMergeFlag(cu, subPartIdx);
838 if (cu.m_mergeFlag[subPartIdx])
839 codeMergeIndex(cu, subPartIdx);
840 else
841 {
842 if (cu.m_slice->isInterB())
843 codeInterDir(cu, subPartIdx);
844
845 uint32_t interDir = cu.m_interDir[subPartIdx];
846 for (uint32_t list = 0; list < 2; list++)
847 {
848 if (interDir & (1 << list))
849 {
850 X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
851
852 codeRefFrmIdxPU(cu, subPartIdx, list);
853 codeMvd(cu, subPartIdx, list);
854 codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
855 }
856 }
857 }
858 }
859}
860
861/** encode reference frame index for a PU block */
862void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
863{
864 X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
865
866 if (cu.m_slice->m_numRefIdx[list] > 1)
867 codeRefFrmIdx(cu, absPartIdx, list);
868}
869
870void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, uint32_t depth, bool& bCodeDQP, uint32_t depthRange[2])
871{
872 if (!cu.isIntra(absPartIdx))
873 {
874 if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
875 codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
876 if (!cu.getQtRootCbf(absPartIdx))
877 return;
878 }
879
880 uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
881 uint32_t lumaOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
882 uint32_t chromaOffset = lumaOffset >> (cu.m_hChromaShift + cu.m_vChromaShift);
883 uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> (depth << 1);
884 CoeffCodeState state;
885 encodeTransform(cu, state, lumaOffset, chromaOffset, absPartIdx, absPartIdxStep, depth, log2CUSize, 0, bCodeDQP, depthRange);
886}
887
888void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
889{
890 int typeIdx = ctuParam.typeIdx;
891
892 if (plane != 2)
893 {
894 encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
895 if (typeIdx >= 0)
896 encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
897 }
898
899 if (typeIdx >= 0)
900 {
901 enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
902 if (typeIdx == SAO_BO)
903 {
904 for (int i = 0; i < SAO_BO_LEN; i++)
905 codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
906
907 for (int i = 0; i < SAO_BO_LEN; i++)
908 if (ctuParam.offset[i] != 0)
909 encodeBinEP(ctuParam.offset[i] < 0);
910
911 encodeBinsEP(ctuParam.bandPos, 5);
912 }
913 else // if (typeIdx < SAO_BO)
914 {
915 codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
916 codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
917 codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
918 codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
919 if (plane != 2)
920 encodeBinsEP((uint32_t)(typeIdx), 2);
921 }
922 }
923}
924
/** derive the initial state of a context model from the slice QP and the
 *  8-bit initialization value (upper nibble: slope index, lower nibble: offset index).
 *  The returned byte packs (state index << 1) | MPS. */
uint8_t sbacInit(int qp, int initValue)
{
    // keep the QP inside [0, 51]
    if (qp < 0)
        qp = 0;
    else if (qp > 51)
        qp = 51;

    const int slope = (initValue >> 4) * 5 - 45;
    const int offset = ((initValue & 15) << 3) - 16;

    // linear model in QP, limited to [1, 126]
    int preState = ((slope * qp) >> 4) + offset;
    if (preState < 1)
        preState = 1;
    if (preState > 126)
        preState = 126;

    // the upper half of the range maps to MPS = 1, the lower half to MPS = 0
    if (preState >= 64)
        return (uint8_t)(((preState - 64) << 1) + 1);

    return (uint8_t)((63 - preState) << 1);
}
938
939static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
940{
941 ctxModel += sliceType * size;
942
943 for (int n = 0; n < size; n++)
944 contextModel[n] = sbacInit(qp, ctxModel[n]);
945}
946
947void Entropy::resetEntropy(const Slice& slice)
948{
949 int qp = slice.m_sliceQp;
950 SliceType sliceType = slice.m_sliceType;
951
952 initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
953 initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
954 initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
955 initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
956 initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
957 initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
958 initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
959 initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
960 initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
961 initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
962 initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
963 initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
964 initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
965 initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
966 initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
967 initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
968 initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
969 initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
970 initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
971 initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
972 initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
973 initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
974 initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
975 initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
976 initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
977 initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
978 // new structure
979
980 start();
981}
982
983/* code explicit wp tables */
984void Entropy::codePredWeightTable(const Slice& slice)
985{
986 const WeightParam *wp;
987 bool bChroma = true; // 4:0:0 not yet supported
988 bool bDenomCoded = false;
989 int numRefDirs = slice.m_sliceType == B_SLICE ? 2 : 1;
990 uint32_t totalSignalledWeightFlags = 0;
991
992 if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
993 (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
994 {
995 for (int list = 0; list < numRefDirs; list++)
996 {
997 for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
998 {
999 wp = slice.m_weightPredTable[list][ref];
1000 if (!bDenomCoded)
1001 {
1002 WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1003
1004 if (bChroma)
1005 {
1006 int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1007 WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1008 }
1009 bDenomCoded = true;
1010 }
1011 WRITE_FLAG(wp[0].bPresentFlag, "luma_weight_lX_flag");
1012 totalSignalledWeightFlags += wp[0].bPresentFlag;
1013 }
1014
1015 if (bChroma)
1016 {
1017 for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1018 {
1019 wp = slice.m_weightPredTable[list][ref];
1020 WRITE_FLAG(wp[1].bPresentFlag, "chroma_weight_lX_flag");
1021 totalSignalledWeightFlags += 2 * wp[1].bPresentFlag;
1022 }
1023 }
1024
1025 for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1026 {
1027 wp = slice.m_weightPredTable[list][ref];
1028 if (wp[0].bPresentFlag)
1029 {
1030 int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1031 WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1032 WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1033 }
1034
1035 if (bChroma)
1036 {
1037 if (wp[1].bPresentFlag)
1038 {
1039 for (int plane = 1; plane < 3; plane++)
1040 {
1041 int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1042 WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1043
1044 int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1045 int deltaChroma = (wp[plane].inputOffset - pred);
1046 WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1047 }
1048 }
1049 }
1050 }
1051 }
1052
1053 X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1054 }
1055}
1056
1057void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1058{
1059 X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1060
1061 encodeBin(symbol ? 1 : 0, scmModel[0]);
1062
1063 if (!symbol)
1064 return;
1065
1066 bool bCodeLast = (maxSymbol > symbol);
1067
1068 while (--symbol)
1069 encodeBin(1, scmModel[offset]);
1070
1071 if (bCodeLast)
1072 encodeBin(0, scmModel[offset]);
1073}
1074
1075void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1076{
1077 uint32_t bins = 0;
1078 int numBins = 0;
1079
1080 while (symbol >= (uint32_t)(1 << count))
1081 {
1082 bins = 2 * bins + 1;
1083 numBins++;
1084 symbol -= 1 << count;
1085 count++;
1086 }
1087
1088 bins = 2 * bins + 0;
1089 numBins++;
1090
1091 bins = (bins << count) | symbol;
1092 numBins += count;
1093
1094 X265_CHECK(numBins <= 32, "numBins too large\n");
1095 encodeBinsEP(bins, numBins);
1096}
1097
1098/** Coding of coeff_abs_level_minus3 */
1099void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1100{
1101 uint32_t length;
1102 const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1103
1104 if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1105 {
1106 length = codeNumber >> absGoRice;
1107
1108 X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1109 X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1110 encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1111 }
1112 else
1113 {
1114 length = 0;
1115 codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1116 if (codeNumber != 0)
1117 {
1118 unsigned long idx;
1119 CLZ32(idx, codeNumber + 1);
1120 length = idx;
1121 codeNumber -= (1 << idx) - 1;
1122 }
1123 codeNumber = (codeNumber << absGoRice) + codeRemain;
1124
1125 encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1126 encodeBinsEP(codeNumber, length + absGoRice);
1127 }
1128}
1129
1130// SBAC RD
1131void Entropy::loadIntraDirModeLuma(const Entropy& src)
1132{
1133 X265_CHECK(src.m_valid, "invalid copy source context\n");
1134 m_fracBits = src.m_fracBits;
1135 m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1136}
1137
1138void Entropy::copyFrom(const Entropy& src)
1139{
1140 X265_CHECK(src.m_valid, "invalid copy source context\n");
1141
1142 copyState(src);
1143
1144 memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1145 markValid();
1146}
1147
1148void Entropy::codeMVPIdx(uint32_t symbol)
1149{
1150 encodeBin(symbol, m_contextState[OFF_MVP_IDX_CTX]);
1151}
1152
1153void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1154{
1155 PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1156
1157 if (cu.isIntra(absPartIdx))
1158 {
1159 if (depth == g_maxCUDepth)
1160 encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1161 return;
1162 }
1163
1164 switch (partSize)
1165 {
1166 case SIZE_2Nx2N:
1167 encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1168 break;
1169
1170 case SIZE_2NxN:
1171 case SIZE_2NxnU:
1172 case SIZE_2NxnD:
1173 encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1174 encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1175 if (cu.m_slice->m_sps->maxAMPDepth > depth)
1176 {
1177 encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1178 if (partSize != SIZE_2NxN)
1179 encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1180 }
1181 break;
1182
1183 case SIZE_Nx2N:
1184 case SIZE_nLx2N:
1185 case SIZE_nRx2N:
1186 encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1187 encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1188 if (depth == g_maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1189 encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1190 if (cu.m_slice->m_sps->maxAMPDepth > depth)
1191 {
1192 encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1193 if (partSize != SIZE_Nx2N)
1194 encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1195 }
1196 break;
1197 default:
1198 X265_CHECK(0, "invalid CU partition\n");
1199 break;
1200 }
1201}
1202
1203void Entropy::codePredMode(int predMode)
1204{
1205 encodeBin(predMode == MODE_INTER ? 0 : 1, m_contextState[OFF_PRED_MODE_CTX]);
1206}
1207
1208void Entropy::codeCUTransquantBypassFlag(uint32_t symbol)
1209{
1210 encodeBin(symbol, m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX]);
1211}
1212
1213void Entropy::codeSkipFlag(const CUData& cu, uint32_t absPartIdx)
1214{
1215 // get context function is here
1216 uint32_t symbol = cu.isSkipped(absPartIdx) ? 1 : 0;
1217 uint32_t ctxSkip = cu.getCtxSkipFlag(absPartIdx);
1218
1219 encodeBin(symbol, m_contextState[OFF_SKIP_FLAG_CTX + ctxSkip]);
1220}
1221
1222void Entropy::codeMergeFlag(const CUData& cu, uint32_t absPartIdx)
1223{
1224 const uint32_t symbol = cu.m_mergeFlag[absPartIdx] ? 1 : 0;
1225
1226 encodeBin(symbol, m_contextState[OFF_MERGE_FLAG_EXT_CTX]);
1227}
1228
1229void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1230{
1231 uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1232
1233 if (numCand > 1)
1234 {
1235 uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx
1236 encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1237
1238 X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1239
1240 if (unaryIdx != 0)
1241 {
1242 uint32_t mask = (1 << unaryIdx) - 2;
1243 mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1244 encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1245 }
1246 }
1247}
1248
1249void Entropy::codeSplitFlag(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1250{
1251 X265_CHECK(depth < g_maxCUDepth, "invalid depth\n");
1252
1253 uint32_t ctx = cu.getCtxSplitFlag(absPartIdx, depth);
1254 uint32_t currSplitFlag = (cu.m_cuDepth[absPartIdx] > depth) ? 1 : 0;
1255
1256 X265_CHECK(ctx < 3, "ctx out of range\n");
1257 encodeBin(currSplitFlag, m_contextState[OFF_SPLIT_FLAG_CTX + ctx]);
1258}
1259
1260void Entropy::codeTransformSubdivFlag(uint32_t symbol, uint32_t ctx)
1261{
1262 encodeBin(symbol, m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX + ctx]);
1263}
1264
1265uint32_t Entropy::bitsIntraModeNonMPM() const
1266{
1267 uint32_t mstate = m_contextState[OFF_ADI_CTX];
1268 uint32_t bits = ((uint32_t)(m_fracBits & 32767) + sbacGetEntropyBits(mstate, 0)) >> 15;
1269 return bits + 5; /* fixed cost for encodeBinsEP() */
1270}
1271
1272uint32_t Entropy::bitsIntraModeMPM(const uint32_t preds[3], uint32_t dir) const
1273{
1274 X265_CHECK(dir == preds[0] || dir == preds[1] || dir == preds[2], "dir must be a most probable mode\n");
1275 uint32_t mstate = m_contextState[OFF_ADI_CTX];
1276 uint32_t bits = ((uint32_t)(m_fracBits & 32767) + sbacGetEntropyBits(mstate, 1)) >> 15;
1277 return bits + (dir == preds[0] ? 1 : 2);
1278}
1279
1280void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1281{
1282 uint32_t dir[4], j;
1283 uint32_t preds[4][3];
1284 int predIdx[4];
1285 PartSize mode = (PartSize)cu.m_partSize[absPartIdx];
1286 uint32_t partNum = isMultiple ? (mode == SIZE_NxN ? 4 : 1) : 1;
1287 uint32_t partOffset = (NUM_CU_PARTITIONS >> (cu.m_cuDepth[absPartIdx] << 1)) >> 2;
1288
1289 for (j = 0; j < partNum; j++)
1290 {
1291 dir[j] = cu.m_lumaIntraDir[absPartIdx + partOffset * j];
1292 cu.getIntraDirLumaPredictor(absPartIdx + partOffset * j, preds[j]);
1293 predIdx[j] = -1;
1294 for (uint32_t i = 0; i < 3; i++)
1295 if (dir[j] == preds[j][i])
1296 predIdx[j] = i;
1297
1298 encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
1299 }
1300
1301 for (j = 0; j < partNum; j++)
1302 {
1303 if (predIdx[j] != -1)
1304 {
1305 X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
1306 // NOTE: Mapping
1307 // 0 = 0
1308 // 1 = 10
1309 // 2 = 11
1310 int nonzero = (!!predIdx[j]);
1311 encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
1312 }
1313 else
1314 {
1315 if (preds[j][0] > preds[j][1])
1316 std::swap(preds[j][0], preds[j][1]);
1317
1318 if (preds[j][0] > preds[j][2])
1319 std::swap(preds[j][0], preds[j][2]);
1320
1321 if (preds[j][1] > preds[j][2])
1322 std::swap(preds[j][1], preds[j][2]);
1323
1324 dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
1325 dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
1326 dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
1327
1328 encodeBinsEP(dir[j], 5);
1329 }
1330 }
1331}
1332
1333void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
1334{
1335 uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
1336
1337 if (intraDirChroma == DM_CHROMA_IDX)
1338 encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
1339 else
1340 {
1341 for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
1342 {
1343 if (intraDirChroma == chromaDirMode[i])
1344 {
1345 intraDirChroma = i;
1346 break;
1347 }
1348 }
1349
1350 encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
1351 encodeBinsEP(intraDirChroma, 2);
1352 }
1353}
1354
1355void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
1356{
1357 const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
1358 const uint32_t ctx = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
1359
1360 if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
1361 encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
1362 if (interDir < 2)
1363 encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
1364}
1365
1366void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
1367{
1368 uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
1369
1370 encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
1371
1372 if (refFrame > 0)
1373 {
1374 uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
1375 if (refNum == 0)
1376 return;
1377
1378 refFrame--;
1379 encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
1380 if (refFrame > 0)
1381 {
1382 uint32_t mask = (1 << refFrame) - 2;
1383 mask >>= (refFrame == refNum) ? 1 : 0;
1384 encodeBinsEP(mask, refFrame - (refFrame == refNum));
1385 }
1386 }
1387}
1388
1389void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
1390{
1391 const MV& mvd = cu.m_mvd[list][absPartIdx];
1392 const int hor = mvd.x;
1393 const int ver = mvd.y;
1394
1395 encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1396 encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1397
1398 const bool bHorAbsGr0 = hor != 0;
1399 const bool bVerAbsGr0 = ver != 0;
1400 const uint32_t horAbs = 0 > hor ? -hor : hor;
1401 const uint32_t verAbs = 0 > ver ? -ver : ver;
1402
1403 if (bHorAbsGr0)
1404 encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1405
1406 if (bVerAbsGr0)
1407 encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1408
1409 if (bHorAbsGr0)
1410 {
1411 if (horAbs > 1)
1412 writeEpExGolomb(horAbs - 2, 1);
1413
1414 encodeBinEP(0 > hor ? 1 : 0);
1415 }
1416
1417 if (bVerAbsGr0)
1418 {
1419 if (verAbs > 1)
1420 writeEpExGolomb(verAbs - 2, 1);
1421
1422 encodeBinEP(0 > ver ? 1 : 0);
1423 }
1424}
1425
1426void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
1427{
1428 int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
1429
1430 int qpBdOffsetY = QP_BD_OFFSET;
1431
1432 dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
1433
1434 uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp : (-dqp));
1435 uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
1436 writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
1437 if (absDQp >= CU_DQP_TU_CMAX)
1438 writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
1439
1440 if (absDQp > 0)
1441 {
1442 uint32_t sign = (dqp > 0 ? 0 : 1);
1443 encodeBinEP(sign);
1444 }
1445}
1446
1447void Entropy::codeQtCbf(const CUData& cu, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, TextType ttype, uint32_t trDepth, bool lowestLevel)
1448{
1449 uint32_t ctx = ctxCbf[ttype][trDepth];
1450
1451 bool canQuadSplit = (width >= (MIN_TU_SIZE * 2)) && (height >= (MIN_TU_SIZE * 2));
1452 uint32_t lowestTUDepth = trDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
1453
1454 if ((width != height) && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
1455 {
1456 uint32_t subTUDepth = lowestTUDepth + 1; // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
1457 // Otherwise, this must be the level above the lowest level (as specified above)
1458 uint32_t partIdxesPerSubTU = absPartIdxStep >> 1;
1459
1460 for (uint32_t subTU = 0; subTU < 2; subTU++)
1461 {
1462 uint32_t subTUAbsPartIdx = absPartIdx + (subTU * partIdxesPerSubTU);
1463 uint32_t cbf = cu.getCbf(subTUAbsPartIdx, ttype, subTUDepth);
1464
1465 encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
1466 }
1467 }
1468 else
1469 {
1470 uint32_t cbf = cu.getCbf(absPartIdx, ttype, lowestTUDepth);
1471
1472 encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
1473 }
1474}
1475
1476void Entropy::codeQtCbf(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth)
1477{
1478 uint32_t ctx = ctxCbf[ttype][trDepth];
1479 uint32_t cbf = cu.getCbf(absPartIdx, ttype, trDepth);
1480 encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
1481}
1482
1483void Entropy::codeQtCbf(uint32_t cbf, TextType ttype, uint32_t trDepth)
1484{
1485 uint32_t ctx = ctxCbf[ttype][trDepth];
1486 encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
1487}
1488
1489void Entropy::codeTransformSkipFlags(const CUData& cu, uint32_t absPartIdx, uint32_t trSize, TextType ttype)
1490{
1491 if (cu.m_tqBypass[absPartIdx])
1492 return;
1493 if (trSize != 4)
1494 return;
1495
1496 uint32_t useTransformSkip = cu.m_transformSkip[ttype][absPartIdx];
1497 encodeBin(useTransformSkip, m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX + (ttype ? NUM_TRANSFORMSKIP_FLAG_CTX : 0)]);
1498}
1499
1500void Entropy::codeQtRootCbf(uint32_t cbf)
1501{
1502 encodeBin(cbf, m_contextState[OFF_QT_ROOT_CBF_CTX]);
1503}
1504
1505void Entropy::codeQtCbfZero(TextType ttype, uint32_t trDepth)
1506{
1507 // this function is only used to estimate the bits when cbf is 0
1508 // and will never be called when writing the bitsream.
1509 uint32_t ctx = ctxCbf[ttype][trDepth];
1510 encodeBin(0, m_contextState[OFF_QT_CBF_CTX + ctx]);
1511}
1512
1513void Entropy::codeQtRootCbfZero()
1514{
1515 // this function is only used to estimate the bits when cbf is 0
1516 // and will never be called when writing the bistream.
1517 encodeBin(0, m_contextState[OFF_QT_ROOT_CBF_CTX]);
1518}
1519
1520/** Encode (X,Y) position of the last significant coefficient
1521 * \param posx X component of last coefficient
1522 * \param posy Y component of last coefficient
1523 * \param log2TrSize
1524 * \param bIsLuma
1525 * \param scanIdx scan type (zig-zag, hor, ver)
1526 * This method encodes the X and Y component within a block of the last significant coefficient.
1527 */
1528void Entropy::codeLastSignificantXY(uint32_t posx, uint32_t posy, uint32_t log2TrSize, bool bIsLuma, uint32_t scanIdx)
1529{
1530 // swap
1531 if (scanIdx == SCAN_VER)
1532 std::swap(posx, posy);
1533
1534 uint32_t ctxLast;
1535 uint32_t groupIdxX = getGroupIdx(posx);
1536 uint32_t groupIdxY = getGroupIdx(posy);
1537
1538 int blkSizeOffset = bIsLuma ? ((log2TrSize - 2) * 3 + ((log2TrSize - 1) >> 2)) : NUM_CTX_LAST_FLAG_XY_LUMA;
1539 int ctxShift = bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2;
1540 uint32_t maxGroupIdx = log2TrSize * 2 - 1;
1541
1542 // posX
1543 uint8_t *ctxX = &m_contextState[OFF_CTX_LAST_FLAG_X];
1544 for (ctxLast = 0; ctxLast < groupIdxX; ctxLast++)
1545 encodeBin(1, *(ctxX + blkSizeOffset + (ctxLast >> ctxShift)));
1546
1547 if (groupIdxX < maxGroupIdx)
1548 encodeBin(0, *(ctxX + blkSizeOffset + (ctxLast >> ctxShift)));
1549
1550 // posY
1551 uint8_t *ctxY = &m_contextState[OFF_CTX_LAST_FLAG_Y];
1552 for (ctxLast = 0; ctxLast < groupIdxY; ctxLast++)
1553 encodeBin(1, *(ctxY + blkSizeOffset + (ctxLast >> ctxShift)));
1554
1555 if (groupIdxY < maxGroupIdx)
1556 encodeBin(0, *(ctxY + blkSizeOffset + (ctxLast >> ctxShift)));
1557
1558 if (groupIdxX > 3)
1559 {
1560 uint32_t count = (groupIdxX - 2) >> 1;
1561 posx = posx - g_minInGroup[groupIdxX];
1562 encodeBinsEP(posx, count);
1563 }
1564 if (groupIdxY > 3)
1565 {
1566 uint32_t count = (groupIdxY - 2) >> 1;
1567 posy = posy - g_minInGroup[groupIdxY];
1568 encodeBinsEP(posy, count);
1569 }
1570}
1571
// Entropy-code the coefficients of one transform block.
//   cu         - CU the block belongs to (supplies slice/PPS flags and the scan tables)
//   coeff      - coefficients of the block; (1 << (log2TrSize * 2)) entries are examined
//   absPartIdx - partition index used for the per-CU lookups (tqBypass, transform skip, scan selection)
//   log2TrSize - log2 of the block width/height
//   ttype      - plane; TEXT_LUMA selects the luma context sets, anything else the chroma sets
// Order of the emitted syntax: optional transform-skip flag, position of the last
// significant coefficient, then for every coefficient group (from the group holding the
// last coefficient down to group 0): group flag, significance flags, greater-than-1
// flags, one greater-than-2 flag, sign bits, remaining absolute levels.
// The block must contain at least one non-zero coefficient (see the X265_CHECK below).
1572void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
1573{
1574 uint32_t trSize = 1 << log2TrSize;
1575
1576 // compute number of significant coefficients
1577 uint32_t numSig = primitives.count_nonzero(coeff, (1 << (log2TrSize << 1)));
1578
1579 X265_CHECK(numSig > 0, "cbf check fail\n");
1580
// sign hiding is only considered when the PPS enables it and the CU is not transquant-bypass
1581 bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled && !cu.m_tqBypass[absPartIdx];
1582
1583 if (cu.m_slice->m_pps->bTransformSkipEnabled)
1584 codeTransformSkipFlags(cu, absPartIdx, trSize, ttype);
1585
1586 bool bIsLuma = ttype == TEXT_LUMA;
1587
1588 // select scans
1589 TUEntropyCodingParameters codingParameters;
1590 cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
1591
1592 //----- encode significance map -----
1593
1594 // Find position of last coefficient
1595 int scanPosLast = 0;
1596 uint32_t posLast;
// one bit per coefficient group; a set bit means the group holds a non-zero coefficient
1597 uint64_t sigCoeffGroupFlag64 = 0;
// maskPosXY equals (1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1, as the assert below verifies
1598 const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
1599 assert((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1));
// walk the scan forward until every non-zero coefficient has been seen; on exit posLast
// is the block position of the last significant coefficient in scan order
1600 do
1601 {
1602 posLast = codingParameters.scan[scanPosLast++];
1603
1604 const uint32_t isNZCoeff = (coeff[posLast] != 0);
1605 // get L1 sig map
1606 // NOTE: the new algorithm is complicated, so I keep reference code here
1607 //uint32_t posy = posLast >> log2TrSize;
1608 //uint32_t posx = posLast - (posy << log2TrSize);
1609 //uint32_t blkIdx0 = ((posy >> MLS_CG_LOG2_SIZE) << codingParameters.log2TrSizeCG) + (posx >> MLS_CG_LOG2_SIZE);
1610 const uint32_t blkIdx = ((posLast >> (2 * MLS_CG_LOG2_SIZE)) & ~maskPosXY) + ((posLast >> MLS_CG_LOG2_SIZE) & maskPosXY);
1611 sigCoeffGroupFlag64 |= ((uint64_t)isNZCoeff << blkIdx);
1612 numSig -= isNZCoeff;
1613 }
1614 while (numSig > 0);
// undo the post-increment so scanPosLast is the scan index of posLast
1615 scanPosLast--;
1616
1617 // Code position of last coefficient
1618 int posLastY = posLast >> log2TrSize;
1619 int posLastX = posLast & (trSize - 1);
1620 codeLastSignificantXY(posLastX, posLastY, log2TrSize, bIsLuma, codingParameters.scanType);
1621
1622 //===== code significance flag =====
1623 uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
1624 uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
1625 const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
// c1 carries over from one coefficient group to the next; it is tested before being reset below
1626 uint32_t c1 = 1;
1627 uint32_t goRiceParam = 0;
1628 int scanPosSig = scanPosLast;
1629
// coefficient groups are processed in reverse scan order, starting with the group of the last coefficient
1630 for (int subSet = lastScanSet; subSet >= 0; subSet--)
1631 {
1632 int numNonZero = 0;
1633 int subPos = subSet << MLS_CG_SIZE;
1634 goRiceParam = 0;
1635 int absCoeff[1 << MLS_CG_SIZE];
1636 uint32_t coeffSigns = 0;
1637 int lastNZPosInCG = -1;
1638 int firstNZPosInCG = 1 << MLS_CG_SIZE;
// the last significant coefficient is known to be non-zero, so it is recorded without a significance flag
1639 if (scanPosSig == scanPosLast)
1640 {
1641 absCoeff[0] = int(abs(coeff[posLast]));
1642 coeffSigns = (coeff[posLast] < 0);
1643 numNonZero = 1;
1644 lastNZPosInCG = scanPosSig;
1645 firstNZPosInCG = scanPosSig;
1646 scanPosSig--;
1647 }
1648 // encode significant_coeffgroup_flag
1649 const int cgBlkPos = codingParameters.scanCG[subSet];
1650 const int cgPosY = cgBlkPos >> codingParameters.log2TrSizeCG;
1651 const int cgPosX = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
1652 const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
1653
// the group holding the last coefficient and group 0 are marked significant without coding a flag
1654 if (subSet == lastScanSet || !subSet)
1655 sigCoeffGroupFlag64 |= cgBlkPosMask;
1656 else
1657 {
1658 uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
1659 uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
1660 encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
1661 }
1662
1663 // encode significant_coeff_flag
1664 if (sigCoeffGroupFlag64 & cgBlkPosMask)
1665 {
1666 const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
1667 uint32_t blkPos, sig, ctxSig;
1668 for (; scanPosSig >= subPos; scanPosSig--)
1669 {
1670 blkPos = codingParameters.scan[scanPosSig];
1671 sig = (coeff[blkPos] != 0);
// no flag is coded for the first scan position of a group other than group 0 when no
// non-zero coefficient has been found in that group so far
1672 if (scanPosSig > subPos || subSet == 0 || numNonZero)
1673 {
1674 ctxSig = Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext);
1675 encodeBin(sig, baseCtx[ctxSig]);
1676 }
1677 if (sig)
1678 {
1679 absCoeff[numNonZero] = int(abs(coeff[blkPos]));
// append this coefficient's sign bit at the low end of coeffSigns
1680 coeffSigns = 2 * coeffSigns + ((uint32_t)coeff[blkPos] >> 31);
1681 numNonZero++;
1682 if (lastNZPosInCG < 0)
1683 lastNZPosInCG = scanPosSig;
1684 firstNZPosInCG = scanPosSig;
1685 }
1686 }
1687 }
1688 else
// nothing to code in this group: jump to the last scan position of the next lower group
1689 scanPosSig = subPos - 1;
1690
1691 if (numNonZero > 0)
1692 {
// a sign can be hidden when the first and last non-zero scan positions of the group
// are at least SBH_THRESHOLD apart
1693 bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
1694 uint32_t ctxSet = (subSet > 0 && bIsLuma) ? 2 : 0;
1695
// c1 still holds the value left by the previously coded group (or its initial value of 1)
1696 if (c1 == 0)
1697 ctxSet++;
1698
1699 c1 = 1;
1700 uint8_t *baseCtxMod = bIsLuma ? &m_contextState[OFF_ONE_FLAG_CTX + 4 * ctxSet] : &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA + 4 * ctxSet];
1701
// greater-than-1 flags are coded for at most C1FLAG_NUMBER coefficients of the group
1702 int numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
1703 int firstC2FlagIdx = -1;
1704 for (int idx = 0; idx < numC1Flag; idx++)
1705 {
1706 uint32_t symbol = absCoeff[idx] > 1;
1707 encodeBin(symbol, baseCtxMod[c1]);
1708 if (symbol)
1709 {
1710 c1 = 0;
1711
1712 if (firstC2FlagIdx == -1)
1713 firstC2FlagIdx = idx;
1714 }
1715 else if ((c1 < 3) && (c1 > 0))
1716 c1++;
1717 }
1718
// a greater-than-2 flag is coded only for the first coefficient whose greater-than-1 flag was set
1719 if (!c1)
1720 {
1721 baseCtxMod = bIsLuma ? &m_contextState[OFF_ABS_FLAG_CTX + ctxSet] : &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA + ctxSet];
1722 if (firstC2FlagIdx != -1)
1723 {
1724 uint32_t symbol = absCoeff[firstC2FlagIdx] > 2;
1725 encodeBin(symbol, baseCtxMod[0]);
1726 }
1727 }
1728
// with sign hiding, the most recently appended sign bit (the coefficient at firstNZPosInCG) is dropped
1729 if (bHideFirstSign && signHidden)
1730 encodeBinsEP((coeffSigns >> 1), numNonZero - 1);
1731 else
1732 encodeBinsEP(coeffSigns, numNonZero);
1733
1734 int firstCoeff2 = 1;
1735 if (!c1 || numNonZero > C1FLAG_NUMBER)
1736 {
1737 for (int idx = 0; idx < numNonZero; idx++)
1738 {
// baseLevel is 3 until a coefficient with magnitude >= 2 has been passed, then 2;
// it is 1 for coefficients beyond the first C1FLAG_NUMBER
1739 int baseLevel = (idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1;
1740
1741 if (absCoeff[idx] >= baseLevel)
1742 {
1743 writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
// raise the Rice parameter (capped at 4) once a level exceeds 3 * 2^goRiceParam
1744 if (absCoeff[idx] > 3 * (1 << goRiceParam))
1745 goRiceParam = std::min<uint32_t>(goRiceParam + 1, 4);
1746 }
1747 if (absCoeff[idx] >= 2)
1748 firstCoeff2 = 0;
1749 }
1750 }
1751 }
1752 }
1753}
1754
1755void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
1756{
1757 X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1758
1759 uint32_t isCodeNonZero = !!code;
1760
1761 encodeBinEP(isCodeNonZero);
1762 if (isCodeNonZero)
1763 {
1764 uint32_t isCodeLast = (maxSymbol > code);
1765 uint32_t mask = (1 << (code - 1)) - 1;
1766 uint32_t len = code - 1 + isCodeLast;
1767 mask <<= isCodeLast;
1768
1769 encodeBinsEP(mask, len);
1770 }
1771}
1772
1773/* estimate bit cost for CBP, significant map and significant coefficients */
1774void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
1775{
1776 estCBFBit(estBitsSbac);
1777
1778 estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
1779
1780 // encode significance map
1781 estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
1782
1783 // encode significant coefficients
1784 estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
1785}
1786
1787/* estimate bit cost for each CBP bit */
1788void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
1789{
1790 const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
1791
1792 for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
1793 {
1794 estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
1795 estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
1796 }
1797
1798 ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
1799
1800 estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
1801 estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
1802}
1803
1804/* estimate SAMBAC bit cost for significant coefficient group map */
1805void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
1806{
1807 int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
1808
1809 for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
1810 for (uint32_t bin = 0; bin < 2; bin++)
1811 estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
1812}
1813
1814/* estimate SAMBAC bit cost for significant coefficient map */
1815void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
1816{
1817 int firstCtx = 1, numCtx = 8;
1818
1819 if (log2TrSize >= 4)
1820 {
1821 firstCtx = bIsLuma ? 21 : 12;
1822 numCtx = bIsLuma ? 6 : 3;
1823 }
1824 else if (log2TrSize == 3)
1825 {
1826 firstCtx = 9;
1827 numCtx = bIsLuma ? 12 : 3;
1828 }
1829
1830 if (bIsLuma)
1831 {
1832 for (uint32_t bin = 0; bin < 2; bin++)
1833 estBitsSbac.significantBits[0][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_FLAG_CTX], bin);
1834
1835 for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
1836 for (uint32_t bin = 0; bin < 2; bin++)
1837 estBitsSbac.significantBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_FLAG_CTX + ctxIdx], bin);
1838 }
1839 else
1840 {
1841 for (uint32_t bin = 0; bin < 2; bin++)
1842 estBitsSbac.significantBits[0][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_FLAG_CTX + (NUM_SIG_FLAG_CTX_LUMA + 0)], bin);
1843
1844 for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
1845 for (uint32_t bin = 0; bin < 2; bin++)
1846 estBitsSbac.significantBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_FLAG_CTX + (NUM_SIG_FLAG_CTX_LUMA + ctxIdx)], bin);
1847 }
1848 int bitsX = 0, bitsY = 0;
1849
1850 int blkSizeOffset = bIsLuma ? ((log2TrSize - 2) * 3 + ((log2TrSize - 1) >> 2)) : NUM_CTX_LAST_FLAG_XY_LUMA;
1851 int ctxShift = bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2;
1852 uint32_t maxGroupIdx = log2TrSize * 2 - 1;
1853
1854 uint32_t ctx;
1855 const uint8_t *ctxX = &m_contextState[OFF_CTX_LAST_FLAG_X];
1856 for (ctx = 0; ctx < maxGroupIdx; ctx++)
1857 {
1858 int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
1859 estBitsSbac.lastXBits[ctx] = bitsX + sbacGetEntropyBits(ctxX[ctxOffset], 0);
1860 bitsX += sbacGetEntropyBits(ctxX[ctxOffset], 1);
1861 }
1862
1863 estBitsSbac.lastXBits[ctx] = bitsX;
1864
1865 const uint8_t *ctxY = &m_contextState[OFF_CTX_LAST_FLAG_Y];
1866 for (ctx = 0; ctx < maxGroupIdx; ctx++)
1867 {
1868 int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
1869 estBitsSbac.lastYBits[ctx] = bitsY + sbacGetEntropyBits(ctxY[ctxOffset], 0);
1870 bitsY += sbacGetEntropyBits(ctxY[ctxOffset], 1);
1871 }
1872
1873 estBitsSbac.lastYBits[ctx] = bitsY;
1874}
1875
1876/* estimate bit cost of significant coefficient */
1877void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
1878{
1879 if (bIsLuma)
1880 {
1881 const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
1882 const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
1883
1884 for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
1885 {
1886 estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
1887 estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
1888 }
1889
1890 for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
1891 {
1892 estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
1893 estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
1894 }
1895 }
1896 else
1897 {
1898 const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
1899 const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
1900
1901 for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
1902 {
1903 estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
1904 estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
1905 }
1906
1907 for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
1908 {
1909 estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
1910 estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
1911 }
1912 }
1913}
1914
1915/* Initialize our context information from the nominated source */
1916void Entropy::copyContextsFrom(const Entropy& src)
1917{
1918 X265_CHECK(src.m_valid, "invalid copy source context\n");
1919
1920 memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
1921 markValid();
1922}
1923
1924void Entropy::start()
1925{
1926 m_low = 0;
1927 m_range = 510;
1928 m_bitsLeft = -12;
1929 m_numBufferedBytes = 0;
1930 m_bufferedByte = 0xff;
1931}
1932
1933void Entropy::finish()
1934{
1935 if (m_low >> (21 + m_bitsLeft))
1936 {
1937 m_bitIf->writeByte(m_bufferedByte + 1);
1938 while (m_numBufferedBytes > 1)
1939 {
1940 m_bitIf->writeByte(0x00);
1941 m_numBufferedBytes--;
1942 }
1943
1944 m_low -= 1 << (21 + m_bitsLeft);
1945 }
1946 else
1947 {
1948 if (m_numBufferedBytes > 0)
1949 m_bitIf->writeByte(m_bufferedByte);
1950
1951 while (m_numBufferedBytes > 1)
1952 {
1953 m_bitIf->writeByte(0xff);
1954 m_numBufferedBytes--;
1955 }
1956 }
1957 m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
1958}
1959
1960void Entropy::copyState(const Entropy& other)
1961{
1962 m_low = other.m_low;
1963 m_range = other.m_range;
1964 m_bitsLeft = other.m_bitsLeft;
1965 m_bufferedByte = other.m_bufferedByte;
1966 m_numBufferedBytes = other.m_numBufferedBytes;
1967 m_fracBits = other.m_fracBits;
1968}
1969
1970void Entropy::resetBits()
1971{
1972 m_low = 0;
1973 m_bitsLeft = -12;
1974 m_numBufferedBytes = 0;
1975 m_bufferedByte = 0xff;
1976 m_fracBits &= 32767;
1977 if (m_bitIf)
1978 m_bitIf->resetBits();
1979}
1980
1981/** Encode bin */
1982void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
1983{
1984 uint32_t mstate = ctxModel;
1985
1986 ctxModel = sbacNext(mstate, binValue);
1987
1988 if (!m_bitIf)
1989 {
1990 m_fracBits += sbacGetEntropyBits(mstate, binValue);
1991 return;
1992 }
1993
1994 uint32_t range = m_range;
1995 uint32_t state = sbacGetState(mstate);
1996 uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
1997 range -= lps;
1998
1999 X265_CHECK(lps >= 2, "lps is too small\n");
2000
2001 int numBits = (uint32_t)(range - 256) >> 31;
2002 uint32_t low = m_low;
2003
2004 // NOTE: MPS must be LOWEST bit in mstate
2005 X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
2006 if ((binValue ^ mstate) & 1)
2007 {
2008 // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
2009 //numBits = g_renormTable[lps >> 3];
2010 unsigned long idx;
2011 CLZ32(idx, lps);
2012 X265_CHECK(state != 63 || idx == 1, "state failure\n");
2013
2014 numBits = 8 - idx;
2015 if (state >= 63)
2016 numBits = 6;
2017 X265_CHECK(numBits <= 6, "numBits failure\n");
2018
2019 low += range;
2020 range = lps;
2021 }
2022 m_low = (low << numBits);
2023 m_range = (range << numBits);
2024 m_bitsLeft += numBits;
2025
2026 if (m_bitsLeft >= 0)
2027 writeOut();
2028}
2029
2030/** Encode equiprobable bin */
2031void Entropy::encodeBinEP(uint32_t binValue)
2032{
2033 if (!m_bitIf)
2034 {
2035 m_fracBits += 32768;
2036 return;
2037 }
2038 m_low <<= 1;
2039 if (binValue)
2040 m_low += m_range;
2041 m_bitsLeft++;
2042
2043 if (m_bitsLeft >= 0)
2044 writeOut();
2045}
2046
2047/** Encode equiprobable bins */
2048void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2049{
2050 if (!m_bitIf)
2051 {
2052 m_fracBits += 32768 * numBins;
2053 return;
2054 }
2055
2056 while (numBins > 8)
2057 {
2058 numBins -= 8;
2059 uint32_t pattern = binValues >> numBins;
2060 m_low <<= 8;
2061 m_low += m_range * pattern;
2062 binValues -= pattern << numBins;
2063 m_bitsLeft += 8;
2064
2065 if (m_bitsLeft >= 0)
2066 writeOut();
2067 }
2068
2069 m_low <<= numBins;
2070 m_low += m_range * binValues;
2071 m_bitsLeft += numBins;
2072
2073 if (m_bitsLeft >= 0)
2074 writeOut();
2075}
2076
2077/** Encode terminating bin */
2078void Entropy::encodeBinTrm(uint32_t binValue)
2079{
2080 if (!m_bitIf)
2081 {
2082 m_fracBits += sbacGetEntropyBitsTrm(binValue);
2083 return;
2084 }
2085
2086 m_range -= 2;
2087 if (binValue)
2088 {
2089 m_low += m_range;
2090 m_low <<= 7;
2091 m_range = 2 << 7;
2092 m_bitsLeft += 7;
2093 }
2094 else if (m_range >= 256)
2095 return;
2096 else
2097 {
2098 m_low <<= 1;
2099 m_range <<= 1;
2100 m_bitsLeft++;
2101 }
2102
2103 if (m_bitsLeft >= 0)
2104 writeOut();
2105}
2106
2107/** Move bits from register into bitstream */
2108void Entropy::writeOut()
2109{
2110 uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2111 uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2112
2113 m_bitsLeft -= 8;
2114 m_low &= low_mask;
2115
2116 if (leadByte == 0xff)
2117 m_numBufferedBytes++;
2118 else
2119 {
2120 uint32_t numBufferedBytes = m_numBufferedBytes;
2121 if (numBufferedBytes > 0)
2122 {
2123 uint32_t carry = leadByte >> 8;
2124 uint32_t byteTowrite = m_bufferedByte + carry;
2125 m_bitIf->writeByte(byteTowrite);
2126
2127 byteTowrite = (0xff + carry) & 0xff;
2128 while (numBufferedBytes > 1)
2129 {
2130 m_bitIf->writeByte(byteTowrite);
2131 numBufferedBytes--;
2132 }
2133 }
2134 m_numBufferedBytes = 1;
2135 m_bufferedByte = (uint8_t)leadByte;
2136 }
2137}
2138
/* Estimated bin costs, 128 entries. The values appear to use the same scale
 * as m_fracBits, where one equiprobable bin costs 32768.
 * NOTE(review): presumably indexed by (context state ^ bin) through
 * sbacGetEntropyBits() - confirm against its definition. */
const uint32_t g_entropyBits[128] =
{
    // Corrected table, most notably for last state
    /*   0 */ 0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b,
    /*   8 */ 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
    /*  16 */ 0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937,
    /*  24 */ 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
    /*  32 */ 0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df,
    /*  40 */ 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
    /*  48 */ 0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327,
    /*  56 */ 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
    /*  64 */ 0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e,
    /*  72 */ 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
    /*  80 */ 0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46,
    /*  88 */ 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
    /*  96 */ 0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26,
    /* 104 */ 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
    /* 112 */ 0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f,
    /* 120 */ 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
};
2151
/* Context state transition table: 128 states, two successors each.
 * NOTE(review): presumably indexed as [context state][bin value] through
 * sbacNext(), with the MPS held in the lowest bit of the state index (as the
 * note in encodeBin requires) - confirm against the sbacNext() definition. */
const uint8_t g_nextState[128][2] =
{
    /*   0 */ { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 },
    /*   4 */ { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
    /*   8 */ { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 },
    /*  12 */ { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
    /*  16 */ { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 },
    /*  20 */ { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
    /*  24 */ { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 },
    /*  28 */ { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
    /*  32 */ { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 },
    /*  36 */ { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
    /*  40 */ { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 },
    /*  44 */ { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
    /*  48 */ { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 },
    /*  52 */ { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
    /*  56 */ { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 },
    /*  60 */ { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
    /*  64 */ { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 },
    /*  68 */ { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
    /*  72 */ { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 },
    /*  76 */ { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
    /*  80 */ { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 },
    /*  84 */ { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
    /*  88 */ { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 },
    /*  92 */ { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
    /*  96 */ { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 },
    /* 100 */ { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
    /* 104 */ { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 },
    /* 108 */ { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
    /* 112 */ { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 },
    /* 116 */ { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
    /* 120 */ { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 },
    /* 124 */ { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
};
2171
2172}