f7eb5662e5d0c8ad41a40ca892fe999d344a9946
[deb_x265.git] / source / encoder / entropy.cpp
1 /*****************************************************************************
2 * Copyright (C) 2013 x265 project
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19 *
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
23
24 #include "common.h"
25 #include "framedata.h"
26 #include "scalinglist.h"
27 #include "quant.h"
28 #include "contexts.h"
29 #include "picyuv.h"
30
31 #include "sao.h"
32 #include "entropy.h"
33
/* File-local constants. The two CU_DQP_* values describe the delta-QP
 * binarization (presumably consumed by codeDeltaQP(), which is not visible in
 * this part of the file). START_VALUE seeds the DPCM predictor in
 * codeScalingList(). */
#define CU_DQP_TU_CMAX 5 // max number bins for truncated unary
#define CU_DQP_EG_k 0 // exp-golomb order
#define START_VALUE 8 // start value for dpcm mode

/* Indexed by PartSize in codePUWise(): the distance between consecutive PUs of
 * a CU, expressed in sixteenths of 4^(g_maxFullDepth - depth) partition units
 * (codePUWise scales the entry by that factor and then shifts right by 4). */
static const uint32_t g_puOffset[8] = { 0, 8, 4, 4, 2, 10, 1, 5 };
39
40 namespace x265 {
41
42 Entropy::Entropy()
43 {
44 markValid();
45 m_fracBits = 0;
46 X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
47 }
48
/* Write the video parameter set (VPS) syntax elements.
 *
 * Only the profile/tier/level structure, the decoded picture buffer size and
 * the reorder depth are taken from 'vps'; every other element is written with
 * a fixed value. The order of the WRITE_* calls is the bitstream order and
 * must not be changed. */
void Entropy::codeVPS(const VPS& vps)
{
    WRITE_CODE(0, 4, "vps_video_parameter_set_id");
    WRITE_CODE(3, 2, "vps_reserved_three_2bits");
    WRITE_CODE(0, 6, "vps_reserved_zero_6bits");
    WRITE_CODE(0, 3, "vps_max_sub_layers_minus1");
    WRITE_FLAG(1, "vps_temporal_id_nesting_flag");
    WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");

    codeProfileTier(vps.ptl);

    /* a single set of ordering info is written (max_sub_layers_minus1 is 0 above) */
    WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
    WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
    WRITE_UVLC(vps.numReorderPics, "vps_num_reorder_pics[i]");

    WRITE_UVLC(0, "vps_max_latency_increase_plus1[i]");
    WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
    WRITE_UVLC(0, "vps_max_op_sets_minus1");
    WRITE_FLAG(0, "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
    WRITE_FLAG(0, "vps_extension_flag");
}
70
/* Write the sequence parameter set (SPS) syntax elements.
 *
 * sps          sequence-level settings (picture size, block size limits, tools)
 * scalingList  quantization scaling lists; coded only when enabled and when
 *              m_bDataPresent is set
 * ptl          profile/tier/level structure, written via codeProfileTier()
 *
 * The order of the WRITE_* calls is the bitstream order and must not be
 * changed. The VUI is always written. */
void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
{
    WRITE_CODE(0, 4, "sps_video_parameter_set_id");
    WRITE_CODE(0, 3, "sps_max_sub_layers_minus1");
    WRITE_FLAG(1, "sps_temporal_id_nesting_flag");

    codeProfileTier(ptl);

    WRITE_UVLC(0, "sps_seq_parameter_set_id");
    WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");

    /* the flag is only present for 4:4:4; it is always written as 0 */
    if (sps.chromaFormatIdc == X265_CSP_I444)
        WRITE_FLAG(0, "separate_colour_plane_flag");

    WRITE_UVLC(sps.picWidthInLumaSamples, "pic_width_in_luma_samples");
    WRITE_UVLC(sps.picHeightInLumaSamples, "pic_height_in_luma_samples");

    const Window& conf = sps.conformanceWindow;
    WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
    if (conf.bEnabled)
    {
        /* the offsets are scaled down by the chroma subsampling shifts before being written */
        int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
        WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_left_offset");
        WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_right_offset");
        WRITE_UVLC(conf.topOffset >> vShift, "conf_win_top_offset");
        WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
    }

    /* luma and chroma share the compile-time bit depth X265_DEPTH */
    WRITE_UVLC(X265_DEPTH - 8, "bit_depth_luma_minus8");
    WRITE_UVLC(X265_DEPTH - 8, "bit_depth_chroma_minus8");
    WRITE_UVLC(BITS_FOR_POC - 4, "log2_max_pic_order_cnt_lsb_minus4");
    WRITE_FLAG(true, "sps_sub_layer_ordering_info_present_flag");

    WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
    WRITE_UVLC(sps.numReorderPics, "sps_num_reorder_pics[i]");
    WRITE_UVLC(0, "sps_max_latency_increase_plus1[i]");

    /* coding block and transform block size limits */
    WRITE_UVLC(sps.log2MinCodingBlockSize - 3, "log2_min_coding_block_size_minus3");
    WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
    WRITE_UVLC(sps.quadtreeTULog2MinSize - 2, "log2_min_transform_block_size_minus2");
    WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
    WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1, "max_transform_hierarchy_depth_inter");
    WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1, "max_transform_hierarchy_depth_intra");
    WRITE_FLAG(scalingList.m_bEnabled, "scaling_list_enabled_flag");
    if (scalingList.m_bEnabled)
    {
        WRITE_FLAG(scalingList.m_bDataPresent, "sps_scaling_list_data_present_flag");
        if (scalingList.m_bDataPresent)
            codeScalingList(scalingList);
    }
    WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
    WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");

    /* PCM, SPS-level short-term RPS and long-term references are never signalled */
    WRITE_FLAG(0, "pcm_enabled_flag");
    WRITE_UVLC(0, "num_short_term_ref_pic_sets");
    WRITE_FLAG(0, "long_term_ref_pics_present_flag");

    WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
    WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");

    WRITE_FLAG(1, "vui_parameters_present_flag");
    codeVUI(sps.vuiParameters);

    WRITE_FLAG(0, "sps_extension_flag");
}
136
/* Write the picture parameter set (PPS) syntax elements.
 *
 * Tool enables (sign hiding, constrained intra, transform skip, delta QP,
 * weighted prediction, transquant bypass, WPP, deblocking control) and the
 * chroma QP offsets come from 'pps'; the remaining elements are fixed values.
 * The order of the WRITE_* calls is the bitstream order and must not be
 * changed. */
void Entropy::codePPS(const PPS& pps)
{
    WRITE_UVLC(0, "pps_pic_parameter_set_id");
    WRITE_UVLC(0, "pps_seq_parameter_set_id");
    WRITE_FLAG(0, "dependent_slice_segments_enabled_flag");
    WRITE_FLAG(0, "output_flag_present_flag");
    WRITE_CODE(0, 3, "num_extra_slice_header_bits");
    WRITE_FLAG(pps.bSignHideEnabled, "sign_data_hiding_flag");
    WRITE_FLAG(0, "cabac_init_present_flag");
    /* default active reference count is 1 per list; codeSliceHeader() overrides it when needed */
    WRITE_UVLC(0, "num_ref_idx_l0_default_active_minus1");
    WRITE_UVLC(0, "num_ref_idx_l1_default_active_minus1");

    /* init QP is fixed at 26; codeSliceHeader() writes slice_qp_delta relative to 26 */
    WRITE_SVLC(0, "init_qp_minus26");
    WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
    WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");

    WRITE_FLAG(pps.bUseDQP, "cu_qp_delta_enabled_flag");
    if (pps.bUseDQP)
        WRITE_UVLC(pps.maxCuDQPDepth, "diff_cu_qp_delta_depth");

    WRITE_SVLC(pps.chromaQpOffset[0], "pps_cb_qp_offset");
    WRITE_SVLC(pps.chromaQpOffset[1], "pps_cr_qp_offset");
    WRITE_FLAG(0, "pps_slice_chroma_qp_offsets_present_flag");

    WRITE_FLAG(pps.bUseWeightPred, "weighted_pred_flag");
    WRITE_FLAG(pps.bUseWeightedBiPred, "weighted_bipred_flag");
    WRITE_FLAG(pps.bTransquantBypassEnabled, "transquant_bypass_enable_flag");
    WRITE_FLAG(0, "tiles_enabled_flag");
    WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
    WRITE_FLAG(1, "loop_filter_across_slices_enabled_flag");

    WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
    if (pps.bDeblockingFilterControlPresent)
    {
        WRITE_FLAG(0, "deblocking_filter_override_enabled_flag");
        WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
        /* the offsets are only present while deblocking stays enabled */
        if (!pps.bPicDisableDeblockingFilter)
        {
            WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
            WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2, "pps_tc_offset_div2");
        }
    }

    WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
    WRITE_FLAG(0, "lists_modification_present_flag");
    WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
    WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
    WRITE_FLAG(0, "pps_extension_flag");
}
186
187 void Entropy::codeProfileTier(const ProfileTierLevel& ptl)
188 {
189 WRITE_CODE(0, 2, "XXX_profile_space[]");
190 WRITE_FLAG(ptl.tierFlag, "XXX_tier_flag[]");
191 WRITE_CODE(ptl.profileIdc, 5, "XXX_profile_idc[]");
192 for (int j = 0; j < 32; j++)
193 WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
194
195 WRITE_FLAG(ptl.progressiveSourceFlag, "general_progressive_source_flag");
196 WRITE_FLAG(ptl.interlacedSourceFlag, "general_interlaced_source_flag");
197 WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
198 WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
199
200 if (ptl.profileIdc == Profile::MAINREXT || ptl.profileIdc == Profile::HIGHTHROUGHPUTREXT)
201 {
202 uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
203 int csp = ptl.chromaFormatConstraint;
204 WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
205 WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
206 WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
207 WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
208 WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_420chroma_constraint_flag");
209 WRITE_FLAG(csp == X265_CSP_I400, "general_max_monochrome_constraint_flag");
210 WRITE_FLAG(ptl.intraConstraintFlag, "general_intra_constraint_flag");
211 WRITE_FLAG(0, "general_one_picture_only_constraint_flag");
212 WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
213 WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[0..15]");
214 WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[16..31]");
215 WRITE_CODE(0 , 3, "XXX_reserved_zero_35bits[32..34]");
216 }
217 else
218 {
219 WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
220 WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
221 WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
222 }
223
224 WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
225 }
226
/* Write the video usability information (VUI) carried in the SPS.
 *
 * Each optional group (aspect ratio, overscan, video signal type, chroma
 * location, default display window, HRD) is preceded by its presence flag and
 * written only when that flag is set. Timing info is always written; bitstream
 * restrictions are never written. The order of the WRITE_* calls is the
 * bitstream order and must not be changed. */
void Entropy::codeVUI(const VUI& vui)
{
    WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
    if (vui.aspectRatioInfoPresentFlag)
    {
        WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
        /* idc 255 is followed by an explicit sample aspect ratio */
        if (vui.aspectRatioIdc == 255)
        {
            WRITE_CODE(vui.sarWidth, 16, "sar_width");
            WRITE_CODE(vui.sarHeight, 16, "sar_height");
        }
    }

    WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
    if (vui.overscanInfoPresentFlag)
        WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");

    WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
    if (vui.videoSignalTypePresentFlag)
    {
        WRITE_CODE(vui.videoFormat, 3, "video_format");
        WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
        WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
        if (vui.colourDescriptionPresentFlag)
        {
            WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
            WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
            WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
        }
    }

    WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
    if (vui.chromaLocInfoPresentFlag)
    {
        WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
        WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
    }

    WRITE_FLAG(0, "neutral_chroma_indication_flag");
    WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
    WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");

    /* unlike the SPS conformance window, these offsets are written unscaled */
    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
    if (vui.defaultDisplayWindow.bEnabled)
    {
        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
        WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
        WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
    }

    /* timing info is always present (the VPS does not carry it, see codeVPS) */
    WRITE_FLAG(1, "vui_timing_info_present_flag");
    WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
    WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
    WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");

    WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
    if (vui.hrdParametersPresentFlag)
        codeHrdParameters(vui.hrdParameters);

    WRITE_FLAG(0, "bitstream_restriction_flag");
}
289
290 void Entropy::codeScalingList(const ScalingList& scalingList)
291 {
292 for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
293 {
294 for (int listId = 0; listId < ScalingList::NUM_LISTS; listId++)
295 {
296 int predList = scalingList.checkPredMode(sizeId, listId);
297 WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
298 if (predList >= 0)
299 WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
300 else // DPCM Mode
301 codeScalingList(scalingList, sizeId, listId);
302 }
303 }
304 }
305
306 void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
307 {
308 int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
309 const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
310 int nextCoef = START_VALUE;
311 int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
312 int data;
313
314 if (sizeId > BLOCK_8x8)
315 {
316 WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
317 nextCoef = scalingList.m_scalingListDC[sizeId][listId];
318 }
319 for (int i = 0; i < coefNum; i++)
320 {
321 data = src[scan[i]] - nextCoef;
322 nextCoef = src[scan[i]];
323 if (data > 127)
324 data = data - 256;
325 if (data < -128)
326 data = data + 256;
327
328 WRITE_SVLC(data, "scaling_list_delta_coef");
329 }
330 }
331
/* Write the HRD (hypothetical reference decoder) parameters of the VUI.
 *
 * Only NAL HRD parameters are signalled, with a single CPB entry
 * (cpb_cnt_minus1 == 0) and a fixed picture rate. Scales, delay field lengths,
 * bit rate, CPB size and the CBR flag come from 'hrd'. The order of the
 * WRITE_* calls is the bitstream order and must not be changed. */
void Entropy::codeHrdParameters(const HRDInfo& hrd)
{
    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");

    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");

    /* the three length fields are stored as length - 1 in five bits each */
    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
    WRITE_CODE(hrd.cpbRemovalDelayLength - 1, 5, "au_cpb_removal_delay_length_minus1");
    WRITE_CODE(hrd.dpbOutputDelayLength - 1, 5, "dpb_output_delay_length_minus1");

    WRITE_FLAG(1, "fixed_pic_rate_general_flag");
    WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
    WRITE_UVLC(0, "cpb_cnt_minus1");

    WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
    WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
    WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
}
353
354 void Entropy::codeAUD(const Slice& slice)
355 {
356 int picType;
357
358 switch (slice.m_sliceType)
359 {
360 case I_SLICE:
361 picType = 0;
362 break;
363 case P_SLICE:
364 picType = 1;
365 break;
366 case B_SLICE:
367 picType = 2;
368 break;
369 default:
370 picType = 7;
371 break;
372 }
373
374 WRITE_CODE(picType, 3, "pic_type");
375 }
376
/* Write the slice segment header for a picture coded as a single slice.
 *
 * slice    slice-level state (type, POC, RPS, reference counts, QP, ...)
 * encData  frame data; only m_saoParam is read, and only when SAO is enabled
 *          in the SPS
 *
 * The WPP entry points are not written here, see
 * codeSliceHeaderWPPEntryPoints(). The order of the WRITE_* calls is the
 * bitstream order and must not be changed. */
void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData)
{
    WRITE_FLAG(1, "first_slice_segment_in_pic_flag");
    if (slice.getRapPicFlag())
        WRITE_FLAG(0, "no_output_of_prior_pics_flag");

    WRITE_UVLC(0, "slice_pic_parameter_set_id");

    /* x265 does not use dependent slices, so always write all this data */

    WRITE_UVLC(slice.m_sliceType, "slice_type");

    /* POC, reference picture set and temporal MVP enable are skipped for IDR pictures */
    if (!slice.getIdrPicFlag())
    {
        /* POC is written relative to the last IDR, reduced modulo 2^BITS_FOR_POC */
        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << BITS_FOR_POC)) % (1 << BITS_FOR_POC);
        WRITE_CODE(picOrderCntLSB, BITS_FOR_POC, "pic_order_cnt_lsb");

#if _DEBUG || CHECKED_BUILD
        // check for bitstream restriction stating that:
        // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
        // Ideally this process should not be repeated for each slice in a picture
        if (slice.isIRAP())
            for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
            {
                X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
            }
#endif

        /* the RPS is always coded in the slice header (the SPS carries none, see codeSPS) */
        WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
        codeShortTermRefPicSet(slice.m_rps);

        if (slice.m_sps->bTemporalMVPEnabled)
            WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
    }
    /* saoParam is only dereferenced when the SPS enables SAO */
    const SAOParam *saoParam = encData.m_saoParam;
    if (slice.m_sps->bUseSAO)
    {
        WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
        WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
    }

    // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
    // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default

    if (!slice.isIntra())
    {
        bool overrideFlag = (slice.m_numRefIdx[0] != 1 || (slice.isInterB() && slice.m_numRefIdx[1] != 1));
        WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
        if (overrideFlag)
        {
            WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
            if (slice.isInterB())
                WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
            else
            {
                X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
            }
        }
    }
    else
    {
        X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
    }

    if (slice.isInterB())
        WRITE_FLAG(0, "mvd_l1_zero_flag");

    if (slice.m_sps->bTemporalMVPEnabled)
    {
        if (slice.m_sliceType == B_SLICE)
            WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");

        /* the collocated index is only written when the selected list has more than one entry */
        if (slice.m_sliceType != I_SLICE &&
            ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
             (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
        {
            WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
        }
    }
    if ((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE))
        codePredWeightTable(slice);

    X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
    if (!slice.isIntra())
        WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");

    /* the PPS init QP is fixed at 26 (see codePPS), so the delta is relative to 26 */
    int code = slice.m_sliceQp - 26;
    WRITE_SVLC(code, "slice_qp_delta");

    bool isSAOEnabled = slice.m_sps->bUseSAO ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
    bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;

    /* the flag is only written when at least one in-loop filter is active */
    if (isSAOEnabled || isDBFEnabled)
        WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
}
472
473 /** write wavefront substreams sizes for the slice header */
474 void Entropy::codeSliceHeaderWPPEntryPoints(const Slice& slice, const uint32_t *substreamSizes, uint32_t maxOffset)
475 {
476 uint32_t offsetLen = 1;
477 while (maxOffset >= (1U << offsetLen))
478 {
479 offsetLen++;
480 X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
481 }
482
483 uint32_t numRows = slice.m_sps->numCuInHeight - 1;
484 WRITE_UVLC(numRows, "num_entry_point_offsets");
485 if (numRows > 0)
486 WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
487
488 for (uint32_t i = 0; i < numRows; i++)
489 WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
490 }
491
492 void Entropy::codeShortTermRefPicSet(const RPS& rps)
493 {
494 WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
495 WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
496 int prev = 0;
497 for (int j = 0; j < rps.numberOfNegativePictures; j++)
498 {
499 WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
500 prev = rps.deltaPOC[j];
501 WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
502 }
503
504 prev = 0;
505 for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
506 {
507 WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
508 prev = rps.deltaPOC[j];
509 WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
510 }
511 }
512
513 void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
514 {
515 bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
516 encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
517 }
518
/* encode a CU block recursively
 *
 * ctu         CTU data holding the per-partition coding decisions
 * cuGeom      geometry of the CU at this recursion level; child geometries are
 *             located at (&cuGeom + cuGeom.childOffset + qIdx)
 * absPartIdx  index of the CU's first partition within the CTU
 * depth       current CU depth, 0 at the CTU root
 * bEncodeDQP  in/out: whether a delta QP still has to be coded; set again here
 *             for every CU at or above the PPS maxCuDQPDepth when dQP is
 *             enabled, and passed on to codeCoeff() which may modify it */
void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
{
    const Slice* slice = ctu.m_slice;

    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
        bEncodeDQP = true;

    int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
    int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);

    /* mandatory split: no split flag is coded, and only the children flagged
     * PRESENT are visited */
    if (!cuUnsplitFlag)
    {
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
        {
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
            if (childGeom.flags & CUGeom::PRESENT)
                encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
        }
        return;
    }

    // The split flag is only signalled for geometries not marked LEAF
    if (cuSplitFlag)
        codeSplitFlag(ctu, absPartIdx, depth);

    /* the CU chosen for this partition lies deeper: recurse into all four children */
    if (depth < ctu.m_cuDepth[absPartIdx] && depth < g_maxCUDepth)
    {
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
        {
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
            encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
        }
        return;
    }

    if (slice->m_pps->bTransquantBypassEnabled)
        codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);

    if (!slice->isIntra())
    {
        codeSkipFlag(ctu, absPartIdx);
        /* a skipped CU carries only its merge index */
        if (ctu.isSkipped(absPartIdx))
        {
            codeMergeIndex(ctu, absPartIdx);
            finishCU(ctu, absPartIdx, depth);
            return;
        }
        codePredMode(ctu.m_predMode[absPartIdx]);
    }

    codePartSize(ctu, absPartIdx, depth);

    // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
    codePredInfo(ctu, absPartIdx);

    /* allowed TU size range for this CU, filled in by the intra or inter helper */
    uint32_t tuDepthRange[2];
    if (ctu.isIntra(absPartIdx))
        ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
    else
        ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);

    // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
    codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);

    // --- write terminating bit ---
    finishCU(ctu, absPartIdx, depth);
}
589
590 /* Return bit count of signaling inter mode */
591 uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
592 {
593 uint32_t bits;
594 bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
595 bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
596 PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
597 switch (partSize)
598 {
599 case SIZE_2Nx2N:
600 bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
601 break;
602
603 case SIZE_2NxN:
604 case SIZE_2NxnU:
605 case SIZE_2NxnD:
606 bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
607 bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
608 if (cu.m_slice->m_sps->maxAMPDepth > depth)
609 {
610 bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
611 if (partSize != SIZE_2NxN)
612 bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
613 }
614 break;
615
616 case SIZE_Nx2N:
617 case SIZE_nLx2N:
618 case SIZE_nRx2N:
619 bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
620 bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
621 if (depth == g_maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
622 bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
623 if (cu.m_slice->m_sps->maxAMPDepth > depth)
624 {
625 bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
626 if (partSize != SIZE_Nx2N)
627 bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
628 }
629 break;
630 default:
631 X265_CHECK(0, "invalid CU partition\n");
632 break;
633 }
634
635 return bits;
636 }
637
638 /* finish encoding a cu and handle end-of-slice conditions */
639 void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth)
640 {
641 const Slice* slice = ctu.m_slice;
642 uint32_t realEndAddress = slice->m_endCUAddr;
643 uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
644 X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
645
646 uint32_t granularityMask = g_maxCUSize - 1;
647 uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
648 uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
649 uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
650 bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
651 ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
652
653 if (granularityBoundary)
654 {
655 // Encode slice finish
656 bool bTerminateSlice = false;
657 if (cuAddr + (NUM_CU_PARTITIONS >> (depth << 1)) == realEndAddress)
658 bTerminateSlice = true;
659
660 // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
661 if (!bTerminateSlice)
662 encodeBinTrm(0);
663
664 if (!m_bitIf)
665 resetBits(); // TODO: most likely unnecessary
666 }
667 }
668
/* Recursively encode the transform tree of a CU: the subdivision flags, the
 * coded block flags, the delta QP (at most once while bCodeDQP is set) and the
 * residual coefficients of every leaf TU.
 *
 * cu          CU data holding TU depths, CBFs and coefficients
 * absPartIdx  first partition of the current TU
 * tuDepth     depth of the current TU below the CU (0 for a TU spanning the CU)
 * log2TrSize  log2 of the current luma TU size
 * bCodeDQP    in/out: cleared once codeDeltaQP() has been called
 * depthRange  [0] smallest and [1] largest log2 TU size allowed for this CU */
void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, uint32_t log2TrSize,
                              bool& bCodeDQP, const uint32_t depthRange[2])
{
    const bool subdiv = cu.m_tuDepth[absPartIdx] > tuDepth;

    /* in each of these conditions, the subdiv flag is implied and not signaled,
     * so we have checks to make sure the implied value matches our intentions */
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && !tuDepth)
    {
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
    }
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && !tuDepth &&
             cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
    {
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2TrSize %d, depthRange[0] %d\n", log2TrSize, depthRange[0]);
    }
    else if (log2TrSize > depthRange[1])
    {
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
    }
    else if (log2TrSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2TrSize == depthRange[0])
    {
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
    }
    else
    {
        X265_CHECK(log2TrSize > depthRange[0], "transform size failure\n");
        codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
    }

    uint32_t hChromaShift = cu.m_hChromaShift;
    uint32_t vChromaShift = cu.m_vChromaShift;
    /* true when the chroma block of this TU would have a log2 size below 2 */
    bool bSmallChroma = (log2TrSize - hChromaShift < 2);
    if (!tuDepth || !bSmallChroma)
    {
        /* below the root, a chroma CBF is only coded when the CBF one level up is set */
        if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1))
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, tuDepth, !subdiv);
        if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1))
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, tuDepth, !subdiv);
    }
    else
    {
        /* no chroma CBF is coded at this level; it must equal the one a level up */
        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1), "chroma xform size match failure\n");
        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1), "chroma xform size match failure\n");
    }

    if (subdiv)
    {
        /* recurse into the four quadrants, each one size smaller and one level deeper */
        --log2TrSize;
        ++tuDepth;

        uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE) * 2;

        encodeTransform(cu, absPartIdx + 0 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
        encodeTransform(cu, absPartIdx + 1 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
        encodeTransform(cu, absPartIdx + 2 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
        encodeTransform(cu, absPartIdx + 3 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
        return;
    }

    /* with small chroma, chroma data is addressed at the first of each group of four partitions */
    uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;

    /* an inter CU whose root TU has no chroma residual has its luma CBF implied, not coded */
    if (cu.isInter(absPartIdxC) && !tuDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
    {
        X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
    }
    else
        codeQtCbfLuma(cu, absPartIdx, tuDepth);

    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth);
    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, tuDepth);
    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, tuDepth);
    /* nothing more to code when no plane has a residual */
    if (!(cbfY || cbfU || cbfV))
        return;

    // dQP: coded with the first TU that has a residual, then bCodeDQP is cleared
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
    {
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
        /* mask off the low bits to address the CU's first partition */
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
        codeDeltaQP(cu, absPartIdxLT);
        bCodeDQP = false;
    }

    if (cbfY)
    {
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2TrSize, TEXT_LUMA);
        if (!(cbfU || cbfV))
            return;
    }

    if (bSmallChroma)
    {
        /* chroma for a group of four luma TUs is coded once, after the fourth */
        if ((absPartIdx & 3) != 3)
            return;

        const uint32_t log2TrSizeC = 2;
        /* 4:2:2 chroma is coded as two sub-TUs split vertically */
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
        const uint32_t curPartNum = 4;
        uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
        {
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
            do
            {
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, tuDepth + splitIntoSubTUs))
                {
                    uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
                }
            }
            while (tuIterator.isNextSection());
        }
    }
    else
    {
        /* same chroma loop as above, with sizes derived from the luma TU */
        uint32_t log2TrSizeC = log2TrSize - hChromaShift;
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
        uint32_t curPartNum = 1 << (log2TrSize - LOG2_UNIT_SIZE) * 2;
        uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
        {
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
            do
            {
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, tuDepth + splitIntoSubTUs))
                {
                    uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
                }
            }
            while (tuIterator.isNextSection());
        }
    }
}
807
808 void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
809 {
810 if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
811 {
812 codeIntraDirLumaAng(cu, absPartIdx, true);
813 if (cu.m_chromaFormat != X265_CSP_I400)
814 {
815 uint32_t chromaDirMode[NUM_CHROMA_MODE];
816 cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
817
818 codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
819
820 if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
821 {
822 uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
823 for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
824 {
825 absPartIdx += qNumParts;
826 cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
827 codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
828 }
829 }
830 }
831 }
832 else // if it is inter mode, encode motion vector and reference index
833 codePUWise(cu, absPartIdx);
834 }
835
836 /** encode motion information for every PU block */
837 void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
838 {
839 PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
840 uint32_t numPU = (partSize == SIZE_2Nx2N ? 1 : (partSize == SIZE_NxN ? 4 : 2));
841 uint32_t depth = cu.m_cuDepth[absPartIdx];
842 uint32_t puOffset = (g_puOffset[uint32_t(partSize)] << (g_maxFullDepth - depth) * 2) >> 4;
843
844 for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += puOffset)
845 {
846 codeMergeFlag(cu, subPartIdx);
847 if (cu.m_mergeFlag[subPartIdx])
848 codeMergeIndex(cu, subPartIdx);
849 else
850 {
851 if (cu.m_slice->isInterB())
852 codeInterDir(cu, subPartIdx);
853
854 uint32_t interDir = cu.m_interDir[subPartIdx];
855 for (uint32_t list = 0; list < 2; list++)
856 {
857 if (interDir & (1 << list))
858 {
859 X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
860
861 codeRefFrmIdxPU(cu, subPartIdx, list);
862 codeMvd(cu, subPartIdx, list);
863 codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
864 }
865 }
866 }
867 }
868 }
869
870 /** encode reference frame index for a PU block */
871 void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
872 {
873 X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
874
875 if (cu.m_slice->m_numRefIdx[list] > 1)
876 codeRefFrmIdx(cu, absPartIdx, list);
877 }
878
879 void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
880 {
881 if (!cu.isIntra(absPartIdx))
882 {
883 if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
884 codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
885 if (!cu.getQtRootCbf(absPartIdx))
886 return;
887 }
888
889 uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
890 encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
891 }
892
893 void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
894 {
895 int typeIdx = ctuParam.typeIdx;
896
897 if (plane != 2)
898 {
899 encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
900 if (typeIdx >= 0)
901 encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
902 }
903
904 if (typeIdx >= 0)
905 {
906 enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
907 if (typeIdx == SAO_BO)
908 {
909 for (int i = 0; i < SAO_BO_LEN; i++)
910 codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
911
912 for (int i = 0; i < SAO_BO_LEN; i++)
913 if (ctuParam.offset[i] != 0)
914 encodeBinEP(ctuParam.offset[i] < 0);
915
916 encodeBinsEP(ctuParam.bandPos, 5);
917 }
918 else // if (typeIdx < SAO_BO)
919 {
920 codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
921 codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
922 codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
923 codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
924 if (plane != 2)
925 encodeBinsEP((uint32_t)(typeIdx), 2);
926 }
927 }
928 }
929
930 /** initialize context model with respect to QP and initialization value */
931 uint8_t sbacInit(int qp, int initValue)
932 {
933 qp = Clip3(0, 51, qp);
934
935 int slope = (initValue >> 4) * 5 - 45;
936 int offset = ((initValue & 15) << 3) - 16;
937 int initState = X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
938 uint32_t mpState = (initState >= 64);
939 uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
940
941 return (uint8_t)state;
942 }
943
944 static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
945 {
946 ctxModel += sliceType * size;
947
948 for (int n = 0; n < size; n++)
949 contextModel[n] = sbacInit(qp, ctxModel[n]);
950 }
951
952 void Entropy::resetEntropy(const Slice& slice)
953 {
954 int qp = slice.m_sliceQp;
955 SliceType sliceType = slice.m_sliceType;
956
957 initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
958 initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
959 initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
960 initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
961 initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
962 initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
963 initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
964 initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
965 initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
966 initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
967 initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
968 initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
969 initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
970 initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
971 initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
972 initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
973 initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
974 initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
975 initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
976 initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
977 initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
978 initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
979 initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
980 initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
981 initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
982 initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
983 // new structure
984
985 start();
986 }
987
988 /* code explicit wp tables */
989 void Entropy::codePredWeightTable(const Slice& slice)
990 {
991 const WeightParam *wp;
992 bool bChroma = true; // 4:0:0 not yet supported
993 bool bDenomCoded = false;
994 int numRefDirs = slice.m_sliceType == B_SLICE ? 2 : 1;
995 uint32_t totalSignalledWeightFlags = 0;
996
997 if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
998 (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
999 {
1000 for (int list = 0; list < numRefDirs; list++)
1001 {
1002 for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1003 {
1004 wp = slice.m_weightPredTable[list][ref];
1005 if (!bDenomCoded)
1006 {
1007 WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1008
1009 if (bChroma)
1010 {
1011 int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1012 WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1013 }
1014 bDenomCoded = true;
1015 }
1016 WRITE_FLAG(wp[0].bPresentFlag, "luma_weight_lX_flag");
1017 totalSignalledWeightFlags += wp[0].bPresentFlag;
1018 }
1019
1020 if (bChroma)
1021 {
1022 for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1023 {
1024 wp = slice.m_weightPredTable[list][ref];
1025 WRITE_FLAG(wp[1].bPresentFlag, "chroma_weight_lX_flag");
1026 totalSignalledWeightFlags += 2 * wp[1].bPresentFlag;
1027 }
1028 }
1029
1030 for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1031 {
1032 wp = slice.m_weightPredTable[list][ref];
1033 if (wp[0].bPresentFlag)
1034 {
1035 int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1036 WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1037 WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1038 }
1039
1040 if (bChroma)
1041 {
1042 if (wp[1].bPresentFlag)
1043 {
1044 for (int plane = 1; plane < 3; plane++)
1045 {
1046 int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1047 WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1048
1049 int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1050 int deltaChroma = (wp[plane].inputOffset - pred);
1051 WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1052 }
1053 }
1054 }
1055 }
1056 }
1057
1058 X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1059 }
1060 }
1061
1062 void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1063 {
1064 X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1065
1066 encodeBin(symbol ? 1 : 0, scmModel[0]);
1067
1068 if (!symbol)
1069 return;
1070
1071 bool bCodeLast = (maxSymbol > symbol);
1072
1073 while (--symbol)
1074 encodeBin(1, scmModel[offset]);
1075
1076 if (bCodeLast)
1077 encodeBin(0, scmModel[offset]);
1078 }
1079
1080 void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1081 {
1082 uint32_t bins = 0;
1083 int numBins = 0;
1084
1085 while (symbol >= (uint32_t)(1 << count))
1086 {
1087 bins = 2 * bins + 1;
1088 numBins++;
1089 symbol -= 1 << count;
1090 count++;
1091 }
1092
1093 bins = 2 * bins + 0;
1094 numBins++;
1095
1096 bins = (bins << count) | symbol;
1097 numBins += count;
1098
1099 X265_CHECK(numBins <= 32, "numBins too large\n");
1100 encodeBinsEP(bins, numBins);
1101 }
1102
1103 /** Coding of coeff_abs_level_minus3 */
1104 void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1105 {
1106 uint32_t length;
1107 const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1108
1109 if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1110 {
1111 length = codeNumber >> absGoRice;
1112
1113 X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1114 X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1115 encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1116 }
1117 else
1118 {
1119 length = 0;
1120 codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1121 if (codeNumber != 0)
1122 {
1123 unsigned long idx;
1124 CLZ(idx, codeNumber + 1);
1125 length = idx;
1126 codeNumber -= (1 << idx) - 1;
1127 }
1128 codeNumber = (codeNumber << absGoRice) + codeRemain;
1129
1130 encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1131 encodeBinsEP(codeNumber, length + absGoRice);
1132 }
1133 }
1134
1135 // SBAC RD
1136 void Entropy::loadIntraDirModeLuma(const Entropy& src)
1137 {
1138 X265_CHECK(src.m_valid, "invalid copy source context\n");
1139 m_fracBits = src.m_fracBits;
1140 m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1141 }
1142
1143 void Entropy::copyFrom(const Entropy& src)
1144 {
1145 X265_CHECK(src.m_valid, "invalid copy source context\n");
1146
1147 copyState(src);
1148
1149 memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1150 markValid();
1151 }
1152
1153 void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1154 {
1155 PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1156
1157 if (cu.isIntra(absPartIdx))
1158 {
1159 if (depth == g_maxCUDepth)
1160 encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1161 return;
1162 }
1163
1164 switch (partSize)
1165 {
1166 case SIZE_2Nx2N:
1167 encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1168 break;
1169
1170 case SIZE_2NxN:
1171 case SIZE_2NxnU:
1172 case SIZE_2NxnD:
1173 encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1174 encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1175 if (cu.m_slice->m_sps->maxAMPDepth > depth)
1176 {
1177 encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1178 if (partSize != SIZE_2NxN)
1179 encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1180 }
1181 break;
1182
1183 case SIZE_Nx2N:
1184 case SIZE_nLx2N:
1185 case SIZE_nRx2N:
1186 encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1187 encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1188 if (depth == g_maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1189 encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1190 if (cu.m_slice->m_sps->maxAMPDepth > depth)
1191 {
1192 encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1193 if (partSize != SIZE_Nx2N)
1194 encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1195 }
1196 break;
1197 default:
1198 X265_CHECK(0, "invalid CU partition\n");
1199 break;
1200 }
1201 }
1202
1203 void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1204 {
1205 uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1206
1207 if (numCand > 1)
1208 {
1209 uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx
1210 encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1211
1212 X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1213
1214 if (unaryIdx != 0)
1215 {
1216 uint32_t mask = (1 << unaryIdx) - 2;
1217 mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1218 encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1219 }
1220 }
1221 }
1222
1223 void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1224 {
1225 uint32_t dir[4], j;
1226 uint32_t preds[4][3];
1227 int predIdx[4];
1228 uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
1229 uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1230
1231 for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
1232 {
1233 dir[j] = cu.m_lumaIntraDir[absPartIdx];
1234 cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
1235 predIdx[j] = -1;
1236 for (uint32_t i = 0; i < 3; i++)
1237 if (dir[j] == preds[j][i])
1238 predIdx[j] = i;
1239
1240 encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
1241 }
1242
1243 for (j = 0; j < partNum; j++)
1244 {
1245 if (predIdx[j] != -1)
1246 {
1247 X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
1248 // NOTE: Mapping
1249 // 0 = 0
1250 // 1 = 10
1251 // 2 = 11
1252 int nonzero = (!!predIdx[j]);
1253 encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
1254 }
1255 else
1256 {
1257 if (preds[j][0] > preds[j][1])
1258 std::swap(preds[j][0], preds[j][1]);
1259
1260 if (preds[j][0] > preds[j][2])
1261 std::swap(preds[j][0], preds[j][2]);
1262
1263 if (preds[j][1] > preds[j][2])
1264 std::swap(preds[j][1], preds[j][2]);
1265
1266 dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
1267 dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
1268 dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
1269
1270 encodeBinsEP(dir[j], 5);
1271 }
1272 }
1273 }
1274
1275 void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
1276 {
1277 uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
1278
1279 if (intraDirChroma == DM_CHROMA_IDX)
1280 encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
1281 else
1282 {
1283 for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
1284 {
1285 if (intraDirChroma == chromaDirMode[i])
1286 {
1287 intraDirChroma = i;
1288 break;
1289 }
1290 }
1291
1292 encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
1293 encodeBinsEP(intraDirChroma, 2);
1294 }
1295 }
1296
1297 void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
1298 {
1299 const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
1300 const uint32_t ctx = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
1301
1302 if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
1303 encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
1304 if (interDir < 2)
1305 encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
1306 }
1307
1308 void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
1309 {
1310 uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
1311
1312 encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
1313
1314 if (refFrame > 0)
1315 {
1316 uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
1317 if (refNum == 0)
1318 return;
1319
1320 refFrame--;
1321 encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
1322 if (refFrame > 0)
1323 {
1324 uint32_t mask = (1 << refFrame) - 2;
1325 mask >>= (refFrame == refNum) ? 1 : 0;
1326 encodeBinsEP(mask, refFrame - (refFrame == refNum));
1327 }
1328 }
1329 }
1330
1331 void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
1332 {
1333 const MV& mvd = cu.m_mvd[list][absPartIdx];
1334 const int hor = mvd.x;
1335 const int ver = mvd.y;
1336
1337 encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1338 encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1339
1340 const bool bHorAbsGr0 = hor != 0;
1341 const bool bVerAbsGr0 = ver != 0;
1342 const uint32_t horAbs = 0 > hor ? -hor : hor;
1343 const uint32_t verAbs = 0 > ver ? -ver : ver;
1344
1345 if (bHorAbsGr0)
1346 encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1347
1348 if (bVerAbsGr0)
1349 encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1350
1351 if (bHorAbsGr0)
1352 {
1353 if (horAbs > 1)
1354 writeEpExGolomb(horAbs - 2, 1);
1355
1356 encodeBinEP(0 > hor ? 1 : 0);
1357 }
1358
1359 if (bVerAbsGr0)
1360 {
1361 if (verAbs > 1)
1362 writeEpExGolomb(verAbs - 2, 1);
1363
1364 encodeBinEP(0 > ver ? 1 : 0);
1365 }
1366 }
1367
1368 void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
1369 {
1370 int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
1371
1372 int qpBdOffsetY = QP_BD_OFFSET;
1373
1374 dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
1375
1376 uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp : (-dqp));
1377 uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
1378 writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
1379 if (absDQp >= CU_DQP_TU_CMAX)
1380 writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
1381
1382 if (absDQp > 0)
1383 {
1384 uint32_t sign = (dqp > 0 ? 0 : 1);
1385 encodeBinEP(sign);
1386 }
1387 }
1388
1389 void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
1390 {
1391 uint32_t ctx = tuDepth + 2;
1392
1393 uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
1394 bool canQuadSplit = (log2TrSize - cu.m_hChromaShift > 2);
1395 uint32_t lowestTUDepth = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
1396
1397 if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
1398 {
1399 uint32_t subTUDepth = lowestTUDepth + 1; // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
1400 // Otherwise, this must be the level above the lowest level (as specified above)
1401 uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
1402
1403 encodeBin(cu.getCbf(absPartIdx , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1404 encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1405 }
1406 else
1407 encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1408 }
1409
1410 void Entropy::codeTransformSkipFlags(const CUData& cu, uint32_t absPartIdx, uint32_t trSize, TextType ttype)
1411 {
1412 if (cu.m_tqBypass[absPartIdx])
1413 return;
1414 if (trSize != 4)
1415 return;
1416
1417 uint32_t useTransformSkip = cu.m_transformSkip[ttype][absPartIdx];
1418 encodeBin(useTransformSkip, m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX + (ttype ? NUM_TRANSFORMSKIP_FLAG_CTX : 0)]);
1419 }
1420
1421 /** Encode (X,Y) position of the last significant coefficient
1422 * \param posx X component of last coefficient
1423 * \param posy Y component of last coefficient
1424 * \param log2TrSize
1425 * \param bIsLuma
1426 * \param scanIdx scan type (zig-zag, hor, ver)
1427 * This method encodes the X and Y component within a block of the last significant coefficient.
1428 */
1429 void Entropy::codeLastSignificantXY(uint32_t posx, uint32_t posy, uint32_t log2TrSize, bool bIsLuma, uint32_t scanIdx)
1430 {
1431 // swap
1432 if (scanIdx == SCAN_VER)
1433 std::swap(posx, posy);
1434
1435 uint32_t ctxLast;
1436 uint32_t groupIdxX = getGroupIdx(posx);
1437 uint32_t groupIdxY = getGroupIdx(posy);
1438
1439 int blkSizeOffset = bIsLuma ? ((log2TrSize - 2) * 3 + ((log2TrSize - 1) >> 2)) : NUM_CTX_LAST_FLAG_XY_LUMA;
1440 int ctxShift = bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2;
1441 uint32_t maxGroupIdx = log2TrSize * 2 - 1;
1442
1443 // posX
1444 uint8_t *ctxX = &m_contextState[OFF_CTX_LAST_FLAG_X];
1445 for (ctxLast = 0; ctxLast < groupIdxX; ctxLast++)
1446 encodeBin(1, *(ctxX + blkSizeOffset + (ctxLast >> ctxShift)));
1447
1448 if (groupIdxX < maxGroupIdx)
1449 encodeBin(0, *(ctxX + blkSizeOffset + (ctxLast >> ctxShift)));
1450
1451 // posY
1452 uint8_t *ctxY = &m_contextState[OFF_CTX_LAST_FLAG_Y];
1453 for (ctxLast = 0; ctxLast < groupIdxY; ctxLast++)
1454 encodeBin(1, *(ctxY + blkSizeOffset + (ctxLast >> ctxShift)));
1455
1456 if (groupIdxY < maxGroupIdx)
1457 encodeBin(0, *(ctxY + blkSizeOffset + (ctxLast >> ctxShift)));
1458
1459 if (groupIdxX > 3)
1460 {
1461 uint32_t count = (groupIdxX - 2) >> 1;
1462 posx = posx - g_minInGroup[groupIdxX];
1463 encodeBinsEP(posx, count);
1464 }
1465 if (groupIdxY > 3)
1466 {
1467 uint32_t count = (groupIdxY - 2) >> 1;
1468 posy = posy - g_minInGroup[groupIdxY];
1469 encodeBinsEP(posy, count);
1470 }
1471 }
1472
1473 void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
1474 {
1475 uint32_t trSize = 1 << log2TrSize;
1476
1477 // compute number of significant coefficients
1478 uint32_t numSig = primitives.count_nonzero(coeff, (1 << (log2TrSize << 1)));
1479
1480 X265_CHECK(numSig > 0, "cbf check fail\n");
1481
1482 bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled && !cu.m_tqBypass[absPartIdx];
1483
1484 if (cu.m_slice->m_pps->bTransformSkipEnabled)
1485 codeTransformSkipFlags(cu, absPartIdx, trSize, ttype);
1486
1487 bool bIsLuma = ttype == TEXT_LUMA;
1488
1489 // select scans
1490 TUEntropyCodingParameters codingParameters;
1491 cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
1492
1493 //----- encode significance map -----
1494
1495 // Find position of last coefficient
1496 int scanPosLast = 0;
1497 uint32_t posLast;
1498 uint64_t sigCoeffGroupFlag64 = 0;
1499 const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
1500 assert((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1));
1501 do
1502 {
1503 posLast = codingParameters.scan[scanPosLast++];
1504
1505 const uint32_t isNZCoeff = (coeff[posLast] != 0);
1506 // get L1 sig map
1507 // NOTE: the new algorithm is complicated, so I keep reference code here
1508 //uint32_t posy = posLast >> log2TrSize;
1509 //uint32_t posx = posLast - (posy << log2TrSize);
1510 //uint32_t blkIdx0 = ((posy >> MLS_CG_LOG2_SIZE) << codingParameters.log2TrSizeCG) + (posx >> MLS_CG_LOG2_SIZE);
1511 const uint32_t blkIdx = ((posLast >> (2 * MLS_CG_LOG2_SIZE)) & ~maskPosXY) + ((posLast >> MLS_CG_LOG2_SIZE) & maskPosXY);
1512 sigCoeffGroupFlag64 |= ((uint64_t)isNZCoeff << blkIdx);
1513 numSig -= isNZCoeff;
1514 }
1515 while (numSig > 0);
1516 scanPosLast--;
1517
1518 // Code position of last coefficient
1519 int posLastY = posLast >> log2TrSize;
1520 int posLastX = posLast & (trSize - 1);
1521 codeLastSignificantXY(posLastX, posLastY, log2TrSize, bIsLuma, codingParameters.scanType);
1522
1523 //===== code significance flag =====
1524 uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
1525 uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
1526 const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
1527 uint32_t c1 = 1;
1528 uint32_t goRiceParam = 0;
1529 int scanPosSig = scanPosLast;
1530
1531 for (int subSet = lastScanSet; subSet >= 0; subSet--)
1532 {
1533 int numNonZero = 0;
1534 int subPos = subSet << MLS_CG_SIZE;
1535 goRiceParam = 0;
1536 int absCoeff[1 << MLS_CG_SIZE];
1537 uint32_t coeffSigns = 0;
1538 int lastNZPosInCG = -1;
1539 int firstNZPosInCG = 1 << MLS_CG_SIZE;
1540 if (scanPosSig == scanPosLast)
1541 {
1542 absCoeff[0] = int(abs(coeff[posLast]));
1543 coeffSigns = (coeff[posLast] < 0);
1544 numNonZero = 1;
1545 lastNZPosInCG = scanPosSig;
1546 firstNZPosInCG = scanPosSig;
1547 scanPosSig--;
1548 }
1549 // encode significant_coeffgroup_flag
1550 const int cgBlkPos = codingParameters.scanCG[subSet];
1551 const int cgPosY = cgBlkPos >> codingParameters.log2TrSizeCG;
1552 const int cgPosX = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
1553 const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
1554
1555 if (subSet == lastScanSet || !subSet)
1556 sigCoeffGroupFlag64 |= cgBlkPosMask;
1557 else
1558 {
1559 uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
1560 uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
1561 encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
1562 }
1563
1564 // encode significant_coeff_flag
1565 if (sigCoeffGroupFlag64 & cgBlkPosMask)
1566 {
1567 const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
1568 uint32_t blkPos, sig, ctxSig;
1569 for (; scanPosSig >= subPos; scanPosSig--)
1570 {
1571 blkPos = codingParameters.scan[scanPosSig];
1572 sig = (coeff[blkPos] != 0);
1573 if (scanPosSig > subPos || subSet == 0 || numNonZero)
1574 {
1575 ctxSig = Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext);
1576 encodeBin(sig, baseCtx[ctxSig]);
1577 }
1578 if (sig)
1579 {
1580 absCoeff[numNonZero] = int(abs(coeff[blkPos]));
1581 coeffSigns = 2 * coeffSigns + ((uint32_t)coeff[blkPos] >> 31);
1582 numNonZero++;
1583 if (lastNZPosInCG < 0)
1584 lastNZPosInCG = scanPosSig;
1585 firstNZPosInCG = scanPosSig;
1586 }
1587 }
1588 }
1589 else
1590 scanPosSig = subPos - 1;
1591
1592 if (numNonZero > 0)
1593 {
1594 bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
1595 uint32_t ctxSet = (subSet > 0 && bIsLuma) ? 2 : 0;
1596
1597 if (c1 == 0)
1598 ctxSet++;
1599
1600 c1 = 1;
1601 uint8_t *baseCtxMod = bIsLuma ? &m_contextState[OFF_ONE_FLAG_CTX + 4 * ctxSet] : &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA + 4 * ctxSet];
1602
1603 int numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
1604 int firstC2FlagIdx = -1;
1605 for (int idx = 0; idx < numC1Flag; idx++)
1606 {
1607 uint32_t symbol = absCoeff[idx] > 1;
1608 encodeBin(symbol, baseCtxMod[c1]);
1609 if (symbol)
1610 {
1611 c1 = 0;
1612
1613 if (firstC2FlagIdx == -1)
1614 firstC2FlagIdx = idx;
1615 }
1616 else if ((c1 < 3) && (c1 > 0))
1617 c1++;
1618 }
1619
1620 if (!c1)
1621 {
1622 baseCtxMod = bIsLuma ? &m_contextState[OFF_ABS_FLAG_CTX + ctxSet] : &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA + ctxSet];
1623 if (firstC2FlagIdx != -1)
1624 {
1625 uint32_t symbol = absCoeff[firstC2FlagIdx] > 2;
1626 encodeBin(symbol, baseCtxMod[0]);
1627 }
1628 }
1629
1630 if (bHideFirstSign && signHidden)
1631 encodeBinsEP((coeffSigns >> 1), numNonZero - 1);
1632 else
1633 encodeBinsEP(coeffSigns, numNonZero);
1634
1635 int firstCoeff2 = 1;
1636 if (!c1 || numNonZero > C1FLAG_NUMBER)
1637 {
1638 for (int idx = 0; idx < numNonZero; idx++)
1639 {
1640 int baseLevel = (idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1;
1641
1642 if (absCoeff[idx] >= baseLevel)
1643 {
1644 writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
1645 if (absCoeff[idx] > 3 * (1 << goRiceParam))
1646 goRiceParam = std::min<uint32_t>(goRiceParam + 1, 4);
1647 }
1648 if (absCoeff[idx] >= 2)
1649 firstCoeff2 = 0;
1650 }
1651 }
1652 }
1653 }
1654 }
1655
1656 void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
1657 {
1658 X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1659
1660 uint32_t isCodeNonZero = !!code;
1661
1662 encodeBinEP(isCodeNonZero);
1663 if (isCodeNonZero)
1664 {
1665 uint32_t isCodeLast = (maxSymbol > code);
1666 uint32_t mask = (1 << (code - 1)) - 1;
1667 uint32_t len = code - 1 + isCodeLast;
1668 mask <<= isCodeLast;
1669
1670 encodeBinsEP(mask, len);
1671 }
1672 }
1673
1674 /* estimate bit cost for CBP, significant map and significant coefficients */
1675 void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
1676 {
1677 estCBFBit(estBitsSbac);
1678
1679 estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
1680
1681 // encode significance map
1682 estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
1683
1684 // encode significant coefficients
1685 estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
1686 }
1687
1688 /* estimate bit cost for each CBP bit */
1689 void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
1690 {
1691 const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
1692
1693 for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
1694 {
1695 estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
1696 estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
1697 }
1698
1699 ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
1700
1701 estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
1702 estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
1703 }
1704
1705 /* estimate SAMBAC bit cost for significant coefficient group map */
1706 void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
1707 {
1708 int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
1709
1710 for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
1711 for (uint32_t bin = 0; bin < 2; bin++)
1712 estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
1713 }
1714
1715 /* estimate SAMBAC bit cost for significant coefficient map */
1716 void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
1717 {
1718 int firstCtx = 1, numCtx = 8;
1719
1720 if (log2TrSize >= 4)
1721 {
1722 firstCtx = bIsLuma ? 21 : 12;
1723 numCtx = bIsLuma ? 6 : 3;
1724 }
1725 else if (log2TrSize == 3)
1726 {
1727 firstCtx = 9;
1728 numCtx = bIsLuma ? 12 : 3;
1729 }
1730
1731 if (bIsLuma)
1732 {
1733 for (uint32_t bin = 0; bin < 2; bin++)
1734 estBitsSbac.significantBits[0][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_FLAG_CTX], bin);
1735
1736 for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
1737 for (uint32_t bin = 0; bin < 2; bin++)
1738 estBitsSbac.significantBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_FLAG_CTX + ctxIdx], bin);
1739 }
1740 else
1741 {
1742 for (uint32_t bin = 0; bin < 2; bin++)
1743 estBitsSbac.significantBits[0][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_FLAG_CTX + (NUM_SIG_FLAG_CTX_LUMA + 0)], bin);
1744
1745 for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
1746 for (uint32_t bin = 0; bin < 2; bin++)
1747 estBitsSbac.significantBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_FLAG_CTX + (NUM_SIG_FLAG_CTX_LUMA + ctxIdx)], bin);
1748 }
1749 int bitsX = 0, bitsY = 0;
1750
1751 int blkSizeOffset = bIsLuma ? ((log2TrSize - 2) * 3 + ((log2TrSize - 1) >> 2)) : NUM_CTX_LAST_FLAG_XY_LUMA;
1752 int ctxShift = bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2;
1753 uint32_t maxGroupIdx = log2TrSize * 2 - 1;
1754
1755 uint32_t ctx;
1756 const uint8_t *ctxX = &m_contextState[OFF_CTX_LAST_FLAG_X];
1757 for (ctx = 0; ctx < maxGroupIdx; ctx++)
1758 {
1759 int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
1760 estBitsSbac.lastXBits[ctx] = bitsX + sbacGetEntropyBits(ctxX[ctxOffset], 0);
1761 bitsX += sbacGetEntropyBits(ctxX[ctxOffset], 1);
1762 }
1763
1764 estBitsSbac.lastXBits[ctx] = bitsX;
1765
1766 const uint8_t *ctxY = &m_contextState[OFF_CTX_LAST_FLAG_Y];
1767 for (ctx = 0; ctx < maxGroupIdx; ctx++)
1768 {
1769 int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
1770 estBitsSbac.lastYBits[ctx] = bitsY + sbacGetEntropyBits(ctxY[ctxOffset], 0);
1771 bitsY += sbacGetEntropyBits(ctxY[ctxOffset], 1);
1772 }
1773
1774 estBitsSbac.lastYBits[ctx] = bitsY;
1775 }
1776
1777 /* estimate bit cost of significant coefficient */
1778 void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
1779 {
1780 if (bIsLuma)
1781 {
1782 const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
1783 const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
1784
1785 for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
1786 {
1787 estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
1788 estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
1789 }
1790
1791 for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
1792 {
1793 estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
1794 estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
1795 }
1796 }
1797 else
1798 {
1799 const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
1800 const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
1801
1802 for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
1803 {
1804 estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
1805 estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
1806 }
1807
1808 for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
1809 {
1810 estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
1811 estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
1812 }
1813 }
1814 }
1815
1816 /* Initialize our context information from the nominated source */
1817 void Entropy::copyContextsFrom(const Entropy& src)
1818 {
1819 X265_CHECK(src.m_valid, "invalid copy source context\n");
1820
1821 memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
1822 markValid();
1823 }
1824
1825 void Entropy::start()
1826 {
1827 m_low = 0;
1828 m_range = 510;
1829 m_bitsLeft = -12;
1830 m_numBufferedBytes = 0;
1831 m_bufferedByte = 0xff;
1832 }
1833
1834 void Entropy::finish()
1835 {
1836 if (m_low >> (21 + m_bitsLeft))
1837 {
1838 m_bitIf->writeByte(m_bufferedByte + 1);
1839 while (m_numBufferedBytes > 1)
1840 {
1841 m_bitIf->writeByte(0x00);
1842 m_numBufferedBytes--;
1843 }
1844
1845 m_low -= 1 << (21 + m_bitsLeft);
1846 }
1847 else
1848 {
1849 if (m_numBufferedBytes > 0)
1850 m_bitIf->writeByte(m_bufferedByte);
1851
1852 while (m_numBufferedBytes > 1)
1853 {
1854 m_bitIf->writeByte(0xff);
1855 m_numBufferedBytes--;
1856 }
1857 }
1858 m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
1859 }
1860
1861 void Entropy::copyState(const Entropy& other)
1862 {
1863 m_low = other.m_low;
1864 m_range = other.m_range;
1865 m_bitsLeft = other.m_bitsLeft;
1866 m_bufferedByte = other.m_bufferedByte;
1867 m_numBufferedBytes = other.m_numBufferedBytes;
1868 m_fracBits = other.m_fracBits;
1869 }
1870
1871 void Entropy::resetBits()
1872 {
1873 m_low = 0;
1874 m_bitsLeft = -12;
1875 m_numBufferedBytes = 0;
1876 m_bufferedByte = 0xff;
1877 m_fracBits &= 32767;
1878 if (m_bitIf)
1879 m_bitIf->resetBits();
1880 }
1881
/** Encode one context-coded bin.
 *
 * The context model is always advanced through sbacNext(). When no bitstream
 * interface is attached (m_bitIf is null) the bin is not arithmetic coded; its
 * estimated cost is added to m_fracBits instead. Otherwise the interval
 * registers (m_low, m_range) are updated and renormalized, and writeOut() is
 * called once m_bitsLeft becomes non-negative.
 *
 * \param binValue  bin to encode; only its lowest bit is compared with the MPS
 * \param ctxModel  context model state, updated in place */
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
{
    // keep the state from before the update; cost and LPS lookup use it
    uint32_t mstate = ctxModel;

    ctxModel = sbacNext(mstate, binValue);

    if (!m_bitIf)
    {
        // estimation only: accumulate the bit cost, produce no output
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
        return;
    }

    uint32_t range = m_range;
    uint32_t state = sbacGetState(mstate);
    // bits 6..7 of range select the column of the LPS range table
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
    range -= lps;

    X265_CHECK(lps >= 2, "lps is too small\n");

    // MPS path: shift by one bit when the reduced range dropped below 256
    // (the subtraction wraps and sets the top bit), otherwise by zero
    int numBits = (uint32_t)(range - 256) >> 31;
    uint32_t low = m_low;

    // NOTE: MPS must be LOWEST bit in mstate
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
    if ((binValue ^ mstate) & 1)
    {
        // LPS path: the interval becomes the LPS sub-range placed above the MPS one

        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
        //numBits = g_renormTable[lps >> 3];
        // presumably CLZ stores the index of the highest set bit of lps in idx
        // (the check below expects idx == 1 for state 63) -- confirm against the macro
        unsigned long idx;
        CLZ(idx, lps);
        X265_CHECK(state != 63 || idx == 1, "state failure\n");

        numBits = 8 - idx;
        // state 63 and above always renormalizes by 6 bits
        if (state >= 63)
            numBits = 6;
        X265_CHECK(numBits <= 6, "numBits failure\n");

        low += range;
        range = lps;
    }
    // renormalize both registers and account for the shifted-in bits
    m_low = (low << numBits);
    m_range = (range << numBits);
    m_bitsLeft += numBits;

    if (m_bitsLeft >= 0)
        writeOut();
}
1930
1931 /** Encode equiprobable bin */
1932 void Entropy::encodeBinEP(uint32_t binValue)
1933 {
1934 if (!m_bitIf)
1935 {
1936 m_fracBits += 32768;
1937 return;
1938 }
1939 m_low <<= 1;
1940 if (binValue)
1941 m_low += m_range;
1942 m_bitsLeft++;
1943
1944 if (m_bitsLeft >= 0)
1945 writeOut();
1946 }
1947
1948 /** Encode equiprobable bins */
1949 void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
1950 {
1951 if (!m_bitIf)
1952 {
1953 m_fracBits += 32768 * numBins;
1954 return;
1955 }
1956
1957 while (numBins > 8)
1958 {
1959 numBins -= 8;
1960 uint32_t pattern = binValues >> numBins;
1961 m_low <<= 8;
1962 m_low += m_range * pattern;
1963 binValues -= pattern << numBins;
1964 m_bitsLeft += 8;
1965
1966 if (m_bitsLeft >= 0)
1967 writeOut();
1968 }
1969
1970 m_low <<= numBins;
1971 m_low += m_range * binValues;
1972 m_bitsLeft += numBins;
1973
1974 if (m_bitsLeft >= 0)
1975 writeOut();
1976 }
1977
1978 /** Encode terminating bin */
1979 void Entropy::encodeBinTrm(uint32_t binValue)
1980 {
1981 if (!m_bitIf)
1982 {
1983 m_fracBits += sbacGetEntropyBitsTrm(binValue);
1984 return;
1985 }
1986
1987 m_range -= 2;
1988 if (binValue)
1989 {
1990 m_low += m_range;
1991 m_low <<= 7;
1992 m_range = 2 << 7;
1993 m_bitsLeft += 7;
1994 }
1995 else if (m_range >= 256)
1996 return;
1997 else
1998 {
1999 m_low <<= 1;
2000 m_range <<= 1;
2001 m_bitsLeft++;
2002 }
2003
2004 if (m_bitsLeft >= 0)
2005 writeOut();
2006 }
2007
2008 /** Move bits from register into bitstream */
2009 void Entropy::writeOut()
2010 {
2011 uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2012 uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2013
2014 m_bitsLeft -= 8;
2015 m_low &= low_mask;
2016
2017 if (leadByte == 0xff)
2018 m_numBufferedBytes++;
2019 else
2020 {
2021 uint32_t numBufferedBytes = m_numBufferedBytes;
2022 if (numBufferedBytes > 0)
2023 {
2024 uint32_t carry = leadByte >> 8;
2025 uint32_t byteTowrite = m_bufferedByte + carry;
2026 m_bitIf->writeByte(byteTowrite);
2027
2028 byteTowrite = (0xff + carry) & 0xff;
2029 while (numBufferedBytes > 1)
2030 {
2031 m_bitIf->writeByte(byteTowrite);
2032 numBufferedBytes--;
2033 }
2034 }
2035 m_numBufferedBytes = 1;
2036 m_bufferedByte = (uint8_t)leadByte;
2037 }
2038 }
2039
/* Estimated bit cost table with 128 entries.
 * NOTE(review): presumably read through sbacGetEntropyBits() and expressed in
 * the same fixed-point units as m_fracBits, where encodeBinEP() charges 32768
 * for one equiprobable bin -- confirm against the helper's definition. Entries
 * come in pairs; within each pair the first value shrinks and the second grows
 * towards the end of the table. */
const uint32_t g_entropyBits[128] =
{
    // Corrected table, most notably for last state
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
};
2052
/* Context state transition table: 128 rows of two successor states each.
 * NOTE(review): presumably indexed as [current state][bin value] by sbacNext()
 * -- confirm against the helper's definition. Successors keep the parity of
 * the row index except in rows 0 and 1; rows 126 and 127 map to themselves for
 * both columns. */
const uint8_t g_nextState[128][2] =
{
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 }, { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 }, { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 }, { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 }, { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 }, { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 }, { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 }, { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 }, { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 }, { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 }, { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 }, { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 }, { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 }, { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 }, { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 }, { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 }, { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
};
2072
2073 }